November 19

Linux: ISCSI initiator initial setup (This was specifically for Oracle Linux 7 Server to PureX20)

1. Install needed software
yum install iscsi-initiator-utils
yum install device-mapper-multipath

2. Get IQN of the host initiator:
cat /etc/iscsi/initiatorname.iscsi

3. Add the iSCSI initiator name to your target device's configuration so that it accepts your connection
An example of IQN returned: 
InitiatorName=iqn.1994-05.com.redhat:bffc5f96a7fb 

4. Configure your NIC(s) on the same iSCSI subnet as your target device. If this is a multi-NIC configuration on the same subnet, do the following:

Create a bond.  In our case bond2:

cd /etc/sysconfig/network-scripts/
nano ifcfg-bond2

DEVICE=bond2
IPADDR=10.1.200.51
NETMASK=255.255.255.0
GATEWAY=10.1.200.1
ONBOOT=yes
BOOTPROTO=none
NM_CONTROLLED=no
BONDING_OPTS="mode=1 miimon=250 use_carrier=1 updelay=500 downdelay=500 primary_reselect=2"
MTU=9000

nano ifcfg-bond2_p1p1
# Slave interface p1p1, enslaved to bond2 and not managed by NetworkManager.
DEVICE=p1p1
TYPE=Ethernet
NAME=bond2_p1p1
ONBOOT=yes
MASTER=bond2
SLAVE=yes
NM_CONTROLLED=no
# Jumbo frames, same MTU as configured on bond2.
MTU=9000


nano ifcfg-bond2_p5p1
# Slave interface p5p1, enslaved to bond2 and not managed by NetworkManager.
DEVICE=p5p1
TYPE=Ethernet
NAME=bond2_p5p1
ONBOOT=yes
MASTER=bond2
SLAVE=yes
NM_CONTROLLED=no
# Jumbo frames, same MTU as configured on bond2.
MTU=9000

5. Remove unneeded device configurations
mv ifcfg-p1p1 /root
mv ifcfg-p5p1 /root

6. reboot

7. Setup the multipath base config
mpathconf --enable --with_multipathd y
mpathconf --enable


8. Modify multipath.conf
--------------------------------------------------------------------------
/etc/multipath.conf
# This is a basic configuration file with some examples, for device mapper
# multipath.
#
# For a complete list of the default configuration values, run either
# multipath -t
# or
# multipathd show config
#
# For a list of configuration options with descriptions, see the multipath.conf
# man page

## By default, devices with vendor = "IBM" and product = "S/390.*" are
## blacklisted. To enable multipathing on these devices, uncomment the
## following lines.
#blacklist_exceptions {
#	device {
#		vendor	"IBM"
#		product	"S/390.*"
#	}
#}

## Use user friendly names, instead of using WWIDs as names.
defaults {
	user_friendly_names yes
	find_multipaths yes
	polling_interval      10
}
##
## Here is an example of how to configure some standard options.
##
#
#defaults {
#	polling_interval 	10
#	path_selector		"round-robin 0"
#	path_grouping_policy	multibus
#	uid_attribute		ID_SERIAL
#	prio			alua
#	path_checker		readsector0
#	rr_min_io		100
#	max_fds			8192
#	rr_weight		priorities
#	failback		immediate
#	no_path_retry		fail
#	user_friendly_names	yes
#}
##
## The wwid line in the following blacklist section is shown as an example
## of how to blacklist devices by wwid.  The 2 devnode lines are the
## compiled in default blacklist. If you want to blacklist entire types
## of devices, such as all scsi devices, you should use a devnode line.
## However, if you want to blacklist specific devices, you should use
## a wwid line.  Since there is no guarantee that a specific device will
## not change names on reboot (from /dev/sda to /dev/sdb for example)
## devnode lines are not recommended for blacklisting specific devices.
##
blacklist {
	wwid 26353900f02796769
	devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st|nbd)[0-9]*"
	devnode "^hd[a-z][0-9]*"
	devnode "^etherd"
	devnode "^nvme.*"
	devnode "^xvd[a-z]*"  # here
	devnode "^zram[0-9]*" # here
}
#multipaths {
#	multipath {
#		wwid			3600508b4000156d700012000000b0000
#		alias			yellow
#		path_grouping_policy	multibus
#		path_selector		"round-robin 0"
#		failback		manual
#		rr_weight		priorities
#		no_path_retry		5
#	}
#	multipath {
#		wwid			1DEC_____321816758474
#		alias			red
#	}
#}
#devices {
#	device {
#		vendor			"COMPAQ  "
#		product			"HSV110 (C)COMPAQ"
#		path_grouping_policy	multibus
#		path_checker		readsector0
#		path_selector		"round-robin 0"
#		hardware_handler	"0"
#		failback		15
#		rr_weight		priorities
#		no_path_retry		queue
#	}
#	device {
#		vendor			"COMPAQ  "
#		product			"MSA1000         "
#		path_grouping_policy	multibus
#	}
#}

#blacklist {
#}
######################### for rhel6+  ###############################
####devices {
####   device {
####       vendor                "PURE"
####       path_selector         "queue-length 0"
####       path_grouping_policy  group_by_prio
####       path_checker          tur
####       fast_io_fail_tmo      10
####       dev_loss_tmo          60
####       no_path_retry         0
####       hardware_handler      "1 alua"
####       prio                  alua
####       failback              immediate
####   }
####}
######################### for rhel7+  ###############################
devices {
  device {
        vendor "PURE"
        product "FlashArray"
        path_grouping_policy "multibus"
        path_selector "queue-length 0"
        path_checker "tur"
        features "0"
        hardware_handler "0"
        prio "const"
        failback immediate
        fast_io_fail_tmo 10
        dev_loss_tmo 60
        user_friendly_names no
    }
}

--------------------------------------------------------------------------

9. Start iscsi
systemctl enable iscsi
systemctl start iscsi
systemctl status iscsi

10. Start and enable multipathd
systemctl start multipathd
systemctl enable multipathd
systemctl status multipathd

11. Query the IP of the iscsi target
iscsiadm -m discovery -t sendtargets -p 10.1.206.14
iscsiadm -m discovery -t st -p 10.1.206.14.10 automatic

12. Setup the iscsi initiator for automatic connectivity
iscsiadm -m node -L automatic

13. Verify sessions
iscsiadm -m session

14. Verify disk connectivity
multipath -ll
dmsetup info -c -o name,blkdevname,devnos_used,blkdevs_used,UUID
Category: Linux | Comments Off on Linux: ISCSI initiator initial setup (This was specifically for Oracle Linux 7 Server to PureX20)
November 19

Linux: Kubernetes setup on fedora that actually works

Fedora (Single Node)

        Prerequisites
        Instructions
        Support Level

Prerequisites

    You need 2 or more machines with Fedora installed. These can be either bare metal machines or virtual machines.

Instructions

This is a getting started guide for Fedora. It is a manual configuration so you understand all the underlying packages / services / ports, etc…

This guide will only get ONE node (previously minion) working. Multiple nodes require a functional networking configuration done outside of Kubernetes, although the additional Kubernetes configuration requirements should be obvious.

The Kubernetes package provides a few services: kube-apiserver, kube-scheduler, kube-controller-manager, kubelet, kube-proxy. These services are managed by systemd and the configuration resides in a central location: /etc/kubernetes. We will break the services up between the hosts. The first host, fed-master, will be the Kubernetes master. This host will run the kube-apiserver, kube-controller-manager, and kube-scheduler. In addition, the master will also run etcd (not needed if etcd runs on a different host but this guide assumes that etcd and Kubernetes master run on the same host). The remaining host, fed-node will be the node and run kubelet, proxy and docker.

System Information:

Hosts:
Fedora (Single Node)

        Prerequisites
        Instructions
        Support Level

Prerequisites

    You need 2 or more machines with Fedora installed. These can be either bare metal machines or virtual machines.

Instructions

This is a getting started guide for Fedora. It is a manual configuration so you understand all the underlying packages / services / ports, etc…

This guide will only get ONE node (previously minion) working. Multiple nodes require a functional networking configuration done outside of Kubernetes, although the additional Kubernetes configuration requirements should be obvious.

The Kubernetes package provides a few services: kube-apiserver, kube-scheduler, kube-controller-manager, kubelet, kube-proxy. These services are managed by systemd and the configuration resides in a central location: /etc/kubernetes. We will break the services up between the hosts. The first host, fed-master, will be the Kubernetes master. This host will run the kube-apiserver, kube-controller-manager, and kube-scheduler. In addition, the master will also run etcd (not needed if etcd runs on a different host but this guide assumes that etcd and Kubernetes master run on the same host). The remaining host, fed-node will be the node and run kubelet, proxy and docker.

System Information:

Hosts:

fed-master = 192.168.121.9
fed-node = 192.168.121.65

Prepare the hosts:

    Install Kubernetes on all hosts - fed-{master,node}. This will also pull in docker. Also install etcd on fed-master. This guide has been tested with Kubernetes-0.18 and beyond.
    Running on AWS EC2 with RHEL 7.2, you need to enable “extras” repository for yum by editing /etc/yum.repos.d/redhat-rhui.repo and changing the enable=0 to enable=1 for extras.

dnf -y install kubernetes

    Install etcd

dnf -y install etcd

    Add master and node to /etc/hosts on all machines (not needed if hostnames already in DNS). Make sure that communication works between fed-master and fed-node by using a utility such as ping.

echo "192.168.121.9    fed-master
192.168.121.65    fed-node" >> /etc/hosts

    Edit /etc/kubernetes/config (which should be the same on all hosts) to set the name of the master server:

# Address of the Kubernetes master (API server) that the services connect to
KUBE_MASTER="--master=http://fed-master:8080"

    Disable the firewall on both the master and node, as Docker does not play well with other firewall rule managers. Please note that iptables.service does not exist on the default Fedora Server install.

systemctl mask firewalld.service
systemctl stop firewalld.service

systemctl disable iptables.service
systemctl stop iptables.service

Configure the Kubernetes services on the master.

    Edit /etc/kubernetes/apiserver to appear as such. The service-cluster-ip-range IP addresses must be an unused block of addresses, not used anywhere else. They do not need to be routed or assigned to anything.

# The address on the local server to listen to.
KUBE_API_ADDRESS="--address=0.0.0.0"

# Comma separated list of nodes in the etcd cluster
KUBE_ETCD_SERVERS="--etcd-servers=http://127.0.0.1:2379"

# Address range to use for services
KUBE_SERVICE_ADDRESSES="--service-cluster-ip-range=10.254.0.0/16"

# Add your own!
KUBE_API_ARGS=""

    Edit /etc/etcd/etcd.conf to let etcd listen on all available IPs instead of 127.0.0.1. If you have not done this, you might see an error such as “connection refused”.

ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"

    Start the appropriate services on master:

# Restart each master service, enable it at boot, and show its resulting state.
for service in etcd kube-apiserver kube-controller-manager kube-scheduler; do
    systemctl restart "$service"
    systemctl enable "$service"
    systemctl status "$service"
done

Configure the Kubernetes services on the node.

We need to configure the kubelet on the node.

    Edit /etc/kubernetes/kubelet to appear as such:

###
# Kubernetes kubelet (node) config

# The address for the info server to serve on (set to 0.0.0.0 or "" for all interfaces)
KUBELET_ADDRESS="--address=0.0.0.0"

# You may leave this blank to use the actual hostname
KUBELET_HOSTNAME="--hostname-override=fed-node"

# location of the api-server
KUBELET_ARGS="--cgroup-driver=systemd --kubeconfig=/etc/kubernetes/master-kubeconfig.yaml"

    Edit /etc/kubernetes/master-kubeconfig.yaml to contain the following information:

kind: Config
clusters:
- name: local
  cluster:
    server: http://fed-master:8080
users:
- name: kubelet
contexts:
- context:
    cluster: local
    user: kubelet
  name: kubelet-context
current-context: kubelet-context

    Start the appropriate services on the node (fed-node).

# Restart each node service, enable it at boot, and show its resulting state.
for service in kube-proxy kubelet docker; do
    systemctl restart "$service"
    systemctl enable "$service"
    systemctl status "$service"
done

    Check to make sure now the cluster can see the fed-node on fed-master, and its status changes to Ready.

kubectl get nodes
NAME            STATUS      AGE      VERSION
fed-node        Ready       4h

    Deletion of nodes:

To delete fed-node from your Kubernetes cluster, one should run the following on fed-master (Please do not do it, it is just for information):

kubectl delete -f ./node.json

You should be finished!

The cluster should be running! Launch a test pod.
Support Level
fed-master = 192.168.121.9
fed-node = 192.168.121.65

Prepare the hosts:

    Install Kubernetes on all hosts - fed-{master,node}. This will also pull in docker. Also install etcd on fed-master. This guide has been tested with Kubernetes-0.18 and beyond.
    Running on AWS EC2 with RHEL 7.2, you need to enable “extras” repository for yum by editing /etc/yum.repos.d/redhat-rhui.repo and changing the enable=0 to enable=1 for extras.

dnf -y install kubernetes

    Install etcd

dnf -y install etcd

    Add master and node to /etc/hosts on all machines (not needed if hostnames already in DNS). Make sure that communication works between fed-master and fed-node by using a utility such as ping.

echo "192.168.121.9    fed-master
192.168.121.65    fed-node" >> /etc/hosts

    Edit /etc/kubernetes/config (which should be the same on all hosts) to set the name of the master server:

# Address of the Kubernetes master (API server) that the services connect to
KUBE_MASTER="--master=http://fed-master:8080"

    Disable the firewall on both the master and node, as Docker does not play well with other firewall rule managers. Please note that iptables.service does not exist on the default Fedora Server install.

systemctl mask firewalld.service
systemctl stop firewalld.service

systemctl disable iptables.service
systemctl stop iptables.service

Configure the Kubernetes services on the master.

    Edit /etc/kubernetes/apiserver to appear as such. The service-cluster-ip-range IP addresses must be an unused block of addresses, not used anywhere else. They do not need to be routed or assigned to anything.

# The address on the local server to listen to.
KUBE_API_ADDRESS="--address=0.0.0.0"

# Comma separated list of nodes in the etcd cluster
KUBE_ETCD_SERVERS="--etcd-servers=http://127.0.0.1:2379"

# Address range to use for services
KUBE_SERVICE_ADDRESSES="--service-cluster-ip-range=10.254.0.0/16"

# Add your own!
KUBE_API_ARGS=""

    Edit /etc/etcd/etcd.conf to let etcd listen on all available IPs instead of 127.0.0.1. If you have not done this, you might see an error such as “connection refused”.

ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"

    Start the appropriate services on master:

# Restart each master service, enable it at boot, and show its resulting state.
for service in etcd kube-apiserver kube-controller-manager kube-scheduler; do
    systemctl restart "$service"
    systemctl enable "$service"
    systemctl status "$service"
done

Configure the Kubernetes services on the node.

We need to configure the kubelet on the node.

    Edit /etc/kubernetes/kubelet to appear as such:

###
# Kubernetes kubelet (node) config

swapoff -a


# The address for the info server to serve on (set to 0.0.0.0 or "" for all interfaces)
KUBELET_ADDRESS="--address=0.0.0.0"

# You may leave this blank to use the actual hostname
KUBELET_HOSTNAME="--hostname-override=fed-node"

# location of the api-server
KUBELET_ARGS="--cgroup-driver=systemd --kubeconfig=/etc/kubernetes/master-kubeconfig.yaml"

    Edit /etc/kubernetes/master-kubeconfig.yaml to contain the following information:

kind: Config
clusters:
- name: local
  cluster:
    server: http://fed-master:8080
users:
- name: kubelet
contexts:
- context:
    cluster: local
    user: kubelet
  name: kubelet-context
current-context: kubelet-context

    Start the appropriate services on the node (fed-node).

# Restart each node service, enable it at boot, and show its resulting state.
for service in kube-proxy kubelet docker; do
    systemctl restart "$service"
    systemctl enable "$service"
    systemctl status "$service"
done

    Check to make sure now the cluster can see the fed-node on fed-master, and its status changes to Ready.

kubectl get nodes
NAME            STATUS      AGE      VERSION
fed-node        Ready       4h

    Deletion of nodes:

To delete fed-node from your Kubernetes cluster, one should run the following on fed-master (Please do not do it, it is just for information):

kubectl delete -f ./node.json

You should be finished!

The cluster should be running! Launch a test pod.
Category: Linux | Comments Off on Linux: Kubernetes setup on fedora that actually works
November 19

Linux: A server build with cifs share example

Cloned the server from a template

On the server:

Setup repository with satellite:
subscription-manager register --org="ORG-NAME" --activationkey="KEY"
subscription-manager subscribe --auto

Activated puppet:
Puppet agent setup from client
puppet agent -tv

Puppet agent setup from server
puppet cert sign --all


Setup SNMP using Satellite:
yum install -y net-snmp net-snmp-utils net-snmp-libs net-snmp-devel

# Keep the packaged config as a backup; a fresh snmpd.conf is written below.
mv /etc/snmp/snmpd.conf /etc/snmp/snmpd.conf.old

# TCP wrappers: deny snmpd to everyone, then allow only the polling hosts.
echo "snmpd: ALL" >> /etc/hosts.deny
for snmp_client in snmpserver1 snmpserver2 snmpserver3 lansweeper snmpserver4; do
    echo "snmpd: ${snmp_client}.domainname.com" >> /etc/hosts.allow
done

# Minimal read-only agent configuration (sysname is this machine's hostname).
cat >> /etc/snmp/snmpd.conf <<EOF
sysname $(hostname)
syslocation HQ Server Room
syscontact [email protected]
rocommunity communityname 172.22.0.0/24
EOF

systemctl enable snmpd.service
systemctl restart snmpd.service

Install LANSWEEPER using Satellite:
cd /root ; wget http://satelliteservername/pub/LsAgent-linux-x64.run ; chmod 744 LsAgent-linux-x64.run

cd /root ; ./LsAgent-linux-x64.run --server lansweeper.domainname.com --port 9524 --mode unattended

Installed Mcafee using Satellite:
firewall-cmd --permanent --add-port=8081/tcp ; firewall-cmd --add-port=8081/tcp ; firewall-cmd --reload

# Each install runs in its own subshell: a failed `cd` aborts that step instead
# of letting wget/unzip/rm run in whatever directory the shell happens to be
# in, and the caller's working directory is unchanged afterwards so the second
# `cd Downloads/` resolves the same way as the first.

# McAfee Agent
(
    cd Downloads/ || exit 1
    wget http://satelliteservername/pub/AgentforLinux5.5.zip
    unzip AgentforLinux5.5.zip
    sh installrpm.sh -i
    rm -f inst*
)

# McAfee Endpoint Security Threat Prevention
(
    cd Downloads/ || exit 1
    wget http://satelliteservername/pub/ISecTP-10.5.3-1650-Release-ePO.zip
    unzip ISecTP-10.5.3-1650-Release-ePO.zip
    ./isectp-setup
    rm -f isectp*
    rm -f PkgCatalog.z
)


Added a second scsi controller and a 60GB drive

Drive configuration:

yum install cifs-utils

pvcreate /dev/sdb
vgcreate VG_DATA /dev/sdb
lvcreate -l 100%FREE -n lv_data VG_DATA

mkfs.xfs /dev/VG_DATA/lv_data

mkdir /data
chown -R localuser1:localuser1 /data/


Remote share setup:
useradd -u 1110 localuser2
useradd -u 1120 localuser3


# Create the mount points used by the CIFS entries in fstab (/share1 and
# /mnt/data1) and hand each one to the account that will own the mounted files.
mkdir /share1
chown -R localuser2:localuser2 /share1/

mkdir /mnt/data1
chown -R localuser3:localuser1 /mnt/data1/

mkdir /root/creds

localuser2_creds
username=localuser2
password=assignedpassword

localuser3_creds
username=localuser3
password=assignedpassword


chmod 0600 /root/creds/localuser2_creds
chmod 0600 /root/creds/localuser3_creds


Added the following to fstab:

/dev/mapper/VG_DATA-lv_data  /data          xfs     defaults        1 1
//fileserver1/share1  /share1 cifs credentials=/root/creds/localuser2_creds,vers=2.0,uid=1110,gid=1110,file_mode=0773,dir_mode=0773 0 0
//fileserver2/data1  /mnt/data1 cifs credentials=/root/creds/localuser3_creds,vers=2.0,uid=1120,gid=1000,file_mode=0775,dir_mode=0775 0 0

mount -a
df -h
Category: Linux | Comments Off on Linux: A server build with cifs share example
November 19

Python: Script to break down what is using memory / RAM

#!/usr/bin/env python

# Try to determine how much RAM is currently being used per program.
# Note per _program_, not per process. So for example this script
# will report RAM used by all httpd process together. In detail it reports:
# sum(private RAM for program processes) + sum(Shared RAM for program processes)
# The shared RAM is problematic to calculate, and this script automatically
# selects the most accurate method available for your kernel.

# Licence: LGPLv2
# Author:  [email protected]
# Source:  http://www.pixelbeat.org/scripts/ps_mem.py

# V1.0      06 Jul 2005     Initial release
# V1.1      11 Aug 2006     root permission required for accuracy
# V1.2      08 Nov 2006     Add total to output
#                           Use KiB,MiB,... for units rather than K,M,...
# V1.3      22 Nov 2006     Ignore shared col from /proc/$pid/statm for
#                           2.6 kernels up to and including 2.6.9.
#                           There it represented the total file backed extent
# V1.4      23 Nov 2006     Remove total from output as it's meaningless
#                           (the shared values overlap with other programs).
#                           Display the shared column. This extra info is
#                           useful, especially as it overlaps between programs.
# V1.5      26 Mar 2007     Remove redundant recursion from human()
# V1.6      05 Jun 2007     Also report number of processes with a given name.
#                           Patch from [email protected]
# V1.7      20 Sep 2007     Use PSS from /proc/$pid/smaps if available, which
#                           fixes some over-estimation and allows totalling.
#                           Enumerate the PIDs directly rather than using ps,
#                           which fixes the possible race between reading
#                           RSS with ps, and shared memory with this program.
#                           Also we can show non truncated command names.
# V1.8      28 Sep 2007     More accurate matching for stats in /proc/$pid/smaps
#                           as otherwise could match libraries causing a crash.
#                           Patch from [email protected]
# V1.9      20 Feb 2008     Fix invalid values reported when PSS is available.
#                           Reported by Andrey Borzenkov <[email protected]>
# V3.3      24 Jun 2014
#   http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py

# Notes:
#
# All interpreted programs where the interpreter is started
# by the shell or with env, will be merged to the interpreter
# (as that's what's given to exec). For e.g. all python programs
# starting with "#!/usr/bin/env python" will be grouped under python.
# You can change this by using the full command line but that will
# have the undesirable effect of splitting up programs started with
# differing parameters (for e.g. mingetty tty[1-6]).
#
# For 2.6 kernels up to and including 2.6.13 and later 2.4 redhat kernels
# (rmap vm without smaps) it can not be accurately determined how many pages
# are shared between processes in general or within a program in our case:
# http://lkml.org/lkml/2005/7/6/250
# A warning is printed if overestimation is possible.
# In addition for 2.6 kernels up to 2.6.9 inclusive, the shared
# value in /proc/$pid/statm is the total file-backed extent of a process.
# We ignore that, introducing more overestimation, again printing a warning.
# Since kernel 2.6.23-rc8-mm1 PSS is available in smaps, which allows
# us to calculate a more accurate value for the total RAM used by programs.
#
# Programs that use CLONE_VM without CLONE_THREAD are discounted by assuming
# they're the only programs that have the same /proc/$PID/smaps file for
# each instance.  This will fail if there are multiple real instances of a
# program that then use CLONE_VM without CLONE_THREAD, or if a clone changes
# its memory map while we're checksumming each /proc/$PID/smaps.
#
# I don't take account of memory allocated for a program
# by other programs. For e.g. memory used in the X server for
# a program could be determined, but is not.
#
# FreeBSD is supported if linprocfs is mounted at /compat/linux/proc/
# FreeBSD 8.0 supports up to a level of Linux 2.6.16

import getopt
import time
import errno
import os
import sys

try:
    # md5 module is deprecated on python 2.6
    # so try the newer hashlib first
    import hashlib
    md5_new = hashlib.md5
except ImportError:
    import md5
    md5_new = md5.new


# The following exits cleanly on Ctrl-C or EPIPE
# while treating other exceptions as before.
def std_exceptions(etype, value, tb):
    """Exception hook that stays quiet for Ctrl-C and broken pipes.

    The interpreter's default hook is reinstated first; any exception other
    than KeyboardInterrupt or an IOError carrying EPIPE is then handed to
    that default hook unchanged.
    """
    sys.excepthook = sys.__excepthook__
    if issubclass(etype, KeyboardInterrupt):
        return
    if issubclass(etype, IOError) and value.errno == errno.EPIPE:
        return
    sys.__excepthook__(etype, value, tb)
sys.excepthook = std_exceptions

#
#   Define some global variables
#

PAGESIZE = os.sysconf("SC_PAGE_SIZE") / 1024 #KiB
our_pid = os.getpid()

have_pss = 0

class Proc:
    """Accessor for the procfs tree (Linux /proc or FreeBSD's linprocfs)."""

    def __init__(self):
        # FreeBSD exposes the Linux-compatible procfs under /compat/linux.
        self.proc = '/proc'
        if os.uname()[0] == "FreeBSD":
            self.proc = '/compat/linux/proc'

    def path(self, *args):
        """Join the stringified components onto the procfs root."""
        components = [str(part) for part in args]
        return os.path.join(self.proc, *components)

    def open(self, *args):
        """Open a procfs entry for reading.

        Raises LookupError when the entry is missing (ENOENT: kernel thread
        or process gone) or not permitted (EPERM); other I/O errors
        propagate unchanged.
        """
        try:
            return open(self.path(*args))
        except (IOError, OSError):
            err = sys.exc_info()[1]
            if err.errno in (errno.ENOENT, errno.EPERM):
                raise LookupError
            raise

proc = Proc()


#
#   Functions
#

def parse_options():
    """Parse sys.argv[1:] into the ps_mem option tuple.

    Returns:
        (split_args, pids_to_show, watch, only_total) where split_args and
        only_total are booleans, pids_to_show is a list of ints or None, and
        watch is an int or None.

    Exits with status 3 after writing help() or a diagnostic to stderr when
    an option is unknown, a positional argument is present, or a -p / -w
    value is not numeric. Exits with status 0 after printing help() for
    -h / --help.
    """
    try:
        long_options = ['split-args', 'help', 'total']
        opts, args = getopt.getopt(sys.argv[1:], "shtp:w:", long_options)
    except getopt.GetoptError:
        sys.stderr.write(help())
        sys.exit(3)

    if len(args):
        sys.stderr.write("Extraneous arguments: %s\n" % args)
        sys.exit(3)

    # ps_mem.py options
    split_args = False
    pids_to_show = None
    watch = None
    only_total = False

    for o, a in opts:
        if o in ('-s', '--split-args'):
            split_args = True
        if o in ('-t', '--total'):
            only_total = True
        if o in ('-h', '--help'):
            sys.stdout.write(help())
            sys.exit(0)
        if o in ('-p',):
            # Only a malformed number should trigger the usage message; a
            # bare except would also swallow KeyboardInterrupt/SystemExit.
            try:
                pids_to_show = [int(x) for x in a.split(',')]
            except ValueError:
                sys.stderr.write(help())
                sys.exit(3)
        if o in ('-w',):
            try:
                watch = int(a)
            except ValueError:
                sys.stderr.write(help())
                sys.exit(3)

    return (split_args, pids_to_show, watch, only_total)

def help():
    """Return the usage text for ps_mem as a single newline-terminated string."""
    # The long form of -h is "--help" (that is what getopt is given in
    # parse_options), so the usage text must spell it with two dashes.
    help_msg = 'Usage: ps_mem [OPTION]...\n' \
    'Show program core memory usage\n' \
    '\n' \
    '  -h, --help                  Show this help\n' \
    '  -p <pid>[,pid2,...pidN]     Only show memory usage PIDs in the specified list\n' \
    '  -s, --split-args            Show and separate by, all command line arguments\n' \
    '  -t, --total                 Show only the total value\n' \
    '  -w <N>                      Measure and show process memory every N seconds\n'

    return help_msg

#(major,minor,release)
def kernel_ver():
    """Return the running kernel version as an int tuple (major, minor, release).

    Reads the first line of sys/kernel/osrelease through proc.open(). Anything
    after a '-' or '_' in the minor and release fields is dropped, and a
    field that is still not numeric becomes 0. Missing fields are padded
    with 0, so "4.19" yields (4, 19, 0).
    """
    release_file = proc.open('sys/kernel/osrelease')
    try:
        kv = release_file.readline().split(".")[:3]
    finally:
        # Close explicitly rather than relying on garbage collection.
        release_file.close()

    # Pad short version strings so the indexing below is always valid.
    while len(kv) < 3:
        kv.append('0')

    # Only the minor and release fields are sanitised; the major field is
    # converted as-is by the final int() call.
    for idx in (2, 1):
        for char in "-_":
            kv[idx] = kv[idx].split(char)[0]
        try:
            int(kv[idx])
        except ValueError:
            kv[idx] = 0
    return (int(kv[0]), int(kv[1]), int(kv[2]))


#return Private,Shared
#Note shared is always a subset of rss (trs is not always)
def getMemStats(pid):
    """Return (Private, Shared, mem_id) for one process; sizes are in KiB.

    When the process has an smaps file, Private and Shared are the sums of
    its "Private*" and "Shared*" fields and mem_id is the MD5 hex digest of
    the whole file. Once any smaps file has shown a "Pss:" field (tracked in
    the global have_pss), Shared is reported as Pss - Private instead.
    Without smaps, the values come from statm, mem_id is the pid itself, and
    kernels 2.6.1 through 2.6.9 report Shared as 0.

    Lookup failures from proc.open() propagate to the caller.
    """
    global have_pss
    mem_id = pid #unique

    # Read statm once so Rss and the fallback Shared value come from the
    # same snapshot, and close the handle explicitly.
    statm_file = proc.open(pid, 'statm')
    try:
        statm = statm_file.readline().split()
    finally:
        statm_file.close()
    Rss = int(statm[1]) * PAGESIZE

    if os.path.exists(proc.path(pid, 'smaps')): #stat
        smaps_file = proc.open(pid, 'smaps') #open
        try:
            smaps_lines = smaps_file.readlines()
        finally:
            smaps_file.close()

        Private_lines = []
        Shared_lines = []
        Pss_lines = []
        digester = md5_new()
        for line in smaps_lines:
            # Note we checksum smaps as maps is usually but
            # not always different for separate processes.
            digester.update(line.encode('latin1'))
            if line.startswith("Shared"):
                Shared_lines.append(line)
            elif line.startswith("Private"):
                Private_lines.append(line)
            elif line.startswith("Pss:"):
                # Match the exact field name: a bare "Pss" prefix would also
                # pick up breakdown fields such as "Pss_Dirty:" and count
                # the same memory more than once.
                have_pss = 1
                Pss_lines.append(line)
        mem_id = digester.hexdigest()
        Shared = sum([int(line.split()[1]) for line in Shared_lines])
        Private = sum([int(line.split()[1]) for line in Private_lines])
        #Note Shared + Private = Rss above
        #The Rss in smaps includes video card mem etc.
        if have_pss:
            pss_adjust = 0.5 # add 0.5KiB as this avg error due to truncation
            Pss = sum([float(line.split()[1])+pss_adjust for line in Pss_lines])
            Shared = Pss - Private
    elif (2,6,1) <= kernel_ver() <= (2,6,9):
        Shared = 0 #lots of overestimation, but what can we do?
        Private = Rss
    else:
        Shared = int(statm[2]) * PAGESIZE
        Private = Rss - Shared
    return (Private, Shared, mem_id)


def getCmdName(pid, split_args):
    """Return the display name for process `pid`.

    With split_args true the result is the full command line joined by
    spaces. Otherwise it is the name from the first line of the status file,
    replaced by the basename of the exe link target when that basename
    starts with the status name (i.e. the untruncated form).

    Raises LookupError when the exe link is missing or not permitted
    (kernel thread, or the process has gone).
    """
    argv = proc.open(pid, 'cmdline').read().split("\0")
    if len(argv) > 1 and argv[-1] == '':
        argv = argv[:-1]

    exe_link = proc.path(pid, 'exe')
    try:
        # Some symlink targets were seen to contain NULs on RHEL 5 at least
        # https://github.com/pixelb/scripts/pull/10, so keep the part before
        # the first NUL only.
        target = os.readlink(exe_link).split('\0')[0]
    except OSError:
        err = sys.exc_info()[1]
        # ENOENT: kernel thread or process gone; EPERM: not permitted.
        if err.errno in (errno.ENOENT, errno.EPERM):
            raise LookupError
        raise

    if split_args:
        return " ".join(argv)

    deleted_marker = " (deleted)"
    if target.endswith(deleted_marker):
        target = target[:-len(deleted_marker)]
        if os.path.exists(target):
            target += " [updated]"
        elif os.path.exists(argv[0]):
            # The link may carry prelink residue, e.g.
            # /usr/libexec/notification-area-applet.#prelink#.fX7LCT (deleted)
            # so fall back to the command line, which may hold the full path.
            target = argv[0] + " [updated]"
        else:
            target += " [deleted]"

    exe = os.path.basename(target)
    status_name = proc.open(pid, 'status').readline()[6:-1]
    if exe.startswith(status_name):
        # Show the non truncated version. Because of this, programs can
        # appear separated, for example:
        #584.0 KiB +   1.0 MiB =   1.6 MiB    mozilla-thunder (exe -> bash)
        # 56.0 MiB +  22.2 MiB =  78.2 MiB    mozilla-thunderbird-bin
        return exe
    return status_name


#The following matches "du -h" output
#see also human.py
def human(num, power="Ki", units=None):
    """Format a size for display.

    With units=None, `num` is a quantity expressed in `power` bytes ("Ki" by
    default); it is scaled up by 1024 until it drops below 1000 and returned
    as e.g. "1.5 MiB". Scaling stops at the largest known prefix instead of
    running off the end of the table.

    With `units` given, `num` is taken to be KiB and the result is the bare
    number of `units`-byte units, rounded to an integer string.
    """
    if units is not None:
        return "%.f" % ((num * 1024) / units)

    powers = ["Ki", "Mi", "Gi", "Ti", "Pi", "Ei"]
    # Stop at the last prefix: stepping past it would raise IndexError.
    while num >= 1000 and power != powers[-1]: #4 digits
        num /= 1024.0
        power = powers[powers.index(power)+1]
    return "%.1f %sB" % (num, power)


def cmd_with_count(cmd, count):
    """Return `cmd`, suffixed with " (count)" when count is greater than one."""
    if not count > 1:
        return cmd
    return "%s (%u)" % (cmd, count)

#Warn of possible inaccuracies
#2 = accurate & can total
#1 = accurate only considering each process in isolation
#0 = some shared mem not reported
#-1= all shared mem not reported
def shared_val_accuracy():
    """Classify how trustworthy the shared-memory figures are on this kernel.

    Returns 2 (accurate, can be totalled), 1 (accurate per process only),
    0 (some shared memory not reported) or -1 (no shared memory reported).
    Background: http://wiki.apache.org/spamassassin/TopSharedMemoryBug
    """
    kv = kernel_ver()
    own_smaps = proc.path(os.getpid(), 'smaps')
    series = kv[:2]

    if series == (2,4):
        if "Inact_" in proc.open('meminfo').read():
            return 0
        return 1

    if series == (2,6):
        if os.path.exists(own_smaps):
            if "Pss:" in proc.open(os.getpid(), 'smaps').read():
                return 2
            return 1
        if (2,6,1) <= kv <= (2,6,9):
            return -1
        return 0

    if kv[0] > 2 and os.path.exists(own_smaps):
        return 2
    return 1

def show_shared_val_accuracy( possible_inacc, only_total=False ):
    """Report on stderr how inaccurate the shared memory values may be.

    possible_inacc -- accuracy level: -1, 0 and 1 each produce a message,
                      any other value (such as 2) produces none.
    only_total     -- when true the message is labelled "Error" rather
                      than "Warning", and the process exits with status 1
                      unless possible_inacc is 2.

    stderr is always closed before returning or exiting.
    """
    labels = ("Warning","Error")
    level = labels[only_total]

    #one tuple of stderr writes per accuracy level that needs a notice
    notices = {
        -1: (
            "%s: Shared memory is not reported by this system.\n" % level,
            "Values reported will be too large, and totals are not reported\n",
        ),
        0: (
            "%s: Shared memory is not reported accurately by this system.\n" % level,
            "Values reported could be too large, and totals are not reported\n",
        ),
        1: (
            "%s: Shared memory is slightly over-estimated by this system\n"
            "for each program, so totals are not reported.\n" % level,
        ),
    }
    for chunk in notices.get(possible_inacc, ()):
        sys.stderr.write(chunk)

    sys.stderr.close()
    if only_total and possible_inacc != 2:
        sys.exit(1)

def get_memory_usage( pids_to_show, split_args, include_self=False, only_self=False ):
    """Collect per-program memory usage from the proc filesystem.

    pids_to_show -- container of pids to restrict the scan to, or None
                    for no restriction.
    split_args   -- passed through to getCmdName().
    include_self -- when false the pid equal to our_pid is skipped.
    only_self    -- when true every pid other than our_pid is skipped.

    Returns (sorted_cmds, shareds, count, total):
      sorted_cmds -- list of (program, private + shared) pairs in
                     ascending order of usage, zero entries removed
      shareds     -- dict mapping program to its shared amount
      count       -- dict mapping program to number of processes seen
      total       -- sum of all program usage (valid if PSS available)
    """
    cmds = {}
    shareds = {}
    mem_ids = {}
    count = {}

    for entry in os.listdir(proc.path('')):
        if not entry.isdigit():
            continue
        pid = int(entry)

        # Some filters
        is_self = (pid == our_pid)
        if only_self and not is_self:
            continue
        if is_self and not include_self:
            continue
        if pids_to_show is not None and pid not in pids_to_show:
            continue

        try:
            cmd = getCmdName(pid, split_args)
        except LookupError:
            #operation not permitted
            #kernel threads don't have exe links or
            #process gone
            continue

        try:
            private, shared, mem_id = getMemStats(pid)
        except RuntimeError:
            continue #process gone

        seen_shared = shareds.get(cmd)
        if not seen_shared:
            shareds[cmd] = shared
        elif have_pss: #add shared portion of PSS together
            shareds[cmd] = seen_shared + shared
        elif seen_shared < shared: #just take largest shared val
            shareds[cmd] = shared

        cmds[cmd] = cmds.get(cmd, 0) + private
        count[cmd] = count.get(cmd, 0) + 1
        mem_ids.setdefault(cmd, {})[mem_id] = None

    #Add shared mem for each program
    total = 0
    for cmd in cmds:
        instances = count[cmd]
        if instances > 1 and len(mem_ids[cmd]) == 1:
            # Assume this program is using CLONE_VM without CLONE_THREAD
            # so only account for one of the processes
            cmds[cmd] /= instances
            if have_pss:
                shareds[cmd] /= instances
        cmds[cmd] += shareds[cmd]
        total += cmds[cmd] #valid if PSS available

    #drop programs with no usage, then order by ascending usage
    ranked = [item for item in cmds.items() if item[1]]
    ranked.sort(key=lambda item: item[1])

    return ranked, shareds, count, total

def print_header():
    """Write the column titles, followed by an empty line, to stdout."""
    column_titles = " Private  +   Shared  =  RAM used\tProgram\n\n"
    sys.stdout.write(column_titles)

def print_memory_usage(sorted_cmds, shareds, count, total):
    """Write one usage row per program to stdout, plus a total if possible.

    sorted_cmds -- sequence of (program, ram used) entries, printed in
                   the order given
    shareds     -- dict mapping program to its shared amount
    count       -- dict mapping program to its number of processes
    total       -- overall usage; only printed when have_pss is set

    The private column is computed as ram used minus the shared amount.
    """
    row_fmt = "%9s + %9s = %9s\t%s\n"
    for entry in sorted_cmds:
        name = entry[0]
        ram = entry[1]
        shared = shareds[name]
        sys.stdout.write(row_fmt % (human(ram - shared),
                                    human(shared),
                                    human(ram),
                                    cmd_with_count(name, count[name])))

    if have_pss:
        #totals are only meaningful when PSS is available
        thin_rule = "-" * 33
        thick_rule = "=" * 33
        padding = " " * 24
        sys.stdout.write("%s\n%s%9s\n%s\n" %
                         (thin_rule, padding, human(total), thick_rule))

def verify_environment():
    """Check that we run as root and that the proc filesystem is usable.

    When not root a message is written to stderr; the process only exits
    (status 1) when this file is run as a script, not when imported.
    When kernel_ver() fails with ENOENT an explanation is written to
    stderr and the process exits with status 2. Any other IOError or
    OSError from kernel_ver() is re-raised.
    """
    running_as_root = (os.geteuid() == 0)
    if not running_as_root:
        sys.stderr.write("Sorry, root permission required.\n")
        if __name__ == '__main__':
            sys.stderr.close()
            sys.exit(1)

    try:
        #only called to find out whether the kernel version is readable
        kernel_ver()
    except (IOError, OSError):
        #sys.exc_info() rather than "as" keeps old interpreters working
        err = sys.exc_info()[1]
        if err.errno != errno.ENOENT:
            raise
        sys.stderr.write(
          "Couldn't access " + proc.path('') + "\n"
          "Only GNU/Linux and FreeBSD (with linprocfs) are supported\n")
        sys.exit(2)

# Script entry point: parse the options, check the environment, print the
# report (once, or repeatedly in watch mode), then warn on stderr about how
# accurate the shared memory figures are.
if __name__ == '__main__':
    split_args, pids_to_show, watch, only_total = parse_options()
    verify_environment()

    # The column header is printed once, and not at all in total-only mode.
    if not only_total:
        print_header()

    if watch is not None:
        # Watch mode: report, sleep for `watch`, and repeat for as long as
        # get_memory_usage() keeps returning a non-empty program list.
        try:
            sorted_cmds = True  # placeholder so the loop body runs at least once
            while sorted_cmds:
                sorted_cmds, shareds, count, total = get_memory_usage( pids_to_show, split_args )
                if only_total and have_pss:
                    # units=1 makes human() return total * 1024 as a bare number
                    sys.stdout.write(human(total, units=1)+'\n')
                elif not only_total:
                    print_memory_usage(sorted_cmds, shareds, count, total)
                time.sleep(watch)
            else:
                # Reached when the loop condition becomes false, i.e. the
                # last scan returned no programs.
                sys.stdout.write('Process does not exist anymore.\n')
        except KeyboardInterrupt:
            # Interrupting watch mode is the normal way to stop it.
            pass
    else:
        # This is the default behavior
        sorted_cmds, shareds, count, total = get_memory_usage( pids_to_show, split_args )
        if only_total and have_pss:
            # units=1 makes human() return total * 1024 as a bare number
            sys.stdout.write(human(total, units=1)+'\n')
        elif not only_total:
            print_memory_usage(sorted_cmds, shareds, count, total)

    # We must close explicitly, so that any EPIPE exception
    # is handled by our excepthook, rather than the default
    # one which is reenabled after this script finishes.
    sys.stdout.close()

    # Reported last, on stderr; show_shared_val_accuracy() exits with
    # status 1 when only_total is set and the accuracy level is not 2.
    vm_accuracy = shared_val_accuracy()
    show_shared_val_accuracy( vm_accuracy, only_total )
Category: Programming | Comments Off on Python: Script to break down what is using memory / RAM