forked from OleHolmNielsen/Slurm_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpsnode
executable file
·81 lines (72 loc) · 2.17 KB
/
psnode
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env bash
# Do a "ps" process status on a node-list, but exclude system processes
# Usage: psnode [-c columns | -h] node-list
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/
# Default number of output columns handed to "ps --columns"
export columns=100

#######################################
# Parse the psnode command-line options.
# Globals:   columns (written, exported)
#            OPTIND  (written; left pointing at the first non-option
#                     argument so the caller can shift the options away)
# Arguments: the command-line arguments of the script
# Outputs:   usage text or an error message on stdout
# Returns:   0 when all options are valid; exits with status 1 on -h,
#            on an unknown option, or on an invalid -c value
#######################################
parse_options() {
  local opt value
  OPTIND=1
  # "h" is declared so that -h reaches the usage arm without getopts
  # first printing an "illegal option" diagnostic.
  while getopts "c:h" opt; do
    case "$opt" in
      c)
        if ! [[ $OPTARG =~ ^[0-9]+$ ]]; then
          echo "Number of columns ${OPTARG} must be a positive number"
          exit 1
        fi
        # Strip leading zeros so the value is never interpreted as octal
        # ("08" is an arithmetic error, "0100" would be read as 64).
        value=${OPTARG#"${OPTARG%%[!0]*}"}
        value=${value:-0}
        # The length test runs first so that an over-long digit string is
        # rejected before it can overflow the arithmetic comparison.
        if [[ ${#value} -gt 4 ]] || ((value < 5 || value > 1024)); then
          echo "Unreasonable number of columns ${OPTARG}"
          exit 1
        fi
        export columns=$value
        ;;
      h | *)
        echo "Usage: psnode [-c columns | -h] node-list"
        exit 1
        ;;
    esac
  done
}

# Parse command options, then drop them from the positional parameters
parse_options "$@"
shift $((OPTIND - 1))
# Exactly one positional argument (the node-list) must be left at this point
if (($# != 1)); then
  printf '%s\n' 'ERROR:' 'Usage: psnode [-c columns | -h] node-list'
  exit 255 # the status bash reports for "exit -1"
fi
# Commands (with their fixed options) used to inspect each node
PS=/bin/ps
PSFLAGS="-o pid,nlwp,state,user,start,cputime,%cpu,rssize,command"
PSFLAGS+=" --columns ${columns}"
SSH='ssh -n -x'
PING='/bin/ping -c 1 -w 3'

# System users EXCLUDED from the process list
USERLIST='root rpc rpcuser daemon ntp smmsp sshd hpsmh named dbus 68 chrony polkitd munge'

# Build the space-separated deselect-list from those entries of USERLIST
# that "getent passwd" can resolve on this host.
deselect_list=""
for account in $USERLIST; do
  [[ -n "$(getent passwd "$account")" ]] || continue
  deselect_list+="${deselect_list:+ }${account}"
done
#######################################
# Print the Slurm status, the job IDs and the non-system processes of one node.
# Globals:   PING, SSH, PS, PSFLAGS, deselect_list (all read)
# Arguments: $1 - hostname of the node
# Outputs:   the node report on stdout
#######################################
report_node() {
  local node=$1

  echo "Node ${node} information:"
  # Print node status information:
  sinfo -N -O "NodeList:10 ,Partition:10 ,CPUs:4 ,CPUsLoad:9 ,SocketCoreThread:8 ,Memory:7 ,StateLong:11 ,Reason:20" -n "$node"
  # Left unquoted on purpose: word splitting joins all job IDs on one line.
  # shellcheck disable=SC2046
  echo Jobid list: $(squeue -h -O JobID -w "$node")
  echo "Node ${node} user processes:"
  # PING, SSH and PSFLAGS each hold a command plus its options, so they
  # must be expanded unquoted to be split into separate words.
  # shellcheck disable=SC2086
  if $PING "$node" >/dev/null 2>&1; then
    # Count also the number of processes (numprocs) and threads (numthreads) when field $1 is a number
    $SSH "$node" $PS $PSFLAGS --deselect -u \""${deselect_list}"\" \
      | awk '{print $0; if($1~/^[0-9]+$/) {numprocs++; numthreads+=$2}} END {printf("Total: %d processes and %d threads\n", numprocs, numthreads)}'
  else
    echo "*** WARNING *** Cannot ping host ${node} !"
  fi
}

#######################################
# Expand a Slurm node-list expression and report on every node in it,
# printing a separator line between consecutive node reports.
# Arguments: $1 - node-list expression, e.g. "a[1-3]"
# Outputs:   the node reports on stdout
#######################################
report_nodes() {
  local nodelist=$1
  local node
  local sep=""

  # The "scontrol show hostnames" command is used to expand NodeList expressions.
  # The expression is passed quoted: its brackets are shell glob characters and
  # would otherwise be matched against file names in the current directory.
  for node in $(scontrol show hostnames "$nodelist"); do
    if [[ -n "$sep" ]]; then
      echo "$sep"
    fi
    report_node "$node"
    sep="====================================="
  done
}

report_nodes "$1"