Bright: WLM Slurm
Jump to navigation
Jump to search
Information on the hosts
- Use sinfo
[root@head1 ~]# sinfo
PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
defq* up infinite 1 down* storage1
defq* up infinite 16 idle gpuNode[01-16]
Control Nodes
# Drain a node
scontrol update nodename=gpuNode02 state=drain reason=knackered
# Resume after draining
scontrol update nodename=gpuNode02 state=resume
# Stop jobs running
scontrol update nodename=gpuNode02 state=down
Handy Comparison of Slurm / PBS / SGE
Action Slurm Torque/PBS Maui SGE
------------------------------------------------------------------------------------------------------------
Get information about the job scontrol show job "jobid" qstat -f "jobid" checkjob
Display the queue info squeue qstat showq qstat
Delete a job scancel "jobid" qdel qdel
Submit a job srun/sbatch/salloc testjob qsub testjob msub qsub
Submit an interactive job salloc -N 4 -p active sh qsub -I qlogin
Display all job info squeue -al qstat -f
scontrol show job
Display job scontrol show job "jobID" qstat -f "jobID"
Display free processors srun --test-only -p normal -n 1 -t 10:00 sh showbf
Display the expected start time squeue --start -j "jobid" showstart "jobid"
Display blocked jobs squeue --start mdiag -b/showq -b
Display queues/partitions scontrol show partition qstat -Qf
Display queue sinfo -h qstat -q
sinfo -o "%P %l %c %D "
Graphical Frontend sview xpbs qmon
Node control
Action Slurm Torque/PBS Maui
------------------------------------------------------------------------------------------------
Display node info scontrol show node "node" pbsnodes "node" checknode "node"
Drain node scontrol update NodeName=gpu-1-4 pbsnodes -oN "Timeout" tcc-1-4
State=DRAIN Reason=Timeout
Clear node scontrol update NodeName=gpu-1-4 pbsnodes -cN "" tcc-1-4
State=RESUME
List down Nodes sinfo --list-reasons pbsnodes -ln
sinfo --long -R
sinfo -RlN
pbsnodes -l all:
alias pn='sinfo --format="%25N %.3D %9P %11T %.4c %14C %.8z %.8m %.4d %.8w %10f %20E"'
pbsnodes -ln:
alias pl='sinfo --states=down,drain,fail,no_respond,maint,unk --format="%12n %20f %20H %12u %32E"'
showstart:
alias lj='sacct -o user,jobid,jobname,state,node,start'