Installation notes on OpenHPC 3 and Rocky 9
Headnode Installation
- Rocky 9.2 (this guide is based on a VM, so no OOB management), 2 network ports, port security disabled on the internal port (see the OpenStack sketch after this list)
- Install guide from https://github.com/openhpc/ohpc/releases/download/v3.0.GA/Install_guide-Rocky9-Warewulf-SLURM-3.0-x86_64.pdf
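Because the headnode here is an OpenStack VM, port security has to be disabled on the internal (provisioning) port or the DHCP/PXE traffic Warewulf sends will be filtered. A minimal sketch of how that might be done - the instance name and the port ID are placeholders to look up in your own environment:
# hypothetical instance name - list its ports first
openstack port list --server openhpc3-headnode
# security groups must be removed before port security can be disabled
openstack port set --no-security-group --disable-port-security <internal-port-id>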
Setup the networking
nmcli device status
nmcli device show enp129s0f0
# UI way
nmtui
# CLI way
# set to static (from dhcp)
nmcli con mod enp129s0f0 ipv4.addresses 10.141.0.1/16
nmcli con mod enp129s0f0 ipv4.gateway 10.141.0.254
nmcli con mod enp129s0f0 ipv4.dns "8.8.8.8"
nmcli con mod enp129s0f0 ipv4.method manual
nmcli con up enp129s0f0
OpenHPC vars.sh
[root@openhpc3-headnode scratch]# cat openhpc-vars.sh
# vars needed
# ${sms_name}
# ${sms_ip}
# ${sms_eth_internal}
# ${eth_provision}
# ${internal_netmask}
# ${ntp_server}
# ${bmc_username}
# ${bmc_password}
# ${num_computes}
# ${c_ip[0]}, ${c_ip[1]}, ...
# ${c_bmc[0]}, ${c_bmc[1]}, ...
# ${c_mac[0]}, ${c_mac[1]}, ...
# ${c_name[0]}, ${c_name[1]}, ...
# ${compute_regex}
# ${compute_prefix}
sms_name="rocky-head"
sms_ip="10.20.30.240"
sms_eth_internal="eth1"
eth_provision="eth1"
internal_netmask="255.255.255.0"
ntp_server=0.centos.pool.ntp.org
compute_regex="compute*"
compute_prefix="compute"
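The per-compute variables listed in the comments aren't set in this file, but the genders, conman and pdsh steps further down expect them. A hedged example for the two-VM setup used later (the IPs/MACs are the ones used in the Warewulf node definitions below; the BMC and credential values are placeholders, since this VM environment has no IPMI):
num_computes=2
c_name[0]=compute-1; c_ip[0]=10.20.30.130; c_mac[0]=fa:16:3e:32:82:57
c_name[1]=compute-2; c_ip[1]=10.20.30.158; c_mac[1]=fa:16:3e:e4:a0:2a
# no IPMI in this VM environment - placeholders only
c_bmc[0]=none; c_bmc[1]=none
bmc_username=none
bmc_password=none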
Setup hosts file
# source openhpc-vars.sh
[root@openhpc3-headnode scratch]# echo ${sms_ip} ${sms_name}
10.20.30.240 rocky-head
[root@openhpc3-headnode scratch]# echo ${sms_ip} ${sms_name} >> /etc/hosts
[root@openhpc3-headnode scratch]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.20.30.240 rocky-head
Disable services and selinux
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/sysconfig/selinux
systemctl disable firewalld
systemctl stop firewalld
Install OpenHPC Components
dnf -y install http://repos.openhpc.community/OpenHPC/3/EL_9/x86_64/ohpc-release-3-1.el9.x86_64.rpm
dnf -y install dnf-plugins-core
dnf -y config-manager --set-enabled crb
dnf -y groupinstall 'Development Tools'
Optional: HPC Docs and Scripts
dnf -y install docs-ohpc # then the file will be /opt/ohpc/pub/doc/recipes/rocky9/x86_64/warewulf/slurm/recipe.sh
However, I like to do it by hand, so let's crack on!
Add provisioning services on headnode
dnf -y install ohpc-base
dnf -y install ohpc-warewulf
Chrony / time services
systemctl enable chronyd.service
echo "local stratum 10" >> /etc/chrony.conf
echo "server ${ntp_server}" >> /etc/chrony.conf
echo "allow all" >> /etc/chrony.conf
systemctl restart chronyd
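A quick check that chrony picked up the config (standard chronyc usage, not from the original notes):
chronyc sources
chronyc tracking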
Slurm Resource Manager
dnf -y install ohpc-slurm-server
cp /etc/slurm/slurm.conf.ohpc /etc/slurm/slurm.conf
cp /etc/slurm/cgroup.conf.example /etc/slurm/cgroup.conf
perl -pi -e "s/SlurmctldHost=\S+/gre=${sms_name}/" /etc/slurm/slurm.conf
Get Warewulf set up
perl -pi -e "s/device = eth1/device = ${sms_eth_internal}/" /etc/warewulf/provision.conf
# not needed ip link set dev ${sms_eth_internal} up
# not needed ip address add ${sms_ip}/${internal_netmask} broadcast + dev ${sms_eth_internal}
systemctl enable httpd.service
systemctl restart httpd
systemctl enable dhcpd.service
systemctl enable tftp.socket
systemctl start tftp.socket
Compute node configuration
# Define chroot location
export CHROOT=/opt/ohpc/admin/images/rocky9.2
# Build initial chroot image
wwmkchroot -v rocky-9 $CHROOT
# failed - need some perl extras
dnf -y install perl-utils
mkdir -p /usr/local/lib64/perl5/5.32
cd /usr/include;
h2ph * */*
# ok lets try this again
wwmkchroot -v rocky-9 $CHROOT
mount -o bind /sys $CHROOT/sys
mount -o bind /proc $CHROOT/proc
# Enable OpenHPC and EPEL repos inside chroot
dnf -y --installroot $CHROOT install epel-release
# hmm, something odd happened - needed to: cp /etc/resolv.conf $CHROOT/etc ; chroot $CHROOT ; rpm -e epel-release ; rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
cp -p /etc/yum.repos.d/OpenHPC*.repo $CHROOT/etc/yum.repos.d
dnf -y --installroot=$CHROOT install ohpc-base-compute
# some files / creds
cp -p /etc/resolv.conf $CHROOT/etc/resolv.conf
cp /etc/passwd /etc/group $CHROOT/etc
dnf -y --installroot=$CHROOT install ohpc-slurm-client
chroot $CHROOT systemctl enable munge
chroot $CHROOT systemctl enable slurmd
echo SLURMD_OPTIONS="--conf-server ${sms_ip}" > $CHROOT/etc/sysconfig/slurmd
dnf -y --installroot=$CHROOT install chrony
echo "server ${sms_ip} iburst" >> $CHROOT/etc/chrony.conf
dnf -y --installroot=$CHROOT install kernel-`uname -r`
# failed; yum -y --installroot=$CHROOT install kernel-`uname -r`
# if you run yum install kernel on headnode, then the versions will be the same (probably avoided if you start the whole process with yum update)
#yum -y --installroot=$CHROOT install kernel
dnf -y --installroot=$CHROOT install lmod-ohpc
Customise the system configuration
wwinit database
wwinit ssh_keys
echo "${sms_ip}:/home /home nfs nfsvers=4,nodev,nosuid 0 0" >> $CHROOT/etc/fstab
echo "${sms_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=4,nodev 0 0" >> $CHROOT/etc/fstab
echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
exportfs -a
systemctl restart nfs-server
systemctl enable nfs-server
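A quick sanity check that the exports are active (standard NFS tooling, not from the original notes):
exportfs -v
showmount -e localhost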
# Update memlock settings on master
perl -pi -e 's/# End of file/\* soft memlock unlimited\n$&/s' /etc/security/limits.conf
perl -pi -e 's/# End of file/\* hard memlock unlimited\n$&/s' /etc/security/limits.conf
# Update memlock settings within compute image
perl -pi -e 's/# End of file/\* soft memlock unlimited\n$&/s' $CHROOT/etc/security/limits.conf
perl -pi -e 's/# End of file/\* hard memlock unlimited\n$&/s' $CHROOT/etc/security/limits.conf
# no access to compute nodes if job not running
echo "account required pam_slurm.so" >> $CHROOT/etc/pam.d/sshd
Enable InfiniBand
dnf -y groupinstall "InfiniBand Support" udevadm trigger --type=devices --action=add systemctl restart rdma-load-modules@infiniband.service # and for clients dnf -y --installroot=$CHROOT groupinstall "InfiniBand Support"
Setup Rsyslog
# Configure SMS to receive messages and reload rsyslog configuration
echo 'module(load="imudp")' >> /etc/rsyslog.d/ohpc.conf
echo 'input(type="imudp" port="514")' >> /etc/rsyslog.d/ohpc.conf
systemctl restart rsyslog
# Define compute node forwarding destination
echo "*.* @${sms_ip}:514" >> $CHROOT/etc/rsyslog.conf
echo "Target=\"${sms_ip}\" Protocol=\"udp\"" >> $CHROOT/etc/rsyslog.conf
# Disable most local logging on computes. Emergency and boot logs will remain on the compute nodes
perl -pi -e "s/^\*\.info/\\#\*\.info/" $CHROOT/etc/rsyslog.conf
perl -pi -e "s/^authpriv/\\#authpriv/" $CHROOT/etc/rsyslog.conf
perl -pi -e "s/^mail/\\#mail/" $CHROOT/etc/rsyslog.conf
perl -pi -e "s/^cron/\\#cron/" $CHROOT/etc/rsyslog.conf
perl -pi -e "s/^uucp/\\#uucp/" $CHROOT/etc/rsyslog.conf
Add Nagios (FAILED Revisit)
# Install nagios, nrpe, and all available plugins on master host
dnf -y install --skip-broken nagios nrpe nagios-plugins-*
# Install nrpe and an example plugin into compute node image
dnf -y --installroot=$CHROOT install nrpe nagios-plugins-ssh
# Enable and configure Nagios NRPE daemon in compute image
chroot $CHROOT systemctl enable nrpe
perl -pi -e "s/^allowed_hosts=/# allowed_hosts=/" $CHROOT/etc/nagios/nrpe.cfg
echo "nrpe : ${sms_ip} : ALLOW" >> $CHROOT/etc/hosts.allow
echo "nrpe : ALL : DENY" >> $CHROOT/etc/hosts.allow
# Copy example Nagios config file to define a compute group and ssh check
# (note: edit as desired to add all desired compute hosts)
cp /opt/ohpc/pub/examples/nagios/compute.cfg /etc/nagios/objects
# failed no such file or directory in the examples... come back to this another time
Add ClusterShell
# Install ClusterShell
dnf -y install clustershell
# Setup node definitions
cd /etc/clustershell/groups.d
mv local.cfg local.cfg.orig
echo "adm: ${sms_name}" > local.cfg
# note set num_computes above
echo "compute: ${compute_prefix}[1-${num_computes}]" >> local.cfg
echo "all: @adm,@compute" >> local.cfg
Add Genders
# Install genders
# assumes we have ipmi setup - I don't!
dnf -y install genders-ohpc
# Generate a sample genders file
echo -e "${sms_name}\tsms" > /etc/genders
for ((i=0; i<$num_computes; i++)) ; do
echo -e "${c_name[$i]}\tcompute,bmc=${c_bmc[$i]}"
done >> /etc/genders
Here's what I hacked together without BMC
[root@openhpc3-headnode ~]# cat /etc/genders
rocky-head sms
compute-1 compute
compute-2 compute
# or with ipmi
head-rocky9 sms
gpunode01 compute,bmc=gpunode01-ipmi
gpunode02 compute,bmc=gpunode02-ipmi
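A couple of quick genders queries to confirm the file parses (standard nodeattr usage, not in the original notes):
nodeattr -c compute        # comma-separated list of nodes with the compute attribute
nodeattr -v gpunode01 bmc  # value of the bmc attribute for a node, if set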
Add Magpie (skipped)
Add Conman (skipped)
# Install conman to provide a front-end to compute consoles and log output
dnf -y install conman-ohpc
# Configure conman for computes (note your IPMI password is required for console access)
for ((i=0; i<$num_computes; i++)) ; do
echo -n 'CONSOLE name="'${c_name[$i]}'" dev="ipmi:'${c_bmc[$i]}'" '
echo 'ipmiopts="'U:${bmc_username},P:${IPMI_PASSWORD:-undefined},W:solpayloadsize'"'
done >> /etc/conman.conf
# or here's the end of a version populated (oghl)
<snip>
CONSOLE name="gpunode01" dev="ipmi:10.141.128.1" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode02" dev="ipmi:10.141.128.2" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode03" dev="ipmi:10.141.128.3" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode04" dev="ipmi:10.141.128.4" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode05" dev="ipmi:10.141.128.5" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode06" dev="ipmi:10.141.128.6" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode07" dev="ipmi:10.141.128.7" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode08" dev="ipmi:10.141.128.8" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode09" dev="ipmi:10.141.128.9" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode10" dev="ipmi:10.141.128.10" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode11" dev="ipmi:10.141.128.11" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode12" dev="ipmi:10.141.128.12" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode13" dev="ipmi:10.141.128.13" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode14" dev="ipmi:10.141.128.14" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode15" dev="ipmi:10.141.128.15" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="gpunode16" dev="ipmi:10.141.128.16" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
CONSOLE name="rocky-head" dev="ipmi:10.141.255.254" ipmiopts="U:ADMIN,P:ADMIN,W:solpayloadsize"
</snip>
# Enable and start conman
systemctl enable conman
systemctl start conman
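Once conmand is up, consoles can be listed and attached with the conman client (standard usage; "&." detaches):
conman -q          # list configured consoles
conman gpunode01   # attach to a node's serial-over-LAN console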
Add Node Health Check
# Install NHC on master and compute nodes
dnf -y install nhc-ohpc
dnf -y --installroot=$CHROOT install nhc-ohpc
# Register as SLURM's health check program
echo "HealthCheckProgram=/usr/sbin/nhc" >> /etc/slurm/slurm.conf
echo "HealthCheckInterval=600" >> /etc/slurm/slurm.conf   # execute every 10 minutes
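NHC reads its checks from /etc/nhc/nhc.conf; the nhc-genconf run further down generates one from a live node, but a minimal hand-written sketch might look like this (the mounts match the NFS exports above, and the sshd check is just an example):
# /etc/nhc/nhc.conf - minimal sketch
* || check_fs_mount_rw /home
* || check_fs_mount_rw /opt/ohpc/pub
* || check_ps_service -u root -S sshd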
Add GEOPM (skipped)
Check back on this power framework
Import files to Warewulf
wwsh -y file import /etc/passwd
wwsh -y file import /etc/group
wwsh -y file import /etc/shadow
wwsh -y file import /etc/munge/munge.key
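A quick way to confirm the imports landed (standard wwsh usage):
wwsh file list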
Perl Fucked
At this stage perl was fucked - it didn't have any of the system header libraries and was complaining about sys/ioctl.ph being missing. Here's what I did to fix it:
dnf -y install perl-utils
dnf -y install glibc-devel
mkdir -p /usr/local/lib64/perl5/5.32
cd /usr/include; h2ph * sys/* bits/*
wwsh file import /etc/passwd
export CHROOT=/opt/ohpc/admin/images/rocky9.2
wwmkchroot -v rocky-9 $CHROOT
h2ph bits/*
cd /usr/include; h2ph -r -l .
wwmkchroot -v rocky-9 $CHROOT
# now it completes without error
Finalise Compute configuration
# Build bootstrap image
wwbootstrap `uname -r`
# Assemble Virtual Node File System (VNFS) image
# if $CHROOT/proc and $CHROOT/sys are mounted then undo that
umount --force /opt/ohpc/admin/images/rocky9.2/sys
umount --force -l /opt/ohpc/admin/images/rocky9.2/proc
wwvnfs --chroot $CHROOT
# note: changed eth_provision to eth0, as it's the compute node's interface - I clearly mucked up the headnode configuration based on the docs
echo "GATEWAYDEV=${eth_provision}" > /tmp/network.$$
wwsh -y file import /tmp/network.$$ --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
Setup pxeboot image in openstack
# back to the openstack node with admin rc sourced
curl --output /tmp/pxeboot.img --location https://linux.web.cern.ch/centos7/docs/pxeboot.img
openstack image create pxeboot --property os=LINUX --file /tmp/pxeboot.img --public
openstack image set --property hw_vif_model=e1000 pxeboot
# then boot 2 nodes, disable port security, grab their IP and MAC addresses
Then set up these nodes in Warewulf
# Add nodes to Warewulf data store
wwsh -y node new compute-1 --ipaddr=10.20.30.130 --hwaddr=fa:16:3e:32:82:57 -D eth0
wwsh -y node new compute-2 --ipaddr=10.20.30.158 --hwaddr=fa:16:3e:e4:a0:2a -D eth0
wwsh node list
NAME                GROUPS              IPADDR              HWADDR
================================================================================
compute-1           UNDEF               10.20.30.130        fa:16:3e:32:82:57
compute-2           UNDEF               10.20.30.158        fa:16:3e:e4:a0:2a
# Define provisioning image for hosts
wwsh -y provision set compute* --vnfs=rocky9.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,munge.key,network
wwsh provision list
NODE                VNFS            BOOTSTRAP             FILES
================================================================================
compute-1           rocky9.2        5.14.0-362.13.1.el... dynamic_hosts,grou...
compute-2           rocky9.2        5.14.0-362.13.1.el... dynamic_hosts,grou...
# OGHL
wwsh -y node new gpunode02 --ipaddr=10.141.0.2 --netmask=255.255.0.0 --hwaddr=00:25:90:96:24:c6 -D enp129s0f0
wwsh -y provision set gpunode02 --kargs "console=ttyS1,115200" --vnfs=rocky9.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,munge.key,network
# Restart dhcp / update PXE
systemctl restart dhcpd
wwsh pxe update
No virtio_net driver in the bootstrap images - let's add it
# on the openstack env
openstack image set --property hw_vif_model=e1000 pxeboot
# the damn VMs run eth0 from the virtio_net driver - let's add it to the bootstrap
echo "drivers += virtio_net" >> /etc/warewulf/bootstrap.conf
wwbootstrap `uname -r`
# ^ that didn't work
# reboot VM
Final Slurm Config (after adding nodes)
# Start munge and slurm controller on master host
systemctl enable munge
systemctl enable slurmctld
systemctl start munge
systemctl start slurmctld
# Start slurm clients on compute hosts
pdsh -w $compute_prefix[1-${num_computes}] systemctl start munge
pdsh -w $compute_prefix[1-${num_computes}] systemctl start slurmd
# Generate NHC configuration file based on compute node environment
pdsh -w c1 "/usr/sbin/nhc-genconf -H '*' -c -" | dshbak -c
Setup the SlurmDBD service for accounting and OOD (note: I did this while installing OOD, so hopefully everything is in place - otherwise come back to this at the end)
# cat /etc/slurm/slurmdbd.conf
# Authentication info
AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
#
# slurmDBD info
DbdHost=head-rocky9
DbdAddr=head-rocky9
DbdPort=6819
SlurmUser=slurm
#MessageTimeout=300
DebugLevel=4
#DefaultQOS=normal,standby
# NOTE: By default, slurmdbd will log to syslog
#LogFile=/var/log/slurm/slurmdbd.log
PidFile=/var/run/slurmdbd.pid
#PluginDir=/usr/lib/slurm
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
#
# Database info
StorageType=accounting_storage/mysql
#StorageHost=head-rocky9
StorageUser=slurm
StoragePass=slurmp455dbd
StorageLoc=slurm_acct_db

# chmod 600 /etc/slurm/slurmdbd.conf
# chown slurm.slurm /etc/slurm/slurmdbd.conf

# add to the slurm.conf file
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=head-rocky9
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
Setup the DB and SLURM user for the DBD service
mysql -u root -e "create user 'slurm'@'localhost' identified by 'slurmp455dbd'; grant all on slurm_acct_db.* to 'slurm'@'localhost'; create database slurm_acct_db;"
systemctl restart slurmctld slurmdbd
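If nothing shows up in accounting after the restart, the cluster may still need registering in the database; standard sacctmgr usage, with the cluster name matching ClusterName in slurm.conf (here "cluster"):
sacctmgr -i add cluster cluster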
Check it's working OK
[root@head-rocky9 ~]# sacctmgr list cluster
   Cluster     ControlHost  ControlPort   RPC     Share GrpJobs       GrpTRES GrpSubmit MaxJobs       MaxTRES MaxSubmit     MaxWall                  QOS   Def QOS
---------- --------------- ------------ ----- --------- ------- ------------- --------- ------- ------------- --------- ----------- -------------------- ---------
   cluster      10.141.0.1         6817  9728         1                                                                                           normal
Installing Extra OpenHPC Software MPI / Libs
dnf -y install ohpc-autotools
dnf -y install EasyBuild-ohpc
dnf -y install hwloc-ohpc
dnf -y install spack-ohpc
dnf -y install valgrind-ohpc
dnf -y install gnu12-compilers-ohpc
dnf -y install openmpi4-pmix-gnu12-ohpc mpich-ofi-gnu12-ohpc
dnf -y install mpich-ucx-gnu12-ohpc
dnf -y install mvapich2-gnu12-ohpc
dnf -y install ohpc-gnu12-perf-tools
dnf -y install lmod-defaults-gnu12-openmpi4-ohpc
dnf -y install ohpc-gnu12-serial-libs
dnf -y install ohpc-gnu12-io-libs
dnf -y install ohpc-gnu12-python-libs
dnf -y install ohpc-gnu12-runtimes
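With lmod-defaults-gnu12-openmpi4-ohpc installed, the gnu12/openmpi4 toolchain should be loaded by default in a fresh shell; a quick check (module names per the OpenHPC packaging, output not shown here):
module list
module avail
mpicc --version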
Add users
adduser rsupport
wwsh file sync
pdsh -w $compute_prefix[1-${num_computes}] /warewulf/bin/wwgetfiles
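Whenever users are added later, the credential files imported into Warewulf need refreshing before the computes pull them; a short sketch of the sequence (wwsh file resync is the documented way to refresh imported files):
wwsh file resync passwd shadow group
pdsh -w $compute_prefix[1-${num_computes}] /warewulf/bin/wwgetfiles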
Add OpenOnDemand
# on headnode
yum install https://yum.osc.edu/ondemand/3.0/ondemand-release-web-3.0-1.noarch.rpm
dnf install ondemand
systemctl restart httpd
Setup basic auth using PAM (not recommended - should use dex/ldap)
dnf -y install mod_authnz_pam
echo "LoadModule authnz_pam_module modules/mod_authnz_pam.so" > /etc/httpd/conf.modules.d/55-authnz_pam.conf
cp /etc/pam.d/sshd /etc/pam.d/ood
chmod 640 /etc/shadow
chgrp apache /etc/shadow

# Update /etc/ood/config/ood_portal.yml to use PAM authentication
auth:
  - 'AuthType Basic'
  - 'AuthName "Open OnDemand"'
  - 'AuthBasicProvider PAM'
  - 'AuthPAMService ood'
  - 'Require valid-user'
# Capture system user name from authenticated user name
user_map_cmd: "/opt/ood/ood_auth_map/bin/ood_auth_map.regex"

# Apply modifications to the /etc/ood/config/ood_portal.yml
/opt/ood/ood-portal-generator/sbin/update_ood_portal

# needed to grab this file - doesn't seem to exist anymore
curl https://raw.githubusercontent.com/OSC/ood_auth_map/master/bin/ood_auth_map.regex > /opt/ood/ood_auth_map/bin/ood_auth_map.regex
chmod +x /opt/ood/ood_auth_map/bin/ood_auth_map.regex

# quick test
[root@head-rocky9 log]# /opt/ood/ood_auth_map/bin/ood_auth_map.regex definetech definetech
definetech
[root@head-rocky9 log]# /opt/ood/ood_auth_map/bin/ood_auth_map.regex definetech
definetech

systemctl restart httpd
OOD Needs SSL to work - set that up
# TBC - tidy this up properly; for now this is what I ran (self-signed cert)
cd /etc/pki/tls/
mkdir ood
cd ood/
openssl genrsa -des3 -out ood-private.key 2048
echo "oodpass" > pphrase.txt
openssl req -key ood-private.key -new -out ood-server.csr
openssl x509 -signkey ood-private.key -in ood-server.csr -req -days 3650 -out ood-server.crt
openssl x509 -text -noout -in ood-server.crt
vi /etc/ood/config/ood_portal.yml
Setup the Desktop env in OOD
dnf -y --installroot=$CHROOT install nmap-ncat
dnf -y --installroot=$CHROOT install python3-websockify
# turbo vnc needs its own repo
curl -o $CHROOT/etc/yum.repos.d/TurboVNC.repo https://raw.githubusercontent.com/TurboVNC/repo/main/TurboVNC.repo
dnf -y --installroot=$CHROOT install turbovnc
dnf -y --installroot=$CHROOT group install xfce
# rebuild our vnfs
wwvnfs --chroot ${CHROOT}
Configuration files for Desktop app
# [root@head-rocky9 config]# cat clusters.d/cluster.yml
---
v2:
  metadata:
    title: "oghl cluster"
  login:
    host: "head-rocky9.cluster.internal"
  job:
    adapter: "slurm"
    cluster: "cluster"
    bin: "/usr/bin"
    conf: "/etc/slurm/slurm.conf"
    # bin_overrides:
    #   sbatch: "/usr/local/bin/sbatch"
    #   squeue: ""
    #   scontrol: ""
    #   scancel: ""
    copy_environment: false
  batch_connect:
    basic:
      script_wrapper: |
        module purge
        %s
      set_host: "host=$(hostname -s)"
    vnc:
      script_wrapper: |
        module purge
        export PATH="/opt/TurboVNC/bin:$PATH"
        export WEBSOCKIFY_CMD="/usr/bin/websockify"
        # Workaround to avoid "Unable to contact settings server" when
        # launching xfce4-session
        #/bin/dbus-launch /bin/xfce4-session $@
        #export -f xfce4-session
        %s
      set_host: "host=$(hostname -s)"
# [root@head-rocky9 config]# cat apps/bc_desktop/cluster.yml
attributes:
  bc_queue:
    value: normal
  desktop: xfce
  node:
    help: Select a particular node or leave empty to let Slurm pick the next available
    label: Node name
    value: ''
  num_cores:
    label: Number of cores
    value: 1
cluster: cluster
description: Request a desktop to run GUI applications.
form:
  - desktop
  - bc_queue
  - bc_num_hours
  - num_cores
  - node
submit: submit/submit.yml.erb
title: Remote Desktop
[root@head-rocky9 config]# cat apps/bc_desktop/submit/submit.yml.erb
---
script:
  job_name: "ood-desktop"
  native:
    - <%= "--nodes=1" %>
    - <%= "--ntasks=#{num_cores}" %>
    - <%= "--nodelist=#{node}" %>
[root@head-rocky9 config]# grep -v "^#" ood_portal.yml
---
servername: head-rocky9.cluster.internal
ssl:
  - 'SSLCertificateFile "/etc/pki/tls/ood/ood-server.crt"'
  - 'SSLCertificateKeyFile "/etc/pki/tls/ood/ood-private.key"'
auth:
  - 'AuthType Basic'
  - 'AuthName "Open OnDemand"'
  - 'AuthBasicProvider PAM'
  - 'AuthPAMService ood'
  - 'Require valid-user'
user_map_cmd: "/opt/ood/ood_auth_map/bin/ood_auth_map.regex"
root_uri: '/pun/sys/dashboard'
host_regex: '.+'
node_uri: '/node'
rnode_uri: '/rnode'
Note: Warewulf provisioning stops working because OOD automatically adds a redirect from 80 -> 443, which breaks the Warewulf HTTP provisioning endpoint - need to undo that.
# TBC work out how to fix this properly
[root@head-rocky9 conf.d]# diff ood-portal.conf ood-portal.conf.new
46,51c46,51
< #<VirtualHost *:80>
< # ServerName head-rocky9.cluster.internal
< #
< # RewriteEngine On
< # RewriteRule ^(.*) https://%{HTTP_HOST}:443$1 [R=301,NE,L]
< #</VirtualHost>
---
> <VirtualHost *:80>
> ServerName head-rocky9.cluster.internal
>
> RewriteEngine On
> RewriteRule ^(.*) https://%{HTTP_HOST}:443$1 [R=301,NE,L]
> </VirtualHost>
vi /etc/httpd/conf.d/ood-portal.conf.new
# comment out the :80 virtual host section
# TLS passphrase: oodpass
systemctl restart httpd
Add Jupyter App
Setup the following files
mkdir /etc/ood/config/apps/jupyter
mkdir /etc/ood/config/apps/jupyter/submit
cd /etc/ood/config/apps/jupyter
# [root@head-rocky9 jupyter]# cat cluster.yml
attributes:
  bc_queue:
    value: normal
  extra_jupyter_args: ''
  modules: ''
  node: ''
  num_cores:
    label: Number of cores
    value: 1
cluster: slurm
description: Request a Jupyter Notebook server
form:
  - modules
  - extra_jupyter_args
  - bc_queue
  - bc_num_hours
  - num_cores
  - node
submit: submit/submit.yml.erb
title: Jupyter Notebook
# [root@head-rocky9 jupyter]# cat submit/submit.yml.erb
---
batch_connect:
  template: "basic"
script:
  job_name: "ood-jupyter"
  native:
    - <%= "--nodes=1" %>
    - <%= "--ntasks=#{num_cores}" %>
    - <%= "--nodelist=#{node}" %>
Setup Jupyter in the image
dnf -y --installroot=$CHROOT install python3-pip
chroot $CHROOT
pip install jupyter
exit
wwvnfs --chroot $CHROOT
Install the Jupyter app
cd /var/www/ood/apps/sys/jupyter/
git clone https://github.com/OSC/bc_example_jupyter.git
/opt/ood/ood-portal-generator/sbin/update_ood_portal
systemctl restart httpd