VScaler: CIX Controller Node OHPC Installation

From Define Wiki
Jump to navigation Jump to search

Update installation and install packages

  • 1_ohpc-head-update.sh
#!/bin/bash
#
# 1_ohpc-head-update.sh
#
# Step 1 of the head-node installation: installs the base package set,
# applies all pending updates, records completion in /tmp/ohpc-step-1 and
# reboots. Must be run as root and asks for confirmation first.

## Check to see if you are root

# Test the effective UID instead of $USER: $USER is an ordinary environment
# variable and does not have to match the user actually running the script.
if [ "$EUID" -ne 0 ]
then
        echo "This and the following scripts must be ran as root!"
        exit 1
fi

## Double check you want to continue

echo "You are about to update the system and reboot."
echo "Are you sure you want to continue? [y/N]"

read -r input
if [ "$input" != "y" ]
then
        exit 0
fi

## Install packages, update and reboot

# The wildcard patterns are quoted so that yum receives them verbatim;
# unquoted, the shell would first try to expand them against file names in
# the current directory.
yum -y install \
        'kernel*' 'tk*' 'tcl*' 'tigervnc*' 'ipmitool*' 'freeipmi*' 'cairo*' \
        'perl*' 'gcc*' 'glibc*' screen epel-release vim ntp libnl lsof \
        libxml2-python python mlocate 'numactl*' yum-utils htop xinetd
yum -y groupinstall "Development Tools" "Base"
yum -y update

# Overwrite (not append) the step marker: the next script compares the file
# content against exactly "1", which fails once a second line is appended.
echo "1" > /tmp/ohpc-step-1

reboot

OHPC Installation

  • 2_ohpc-head-install.sh
  • Be sure to correctly define the following variables:
    • HOSTNAME
    • SLURM_CONTROLLER
    • IF1
    • IF2
    • INTERNAL_IP
    • INTERNAL_NM (netmask of the internal network; the script defines no EXTERNAL_IP variable)
#!/bin/bash
#
# 2_ohpc-head-install.sh
#
# Step 2 of the head-node installation. This first part verifies that the
# script runs as root, that step 1 left its marker in /tmp/ohpc-step-1, and
# asks whether step 3 should be started automatically at the end
# (result stored in run_3rd: "1" = yes, "0" = no).

## Check to see if you are root

# Test the effective UID instead of $USER: $USER is an ordinary environment
# variable and does not have to match the user actually running the script.
if [ "$EUID" -ne 0 ]
then
        echo "This and the following scripts must be ran as root!"
        exit 1
fi

## Check if the previous step is completed

# Only the first line of the marker is compared, so a marker that was
# appended to more than once (step 1 executed repeatedly) is still accepted.
check_prev=$(head -n 1 /tmp/ohpc-step-1 2> /dev/null)

if [ "$check_prev" != "1" ]
then
        echo "You did not run the first script! Exiting..."
        exit 1
fi

echo "After the end of this script, do you want to run the 3rd one as well? [y/N]"

# Anything other than a literal "y" means "do not chain into step 3".
run_3rd="1"
read -r run3
if [ "$run3" != "y" ]
then
        run_3rd="0"
fi

## Set the required variables

# Directory the interface template (ifcfg-ohpc-provision) is copied from.
FILE_DIR="/root/ohpc-install"
# Fully qualified name given to the head node, and the short name written
# into slurm.conf as ControlMachine.
HOSTNAME="controller01.cix.vscaler.cloud"
SLURM_CONTROLLER="controller01"
# Physical interfaces and the role each one plays.
IF1="enp6s0f0"
IF2="enp6s0f1"
PROVISION_IF="${IF1}"
EXTERNAL_IF="${IF2}"
# Location of the compute-node image and the OpenHPC repo definition.
CHROOT="/opt/ohpc/admin/images/centos7.2"
OHPC_REPO="http://build.openhpc.community/OpenHPC:/1.1/CentOS_7.2/OpenHPC:1.1.repo"
# Address and netmask assigned to the provisioning interface.
INTERNAL_IP="172.18.0.1"
INTERNAL_NM="255.255.0.0"
# Time source used for the initial sync.
NTP="0.centos.pool.ntp.org"

## Make sure the admin wants to continue

# Show the effective settings in one go and ask for confirmation.
cat <<EOF
*************************************************************************************
You are about to install the OHPC head-node packages and configure it appropriately, with the following parameters.
MAKE SURE YOU HAVE CHANGED THE INTERFASES AND SUBNETS IN THE SCRIPT ACCORDINGLY!
You are about to use:
   Internal/Provision Interface: $PROVISION_IF
   External Interface:           $EXTERNAL_IF
   Internal Subnet:              $INTERNAL_IP / $INTERNAL_NM
   Head Hostname:                $HOSTNAME
*************************************************************************************
Are you sure you want to continue? [y/N]
EOF

read input
if [[ "$input" != "y" ]]; then
        exit
fi


## Stop some services

# NetworkManager and firewalld are stopped and kept from starting at boot.
for svc in NetworkManager firewalld
do
        systemctl stop "$svc"
        systemctl disable "$svc"
done

## Set the hostname of the head

hostnamectl set-hostname "$HOSTNAME"

## Configure the provision interface

# Start from the shipped template, point it at the provisioning interface
# (the template is written for eno1), then append the static address.
ifcfg_file="/etc/sysconfig/network-scripts/ifcfg-${PROVISION_IF}"

cp "${FILE_DIR}/ifcfg-ohpc-provision" "$ifcfg_file"
for key in NAME DEVICE
do
        perl -pi -e "s/${key}=eno1/${key}=${PROVISION_IF}/" "$ifcfg_file"
done
{
        echo "IPADDR=$INTERNAL_IP"
        echo "NETMASK=$INTERNAL_NM"
} >> "$ifcfg_file"
systemctl restart network

## Add some aliases in bashrc and source it

# Persist the installation parameters as shell variables in root's .bashrc
# (a blank separator line first), then source the file so the same values
# are defined in the current shell as well.
#
# internal_netmask is taken from INTERNAL_NM, the netmask variable this
# script actually defines; PROVISION_NM is never assigned anywhere and would
# leave "internal_netmask=" empty.
{
        echo
        echo "CHROOT=$CHROOT"
        echo "ohpc_repo=$OHPC_REPO"
        echo "sms_name=$HOSTNAME"
        echo "sms_ip=$INTERNAL_IP"
        echo "sms_eth_internal=$PROVISION_IF"
        echo "eth_provision=$PROVISION_IF"
        echo "internal_netmask=$INTERNAL_NM"
        echo "ntp_server=$NTP"
} >> /root/.bashrc

source /root/.bashrc

## Add the OHPC repo in yum

# Drop the repo definition into yum's repo directory and flush yum's caches.
wget --directory-prefix=/etc/yum.repos.d "${OHPC_REPO}"
yum clean all

## Add ntp

# ntpd is stopped while ntpdate performs a one-off sync against $NTP,
# then started again and enabled at boot.
systemctl stop ntpd
ntpdate "${NTP}"
for action in start enable
do
        systemctl "$action" ntpd
done

## Install OHPC components

# Two separate transactions: base + Warewulf first, then the SLURM server.
yum --assumeyes groupinstall ohpc-base ohpc-warewulf
yum --assumeyes groupinstall ohpc-slurm-server
useradd slurm

## Modify config files

# Disabled step, kept for reference: it would unpack the Warewulf
# "setup-filesystems" capability archive, replace its 80-mkbootable
# provision script with the copy from $FILE_DIR, and repack the archive.
#mkdir /tmp/setup-filesystems
#cd /tmp/setup-filesystems
#cat /srv/warewulf/initramfs/capabilities/setup-filesystems | cpio -i
#rm -rf warewulf/provision/80-mkbootable
#cp $FILE_DIR/80-mkbootable warewulf/provision/
#find warewulf | cpio -o -c > /srv/warewulf/initramfs/capabilities/setup-filesystems
#cd

# The following edits address lines by NUMBER, so they are only correct for
# the exact file versions shipped with the installed packages.
# NOTE(review): confirm the line contents before reusing on another release.
# vnfs.conf: comment out line 29 and lines 53-54.
sed -i '29 s/^/#/' /etc/warewulf/vnfs.conf
sed -i '53,54 s/^/#/' /etc/warewulf/vnfs.conf
# bootstrap.conf: comment out lines 13-17, uncomment line 31.
sed -i '13,17 s/^/#/' /etc/warewulf/bootstrap.conf
sed -i '31 s/^#//' /etc/warewulf/bootstrap.conf

# Point Warewulf provisioning at the provisioning interface (default eth1).
perl -pi -e "s/device = eth1/device = ${PROVISION_IF}/" /etc/warewulf/provision.conf
# Switch the xinetd tftp entry from "disable = yes" to "disable = no".
perl -pi -e "s/^\s+disable\s+= yes/ disable = no/" /etc/xinetd.d/tftp
# Rewrite the access rules in Warewulf's httpd config: add
# "Require all granted" after every line ending in "cgi-bin>", replace
# "Allow from all" with "Require all granted", and drop indented
# "Order allow,deny" lines.
# NOTE(review): presumably a conversion to newer Apache access syntax — confirm.
MODFILE=/etc/httpd/conf.d/warewulf-httpd.conf
perl -pi -e "s/cgi-bin>\$/cgi-bin>\n Require all granted/" $MODFILE
perl -pi -e "s/Allow from all/Require all granted/" $MODFILE
perl -ni -e "print unless /^\s+Order allow,deny/" $MODFILE
# Set the SLURM controller name in slurm.conf.
perl -pi -e "s/ControlMachine=\S+/ControlMachine=$SLURM_CONTROLLER/" /etc/slurm/slurm.conf

## Restart and enable some services

# Each service is restarted to pick up the configuration changes and then
# enabled so it starts at boot.
for unit in xinetd mariadb httpd rpcbind nfs-server
do
        systemctl restart "${unit}.service"
        systemctl enable "${unit}.service"
done

# Overwrite (not append) the step marker: the next script compares the file
# content against exactly "1", which fails once a second line is appended.
echo "1" > /tmp/ohpc-step-2

# Optionally chain into step 3. The script is looked up next to this one
# rather than in the current working directory, so the chain also works
# when this script is started from another directory.
if [ "$run_3rd" == "1" ]
then
	"$(dirname "${BASH_SOURCE[0]}")/3_ohpc-head-exports-vnfs.sh"
fi

Exports and VNFS Image

  • 3_ohpc-head-exports-vnfs.sh
#!/bin/bash
#
# 3_ohpc-head-exports-vnfs.sh
#
# Step 3 of the head-node installation. This first part verifies that the
# script runs as root, that step 2 left its marker in /tmp/ohpc-step-2,
# asks for confirmation and loads the parameters that step 2 stored in
# /root/.bashrc.

## Check to see if you are root

# Test the effective UID instead of $USER: $USER is an ordinary environment
# variable and does not have to match the user actually running the script.
if [ "$EUID" -ne 0 ]
then
        echo "This and the following scripts must be ran as root!"
        exit 1
fi

# Only the first line of the marker is compared, so a marker that was
# appended to more than once (step 2 executed repeatedly) is still accepted.
check_prev=$(head -n 1 /tmp/ohpc-step-2 2> /dev/null)

if [ "$check_prev" != "1" ]
then
        echo "You did not run the second script! Exiting..."
        exit 1
fi

echo "You are about to create the CentOS 7.2 image for the compute nodes and configure it appropriately."
echo "Are you sure you want to continue? [y/N]"

read -r input
if [ "$input" != "y" ]
then
        exit 0
fi

source /root/.bashrc

# Stop here if the parameters were not loaded: with an empty CHROOT the
# later "$CHROOT/etc/..." paths would resolve to the head node's own /etc.
: "${CHROOT:?CHROOT is not set in /root/.bashrc - run 2_ohpc-head-install.sh first}"
: "${sms_ip:?sms_ip is not set in /root/.bashrc - run 2_ohpc-head-install.sh first}"

## Make the initial VNFS

# Create the CentOS 7 chroot under $CHROOT and install the package set for
# the compute-node image into it.
#
# - Wildcard patterns are quoted so yum receives them verbatim instead of
#   the shell expanding them against files in the current directory.
# - "environment-modules" and "hwloc-libs" are spelled correctly here; the
#   misspelled names ("enviroment-modules", "hwlock-libs") match no package.
# - Each package is listed once (ipmitool* and lmod-ohpc were repeated).
wwmkchroot centos-7 "$CHROOT"
yum -y --installroot="$CHROOT" groupinstall Base
yum -y --installroot="$CHROOT" install \
        'kernel*' 'grub*' sudo 'ipmitool*' epel-release htop nano \
        'tk*' 'tcl*' 'tigervnc*' 'freeipmi*' 'cairo*' 'perl*' 'gcc*' \
        'glibc*' screen yum-utils vim ntp libnl lsof libxml2-python python \
        mlocate 'numactl*' lmod-ohpc ohpc-slurm-client ganglia-gmond-ohpc \
        environment-modules hwloc-libs libfabric libpsm2 intel-clck-ohpc

## Setup ssh keys

wwinit ssh_keys

## Setup NFS shares

# Compute nodes mount /home and /opt/ohpc/pub from the head node ($sms_ip).
{
        echo "${sms_ip}:/home /home nfs rsize=1024,wsize=1024,cto 0 0"
        echo "${sms_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs rsize=1024,wsize=1024,cto 0 0"
} >> "$CHROOT/etc/fstab"

# Matching exports on the head node: /home read-write, /opt/ohpc/pub
# read-only.
{
        echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)"
        echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)"
} >> /etc/exports

exportfs -a
systemctl restart rpcbind
systemctl restart nfs-server

# Name resolution inside the image: every nameserver entry copied from the
# head node is replaced with the head node's internal address, followed by
# 8.8.8.8. The address comes from $sms_ip (written by step 2) instead of a
# hard-coded 172.18.0.1, so it follows the configured internal IP.
cp /etc/resolv.conf "$CHROOT/etc/"
sed -i "s/nameserver.*/nameserver ${sms_ip}/" "$CHROOT/etc/resolv.conf"
echo "nameserver 8.8.8.8" >> "$CHROOT/etc/resolv.conf"

## Modify limits

# Lift the locked-memory limit for all users, first on the head node
# (empty prefix) and then inside the compute image ($CHROOT prefix).
for root in "" "$CHROOT"
do
        limits_file="${root}/etc/security/limits.conf"
        echo "* soft memlock unlimited" >> "$limits_file"
        echo "* hard memlock unlimited" >> "$limits_file"
done

## Fix slow ssh

# Same two sshd_config edits on the head node and in the image:
# comment out lines 93-94 and turn the commented "UseDNS yes" into
# "UseDNS no".
for root in "" "$CHROOT"
do
        sshd_conf="${root}/etc/ssh/sshd_config"
        sed -i '93,94 s/^/#/' "$sshd_conf"
        sed -i 's/#UseDNS yes/UseDNS no/' "$sshd_conf"
done

## Import ww files

# Import the files that are pushed to the nodes into the Warewulf datastore
# and repeat until "wwsh file list" reports at least 6 lines.
# The number of passes is capped: without a cap the loop never ends when
# wwsh keeps failing (its output then stays below the threshold forever).
max_attempts=5
attempt=0
ww_files=$(wwsh file list | wc -l)
while [ "$ww_files" -lt 6 ]
do
	if [ "$attempt" -ge "$max_attempts" ]
	then
		echo "Warewulf file import still incomplete after $max_attempts attempts. Exiting..." >&2
		exit 1
	fi
	attempt=$(( attempt + 1 ))

	wwsh -y file import /etc/passwd
	wwsh -y file import /etc/shadow
	wwsh -y file import /etc/group
	wwsh -y file import /etc/slurm/slurm.conf
	wwsh -y file import /etc/munge/munge.key
	wwsh -y file import /etc/profile.d/motd.sh
	wwsh -y file import /opt/ohpc/pub/examples/network/centos/ifcfg-ib0.ww
	# The ib0 template is delivered to the nodes as their ifcfg-ib0.
	wwsh -y file set ifcfg-ib0.ww --path=/etc/sysconfig/network-scripts/ifcfg-ib0
	ww_files=$(wwsh file list | wc -l)
done

wwinit AUTH
wwinit IPXE

## Build bootstrap and vnfs

# Replace the first line (interpreter line) of both Warewulf tools with
# "#!/usr/bin/perl -w" before running them.
sed -i '1s|.*|#!/usr/bin/perl -w|' /usr/bin/wwvnfs
sed -i '1s|.*|#!/usr/bin/perl -w|' /usr/bin/wwbootstrap

# Build the bootstrap for the kernel currently running on the head node,
# then pack the chroot into a VNFS image.
kernel=$(uname -r)
wwbootstrap "$kernel"
wwvnfs -y --chroot "$CHROOT"

# Overwrite (not append) the step marker so that running this script more
# than once still leaves a single "1" in the file.
echo "1" > /tmp/ohpc-step-3

Add nodes to OHPC

  • 4_ohpc-head-add_nodes.sh
  • Be sure to correctly define the following variables:
    • NETMASK
    • HEAD_IP
    • INTERFACE
    • BOOTSTRAP
    • ip_part
  • And ensure that the array variables (controller_macs), (networknode_macs), (storage_macs) and (compute_macs) contain the MAC addresses of the nodes to add
  • Also, ensure that various for loop definitions (e.g. for node in {01..04}) represent the correct number of MAC addresses for their respective array variables
#!/bin/bash
#
# 4_ohpc-head-add_nodes.sh
#
# Registers the cluster nodes in Warewulf. This first part checks for root,
# asks for confirmation, defines the network parameters and MAC address
# lists, and imports a generated "network" file into Warewulf.

## Check to see if you are root

# Test the effective UID instead of $USER: $USER is an ordinary environment
# variable and does not have to match the user actually running the script.
if [ "$EUID" -ne 0 ]
then
        echo "This and the following scripts must be ran as root!"
        exit 1
fi

echo "You are about to add node to the cluster, but you need to change the MAC addresses and the subnets according to what you need."
echo "Are you sure you want to continue? [y/N]"

read -r input
if [ "$input" != "y" ]
then
        exit 0
fi

# Netmask and gateway handed to every node, the node-side provisioning
# interface, the bootstrap to boot (kernel release of this machine) and the
# DNS domain of the nodes.
NETMASK=255.255.0.0
HEAD_IP=172.18.0.1
INTERFACE=enp6s0f0
BOOTSTRAP=$(uname -r)
DOMAIN=cix.vscaler.cloud

# Generate the nodes' /etc/sysconfig/network content and import it as the
# Warewulf file "network". mktemp is used instead of a predictable
# /tmp/network.<pid> name, which is unsafe to write to as root.
# NOTE(review): the temp file is left in place on purpose; Warewulf may keep
# the import path as the file's origin - confirm before deleting it.
network_file=$(mktemp /tmp/network.XXXXXX) || exit 1
echo "GATEWAYDEV=$INTERFACE" > "$network_file"
wwsh -y file import "$network_file" --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0

# MAC address of the provisioning NIC of every node, one array per node
# type, in node-number order.
controller_macs=("00:30:48:7F:35:36")
networknode_macs=("00:30:48:7F:35:50" "00:30:48:C6:A8:54")
storage_macs=("00:30:48:7F:35:5E" "00:30:48:7F:35:64" "00:30:48:7F:35:4C" "00:30:48:7F:13:90")
compute_macs=("00:30:48:C5:E7:12" "00:30:48:C5:E7:18")

## Add extra controller nodes

# One node is registered per entry in controller_macs, so the loop can never
# run past the end of the MAC list (which would register a node with an
# empty --hwaddr). Numbering and host addresses start at 2: the first array
# entry becomes controller02 with 172.18.0.2 / 172.20.0.2.
first_node=2
for mac_i in "${!controller_macs[@]}"
do
	ip_part=$(( first_node + mac_i ))
	printf -v node '%02d' "$ip_part"
	hostname="controller${node}"
	eth_ip="172.18.0.${ip_part}"
	ib_ip="172.20.0.${ip_part}"

	# Register the node on the provisioning network.
	wwsh -y node new "$hostname" --ipaddr="$eth_ip" -M "$NETMASK" -G "$HEAD_IP" --domain="$DOMAIN" --hwaddr="${controller_macs[$mac_i]}" -D "$INTERFACE"

	# Files, image and bootstrap the node is provisioned with.
	wwsh -y provision set "$hostname" --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network,ifcfg-ib0.ww,motd.sh
	wwsh -y provision set "$hostname" --vnfs=centos7.2 --bootstrap="$BOOTSTRAP"

	# Disk layout on sda: sda1 /boot (ext4), sda2 swap, sda3 / (ext4).
	wwsh -y object modify -s bootloader=sda "$hostname"
	wwsh -y object modify -s diskpartition=sda "$hostname"
	wwsh -y object modify -s diskformat=sda1,sda2,sda3 "$hostname"
	wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=8192,mountpoint=/:dev=sda3:type=ext4:size=2000000" "$hostname"

	# Second network device: ib0.
	wwsh -y node set --netdev=ib0 --ipaddr="$ib_ip" --netmask="$NETMASK" "$hostname"
done

## Add network nodes

# One node is registered per entry in networknode_macs, so the loop can
# never run past the end of the MAC list (which would register a node with
# an empty --hwaddr). The first entry becomes networknode01 with
# 172.18.1.1 / 172.20.1.1.
for mac_i in "${!networknode_macs[@]}"
do
        ip_part=$(( mac_i + 1 ))
        printf -v node '%02d' "$ip_part"
        hostname="networknode${node}"
        eth_ip="172.18.1.${ip_part}"
        ib_ip="172.20.1.${ip_part}"

        # Register the node on the provisioning network.
        wwsh -y node new "$hostname" --ipaddr="$eth_ip" -M "$NETMASK" -G "$HEAD_IP" --domain="$DOMAIN" --hwaddr="${networknode_macs[$mac_i]}" -D "$INTERFACE"

        # Files, image and bootstrap the node is provisioned with.
        wwsh -y provision set "$hostname" --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network,ifcfg-ib0.ww,motd.sh
        wwsh -y provision set "$hostname" --vnfs=centos7.2 --bootstrap="$BOOTSTRAP"

        # Disk layout on sda: sda1 /boot (ext4), sda2 swap, sda3 / (ext4).
        wwsh -y object modify -s bootloader=sda "$hostname"
        wwsh -y object modify -s diskpartition=sda "$hostname"
        wwsh -y object modify -s diskformat=sda1,sda2,sda3 "$hostname"
        wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=8192,mountpoint=/:dev=sda3:type=ext4:size=2000000" "$hostname"

        # Second network device: ib0.
        wwsh -y node set --netdev=ib0 --ipaddr="$ib_ip" --netmask="$NETMASK" "$hostname"
done

## Add storage nodes

# One node is registered per entry in storage_macs, so the loop can never
# run past the end of the MAC list (which would register a node with an
# empty --hwaddr). The first entry becomes storage01 with
# 172.18.2.1 / 172.20.2.1.
for mac_i in "${!storage_macs[@]}"
do
        ip_part=$(( mac_i + 1 ))
        printf -v node '%02d' "$ip_part"
        hostname="storage${node}"
        eth_ip="172.18.2.${ip_part}"
        ib_ip="172.20.2.${ip_part}"

        # Register the node on the provisioning network.
        wwsh -y node new "$hostname" --ipaddr="$eth_ip" -M "$NETMASK" -G "$HEAD_IP" --domain="$DOMAIN" --hwaddr="${storage_macs[$mac_i]}" -D "$INTERFACE"

        # Files, image and bootstrap the node is provisioned with.
        wwsh -y provision set "$hostname" --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network,ifcfg-ib0.ww,motd.sh
        wwsh -y provision set "$hostname" --vnfs=centos7.2 --bootstrap="$BOOTSTRAP"

        # Disk layout on sda: sda1 /boot (ext4), sda2 swap, sda3 / (ext4).
        wwsh -y object modify -s bootloader=sda "$hostname"
        wwsh -y object modify -s diskpartition=sda "$hostname"
        wwsh -y object modify -s diskformat=sda1,sda2,sda3 "$hostname"
        wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=8192,mountpoint=/:dev=sda3:type=ext4:size=1500000" "$hostname"

        # Second network device: ib0.
        wwsh -y node set --netdev=ib0 --ipaddr="$ib_ip" --netmask="$NETMASK" "$hostname"
done

## Add compute nodes

# One node is registered per entry in compute_macs, so the loop can never
# run past the end of the MAC list (which would register a node with an
# empty --hwaddr). The first entry becomes compute01 with
# 172.18.10.1 / 172.20.10.1.
#
# vx0: the address for this device used to come from $vx_ip, a variable that
# is never assigned, so wwsh was called with an empty --ipaddr=. The device
# is now only configured when VX_SUBNET (first three octets of the vx0
# network, taken from the environment) is provided; the host part is the
# same as for the other interfaces.
# NOTE(review): the intended vx0 subnet is not recorded anywhere in this
# script - set VX_SUBNET once it is known.
for mac_i in "${!compute_macs[@]}"
do
        ip_part=$(( mac_i + 1 ))
        printf -v node '%02d' "$ip_part"
        hostname="compute${node}"
        eth_ip="172.18.10.${ip_part}"
        ib_ip="172.20.10.${ip_part}"

        # Register the node on the provisioning network.
        wwsh -y node new "$hostname" --ipaddr="$eth_ip" -M "$NETMASK" -G "$HEAD_IP" --domain="$DOMAIN" --hwaddr="${compute_macs[$mac_i]}" -D "$INTERFACE"

        # Files, image and bootstrap the node is provisioned with.
        wwsh -y provision set "$hostname" --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network,ifcfg-ib0.ww,motd.sh
        wwsh -y provision set "$hostname" --vnfs=centos7.2 --bootstrap="$BOOTSTRAP"

        # Disk layout on sda: sda1 /boot (ext4), sda2 swap, sda3 / (ext4).
        wwsh -y object modify -s bootloader=sda "$hostname"
        wwsh -y object modify -s diskpartition=sda "$hostname"
        wwsh -y object modify -s diskformat=sda1,sda2,sda3 "$hostname"
        wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=8192,mountpoint=/:dev=sda3:type=ext4:size=1500000" "$hostname"

        # Additional network devices: ib0 always, vx0 only when configured.
        wwsh -y node set --netdev=ib0 --ipaddr="$ib_ip" --netmask="$NETMASK" "$hostname"
        if [ -n "${VX_SUBNET:-}" ]
        then
                vx_ip="${VX_SUBNET}.${ip_part}"
                wwsh -y node set --netdev=vx0 --ipaddr="$vx_ip" --netmask="$NETMASK" "$hostname"
        else
                echo "VX_SUBNET is not set; skipping vx0 on $hostname" >&2
        fi
done

# Make the new nodes known to DHCP and PXE.
systemctl restart dhcpd
wwsh pxe update
wwsh dhcp update

Configure firewall

  • 5_ohpc-head-firewall.sh
  • Be sure to correctly define the following variables:
    • IF_Internal
    • IF_External
#!/bin/bash
#
# 5_ohpc-head-firewall.sh
#
# Configures firewalld on the head node. This first part checks for root,
# defines the internal and external interfaces, shows them and asks for
# confirmation.

## Check to see if you are root

# Test the effective UID instead of $USER: $USER is an ordinary environment
# variable and does not have to match the user actually running the script.
if [ "$EUID" -ne 0 ]
then
        echo "This and the following scripts must be ran as root!"
        exit 1
fi

# Interface facing the cluster network and interface facing the outside.
IF_Internal=enp6s0f0
IF_External=enp6s0f1

echo "**************************************************************************************"
echo "You are about to add the appropriate firewall rules to enable internet access on the compute nodes and all the required services."
echo "MAKE SURE YOU HAVE CHANGED THE INTERFACES ACCORDING TO YOUR SYSTEM!"
echo "You have selected:"
echo "   Internal if: $IF_Internal"
echo "   External if: $IF_External"
echo "**************************************************************************************"
echo "Are you sure you want to continue? [y/N]"

read -r input
if [ "$input" != "y" ]
then
        exit 0
fi

# Bring firewalld up so that firewall-cmd can talk to it.
systemctl start firewalld

# Bind each interface to its zone.
firewall-cmd --zone=external --add-interface=$IF_External --permanent
firewall-cmd --zone=internal --add-interface=$IF_Internal --permanent

# Masquerade traffic leaving through the external interface.
firewall-cmd --zone=external --add-masquerade --permanent
firewall-cmd --permanent --direct --passthrough ipv4 -t nat -I POSTROUTING -o $IF_External -j MASQUERADE

# Services opened in the internal zone.
for svc in dhcp tftp dns http nfs ssh mountd rpc-bind
do
        firewall-cmd --permanent --zone=internal --add-service=$svc
done
firewall-cmd --complete-reload
firewall-cmd --list-all-zones

# Ports 6817 and 6818, TCP and UDP, in the internal zone.
for port in 6817 6818
do
        for proto in tcp udp
        do
                firewall-cmd --permanent --zone=internal --add-port=$port/$proto
        done
done

# Web access from the external zone.
for svc in http https
do
        firewall-cmd --permanent --zone=external --add-service=$svc
done

# Ports 8660-8663 in the internal zone: all TCP first, then all UDP.
for proto in tcp udp
do
        for port in 8660 8661 8662 8663
        do
                firewall-cmd --permanent --zone=internal --add-port=$port/$proto
        done
done

# Port 8651 in the internal zone: UDP first, then TCP.
for proto in udp tcp
do
        firewall-cmd --permanent --zone=internal --add-port=8651/$proto
done

# Apply the permanent configuration and keep firewalld enabled at boot.
systemctl restart firewalld
systemctl enable firewalld