Difference between revisions of "OpenHPC:Headnode install"
| Line 307: | Line 307: | ||
== Build and Configure the Chroot == | == Build and Configure the Chroot == | ||
| + | |||
| + | '''Make Initial VNFS (Chroot, compute Node template) and install some Base components into the chroot operating system''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | wwmkchroot centos-7 $CHROOT | ||
| + | yum -y --installroot=$CHROOT groupinstall "Base" | ||
| + | yum -y install kernel* grub* sudo ipmitool* epel-release htop nano tk* tcl* tigervnc* ipmitool* freeipmi* cairo* perl* gcc* glibc* screen yum-utils vim ntp libnl \ | ||
| + | lsof libxml2-python python mlocate numactl* lmod-ohpc ohpc-slurm-client lmod-ohpc ganglia-gmond-ohpc environment-modules hwloc-libs libfabric libpsm2 intel-clck-ohpc | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Setup SSH Keys for the Cluster – this is required for the root user only, /home will be exported so user ssh keys will be available. ''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | wwinit ssh_keys | ||
| + | cat ~/.ssh/cluster.pub >> $CHROOT/root/.ssh/authorized_keys | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Setup NFS exports and FSTAB on the compute image''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | echo "${sms_ip}:/home /home nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0" >> $CHROOT/etc/fstab | ||
| + | echo "${sms_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0" >> $CHROOT/etc/fstab | ||
| + | |||
| + | echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports | ||
| + | echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports | ||
| + | |||
| + | exportfs -a | ||
| + | systemctl restart rpcbind | ||
| + | systemctl restart nfs-server.service | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Copy over resolv.conf to the chroot and modify the contents to point to the headnode and Google DNS.''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | cp /etc/resolv.conf $CHROOT/etc/ | ||
| + | vi $CHROOT/etc/resolv.conf | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Modify Limits to unlimited on headnode and compute nodes..''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | echo "* soft memlock unlimited" >> /etc/security/limits.conf | ||
| + | echo "* hard memlock unlimited" >> /etc/security/limits.conf | ||
| + | echo "* soft memlock unlimited" >> $CHROOT/etc/security/limits.conf | ||
| + | echo "* hard memlock unlimited" >> $CHROOT/etc/security/limits.conf | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Import warewulf files to the database, these will be synced to compute nodes all of the time.''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | wwsh file import /etc/passwd | ||
| + | wwsh file import /etc/shadow | ||
| + | wwsh file import /etc/group | ||
| + | wwsh file import /etc/slurm/slurm.conf | ||
| + | wwsh file import /etc/munge/munge.key | ||
| + | wwsh file import /opt/ohpc/pub/examples/network/centos/ifcfg-ib0.ww | ||
| + | wwsh -y file set ifcfg-ib0.ww --path=/etc/sysconfig/network-scripts/ifcfg-ib0 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Building the bootstrap and vnfs images''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | wwbootstrap 3.10.0-229.20.1.el7.x86_64 | ||
| + | wwvnfs -y --chroot | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Building the bootstrap and vnfs images''' | ||
| + | <syntaxhighlight> | ||
| + | wwbootstrap 3.10.0-229.20.1.el7.x86_64 | ||
| + | wwvnfs -y --chroot | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | == Compute Node Configuration for Stateful Provisioning == | ||
| + | |||
| + | '''Adding the compute nodes to the database with the correct parameters to allow for stateful provisioning''' | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | wwsh node new c0 --ipaddr=some.ip.address.here --hwaddr=some.mac.address.here -D eno1 | ||
| + | echo "GATEWAYDEV=${eth_provision}" > /tmp/network.$$ | ||
| + | wwsh -y file import /tmp/network.$$ --name network | ||
| + | wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0 | ||
| + | |||
| + | wwsh -y provision set c0 --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network | ||
| + | wwsh -y provision set c0 --vnfs=centos7.1 --bootstrap=kernel.version.here | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | '''Setup Bootloader and Partitions''' | ||
| + | <syntaxhighlight> | ||
| + | wwsh -y object modify -s bootloader=sda c0 | ||
| + | wwsh -y object modify -s diskpartition=sda c0 | ||
| + | wwsh -y object modify -s diskformat=sda1,sda2,sda3 c0 | ||
| + | wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=32768,mountpoint=/:dev=sda3:type=ext4:size=fill" c0 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | systemctl restart dhcpd | ||
| + | wwsh pxe update | ||
| + | wwsh dhcp update | ||
| + | wwsh node list # Should show all the nodes you just added | ||
| + | </syntaxhighlight> | ||
Revision as of 11:00, 16 December 2015
Basic Initial System Configuration:
OpenHPC is designed to deploy CentOS 7.x based clusters, please install a fresh copy of CentOS 7.x onto a system. Please modify the partitioning and make the “/” partition a reasonable size.
service NetworkManager stop
service iptables stop
chkconfig NetworkManager off
chkconfig iptables off
chkconfig firewalld off
setenforce 0
vi /etc/selinux/config
Modify the system hostname to use a fully qualified domain name. Also modify the network interfaces to have 1x Private interface and 1x Public interface; eno1 must be the private (Provisioning) interface and eno2 can be the public interface. There are alternate ways to specify what interface to use during provision, however with CentOS 7.1 I have been unable to find them.
echo "head.ohpc.net" > /etc/hostname
vi /etc/sysconfig/network-scripts/ifcfg-eno1
vi /etc/sysconfig/network-scripts/ifcfg-eno2
Yum update the system to the latest package versions, install additional packages and reboot.
yum install kernel* tk* tcl* tigervnc* ipmitool* freeipmi* cairo* perl* gcc* glibc* screen epel-release vim; yum install htop; yum -y install kernel* tk* tcl* \
tigervnc* ipmitool* freeipmi* cairo* perl* gcc* glibc* screen epel-release vim ntp libnl lsof libxml2-python python mlocate numactl* yum-utils; yum -y install \
htop; yum -y groupinstall "Development Tools" "X Window System" "Base"; yum -y update
Add the following variables to your bashrc for ease, as you will be typing them frequently.
CHROOT=/opt/ohpc/admin/images/centos7.1
ohpc_repo=http://build.openhpc.community/OpenHPC:/1.0/CentOS_7.1/OpenHPC:1.0.repo
sms_name=head.ohpc.net # Hostname of Headnode
sms_ip=10.10.10.1 # Private Interface IP of Headnode
sms_eth_internal=eno1 # Private Interface of Headnode
eth_provision=eno1 # Provisioning Interface of Headnode
internal_netmask=255.255.255.0 # Netmask of Private Interface
ntp_server=0.centos.pool.ntp.org # Some NTP Server
bmc_username=ADMIN
bmc_password=ADMIN
sms_ipoib=10.10.20.1 # IPoIB Address of Headnode
ipoib_netmask=255.255.255.0 # IPoIB Netmask of Headnode
source /root/.bashrc
wget -P /etc/yum.repos.d ${ohpc_repo}
yum clean all
Setup Network Time Protocol
service ntpd stop
ntpdate 0.centos.pool.ntp.org
vi /etc/ntp.conf #Modify with your ntp server
service ntpd restart
chkconfig ntpd on
'''Enable sudo without a password for users in the Wheel group'''
<syntaxhighlight>
visudo
# Unhash line 108
</syntaxhighlight>
Installing and Patching the OpenHPC Base components
Basic OpenHPC Component install and patching to make it work correctly with Grub2 (The patching component of this step is for stateful provisioning only) If deploying systems as only RAM disks this process is not necessary.
yum groupinstall ohpc-base ohpc-warewulf
yum -y groupinstall ohpc-slurm-server
useradd slurm
mkdir /tmp/setup-filesystems
cd /tmp/setup-filesystems
cat /srv/warewulf/initramfs/capabilities/setup-filesystems | cpio -i
vi warewulf/provision/80-mkbootable
Modify the file with the below contents
#!/bin/sh
#
# Copyright (c) 2001-2003 Gregory M. Kurtzer
#
# Copyright (c) 2003-2011, The Regents of the University of California,
# through Lawrence Berkeley National Laboratory (subject to receipt of any
# required approvals from the U.S. Dept. of Energy). All rights reserved.
#
# Warewulf provision script (capability setup-filesystems): makes the root
# filesystem mounted at $NEWROOT bootable from local disk by (re)building an
# initrd and installing GRUB2 or, failing that, legacy GRUB.
#
# Inputs (environment variables and files read by this script):
#   WWBOOTLOADER  disk to install the bootloader on, with or without the
#                 leading /dev/ (e.g. "sda")
#   NEWROOT       mount point of the provisioned root filesystem
#   WWKARGS       extra kernel arguments; defaults to "rhgb" when empty
#   WWCONSOLE     optional serial console spec such as "ttyS0,115200n8"
#   /tmp/rootdev  file whose content is used as the kernel root= device
#   /tmp/mptab    optional; a "/boot " entry means /boot is its own filesystem
#   /tmp/mtab     optional; copied to $NEWROOT/etc/mtab for legacy GRUB
#
# Exit status:
#   0    bootloader installed
#   1    nothing to do (WWBOOTLOADER empty or /tmp/rootdev missing)
#   2    prerequisite missing (bad device, no kernel, no initrd, no GRUB)
#   255  the bootloader installer itself failed

# Install a bootloader if $WWBOOTLOADER is set and the root device is known
if [ -n "$WWBOOTLOADER" ] && [ -f "/tmp/rootdev" ]; then
    # Accept both "sda" and "/dev/sda"
    if ! echo "$WWBOOTLOADER" | grep -q "^/dev/"; then
        WWBOOTLOADER="/dev/$WWBOOTLOADER"
    fi
    if [ -b "$WWBOOTLOADER" ]; then
        # Last vmlinuz-* listed under boot/ in the new root
        KERNEL=$(cd "$NEWROOT"; find boot/vmlinuz-* 2>/dev/null | tail -n 1)
        if [ -n "$KERNEL" ]; then
            KERNELVERSION=$(echo "$KERNEL" | sed -e 's@.*boot/vmlinuz-@@')
            if [ -x "$NEWROOT/sbin/dracut" ]; then
                chroot "$NEWROOT" /sbin/dracut --force '' "$KERNELVERSION"
                INITRD=$(cd "$NEWROOT"; find boot/initr*-${KERNELVERSION}.img* 2>/dev/null | tail -n 1)
            elif [ -x "$NEWROOT/sbin/mkinitrd" ]; then
                INITRD=$(cd "$NEWROOT"; find boot/initr*-${KERNELVERSION}.img* 2>/dev/null | tail -n 1)
                # Only fall back to the default name when no image was found;
                # otherwise mkinitrd would be called without a target file.
                if [ -z "$INITRD" ]; then
                    INITRD="boot/initramfs-$KERNELVERSION.img"
                fi
                mount -t sysfs none "$NEWROOT/sys"
                chroot "$NEWROOT" /sbin/mkinitrd -f "$INITRD" "$KERNELVERSION"
                umount "$NEWROOT/sys"
            fi
            if [ -z "${WWKARGS}" ]; then
                WWKARGS="rhgb"
            fi
            if [ -n "${WWCONSOLE}" ]; then
                # Parse the kernel console option for serial settings for grub.
                # $1 is the console spec, $2 selects the capture group:
                # 1=unit, 2=speed, 3=parity, 4=word length.
                sed_serial() { echo "$1" | sed -nr "s/.*ttyS([0-9]),([0-9]{4,6})([n,o,e])?([5-8])?(r)?.*/\\${2}/p"; }
                SERIALUNIT=$(sed_serial "${WWCONSOLE}" 1)
                SERIALSPEED=$(sed_serial "${WWCONSOLE}" 2)
                if [ -z "$SERIALSPEED" ]; then
                    SERIALSPEED=115200
                fi
                SERIALPARITY=$(sed_serial "${WWCONSOLE}" 3)
                if [ -z "$SERIALPARITY" ] || [ "$SERIALPARITY" = "n" ]; then
                    SERIALPARITY=no
                elif [ "$SERIALPARITY" = "o" ]; then
                    SERIALPARITY=odd
                elif [ "$SERIALPARITY" = "e" ]; then
                    SERIALPARITY=even
                fi
                SERIALWORD=$(sed_serial "${WWCONSOLE}" 4)
                if [ -z "$SERIALWORD" ]; then
                    SERIALWORD=8
                fi
            fi
            if [ -x "$NEWROOT/usr/sbin/grub2-install" ]; then
                # GRUB2: append settings to /etc/default/grub, then regenerate
                if [ -n "${WWCONSOLE}" ]; then
                    echo "GRUB_CMDLINE_LINUX='${WWKARGS} console=tty0 console=${WWCONSOLE}'" >> "$NEWROOT/etc/default/grub"
                    # Only configure a GRUB serial terminal when a unit was parsed
                    if [ -n "$SERIALUNIT" ]; then
                        echo "GRUB_TERMINAL='console serial'" >> "$NEWROOT/etc/default/grub"
                        echo "GRUB_SERIAL_COMMAND='serial --speed=${SERIALSPEED} --unit=${SERIALUNIT} --word=${SERIALWORD} --parity=${SERIALPARITY}'" >> "$NEWROOT/etc/default/grub"
                    fi
                else
                    echo "GRUB_CMDLINE_LINUX='${WWKARGS}'" >> "$NEWROOT/etc/default/grub"
                fi
                chroot "$NEWROOT" /usr/sbin/grub2-mkconfig -o /boot/grub2/grub.cfg >/dev/null
                if chroot "$NEWROOT" /usr/sbin/grub2-install "$WWBOOTLOADER" >/dev/null; then
                    exit 0
                else
                    # Report the failure like the legacy GRUB branch does,
                    # instead of falling off the end with status 0.
                    echo "Running grub2-install failed!"
                    exit 255
                fi
            elif [ -x "$NEWROOT/sbin/grub-install" ]; then
                # Legacy GRUB: write device.map and grub.conf by hand
                ROOTDEV=$(cat /tmp/rootdev)
                if [ -n "$INITRD" ] && [ -n "$KERNEL" ]; then
                    if [ -f "$NEWROOT/etc/redhat-release" ]; then
                        OSVERSION=$(sed -e 's@ (.*@@' "$NEWROOT/etc/redhat-release")
                    elif [ -f "$NEWROOT/etc/release" ]; then
                        # Read the file that was just tested for
                        OSVERSION=$(head -n 1 "$NEWROOT/etc/release")
                    else
                        OSVERSION="Warewulf"
                    fi
                    # With a separate /boot filesystem, paths in grub.conf are
                    # relative to that filesystem, so drop the boot/ prefix.
                    if [ -f /tmp/mptab ]; then
                        if grep -q "^/boot " /tmp/mptab; then
                            INITRD=${INITRD##boot/}
                            KERNEL=${KERNEL##boot/}
                        fi
                    fi
                    echo "# This file was written by Warewulf bootstrap (capability setup-filesystems)" > "$NEWROOT/boot/grub/device.map"
                    echo "(hd0) $WWBOOTLOADER" >> "$NEWROOT/boot/grub/device.map"
                    echo "# This file was written by Warewulf bootstrap (capability setup-filesystems)" > "$NEWROOT/boot/grub/grub.conf"
                    # Skip the serial lines when no serial unit was parsed
                    if [ -n "$SERIALUNIT" ]; then
                        echo "serial --speed=${SERIALSPEED} --unit=${SERIALUNIT} --word=${SERIALWORD} --parity=${SERIALPARITY}" >> "$NEWROOT/boot/grub/grub.conf"
                        echo "terminal_input console serial; terminal_output console serial" >> "$NEWROOT/boot/grub/grub.conf"
                    fi
                    echo "default 0" >> "$NEWROOT/boot/grub/grub.conf"
                    echo "timeout 10" >> "$NEWROOT/boot/grub/grub.conf"
                    echo "root (hd0,0)" >> "$NEWROOT/boot/grub/grub.conf"
                    echo "" >> "$NEWROOT/boot/grub/grub.conf"
                    echo "title $OSVERSION - $KERNELVERSION" >> "$NEWROOT/boot/grub/grub.conf"
                    if [ -n "${WWCONSOLE}" ]; then
                        echo " kernel /$KERNEL ro root=$ROOTDEV rhgb ${WWKARGS} console=tty0 console=${WWCONSOLE}" >> "$NEWROOT/boot/grub/grub.conf"
                    else
                        echo " kernel /$KERNEL ro root=$ROOTDEV rhgb ${WWKARGS}" >> "$NEWROOT/boot/grub/grub.conf"
                    fi
                    echo " initrd /$INITRD" >> "$NEWROOT/boot/grub/grub.conf"
                    if [ -f "/tmp/mtab" ]; then
                        cp /tmp/mtab "$NEWROOT/etc/mtab"
                    fi
                    # Device-mapper control node inside the chroot
                    mkdir -p "$NEWROOT/dev/mapper"
                    mknod "$NEWROOT/dev/mapper/control" c 10 58
                    if chroot "$NEWROOT" /sbin/grub-install "$WWBOOTLOADER" >/dev/null; then
                        exit 0
                    else
                        # grub-install failed: drive the grub shell in batch mode
                        gscript="/root/grubinstall.sh"
                        grubscript=${NEWROOT}${gscript}
                        grubtext=$NEWROOT/root/grubinstall.txt
                        echo -n "Running grub-install failed. Trying manually."
                        # Setup Commands to install GRUB
                        echo "root (hd0,0)" > "$grubtext"
                        echo "setup (hd0)" >> "$grubtext"
                        echo "quit" >> "$grubtext"
                        # Bash script to run grub.
                        echo "#!/bin/bash" > "$grubscript"
                        echo "/sbin/grub --batch < /root/grubinstall.txt &>/root/grubinstall.out" >> "$grubscript"
                        chmod 755 "$grubscript"
                        # POSIX redirection: "&>" would background the command
                        # in a strict /bin/sh and make this test always succeed.
                        if chroot "$NEWROOT" "$gscript" >/dev/null 2>&1; then
                            exit 0
                        else
                            echo "Running grub-install failed!"
                            exit 255
                        fi
                    fi
                else
                    echo "Could not find INITRD and/or KERNEL version!"
                    exit 2
                fi
            else
                echo "GRUB is not installed!"
                exit 2
            fi
        else
            echo "Could not identify kernel version in VNFS!"
            exit 2
        fi
    else
        echo "BOOTLOADER=$WWBOOTLOADER is invalid!"
        exit 2
    fi
else
    exit 1
fi
# vim: filetype=sh:syntax=sh:expandtab:ts=4:sw=4:
You now need to rebuild the setup-filesystems capability into a cpio archive
find warewulf | cpio -o -c > /srv/warewulf/initramfs/capabilities/setup-filesystems
Modify Warewulf core Configuration Files to provision Correctly
Modify warewulf provision.conf and bootstrap.conf to correctly include the correct kernel modules and configuration
vi /etc/warewulf/provision.conf #Ensure exclude looks like this.
exclude += /tmp/*
exclude += /var/log/*
exclude += /var/chroots/*
#exclude += /var/cache
exclude += /usr/src
#exclude += /usr/share
#exclude += /home/*
vi /etc/warewulf/bootstrap.conf #Hash out all Infiniband drivers
# Infiniband drivers and Mellanox drivers
#drivers += ib_ipath, ib_iser, ib_srpt, ib_sdp, ib_mthca, ib_qib, iw_cxgb3, cxgb3
#drivers += iw_nes, mlx4_ib, ib_srp, ib_ipoib, ib_addr, rdma_cm, ib_ucm
#drivers += ib_ucm, ib_uverbs, ib_umad, ib_cm, ib_mad, iw_cm, ib_core
#drivers += rdma_ucm, ib_sa, mlx4_en, mlx4_core
#drivers += rds, rds_rdma, rds_tcp, mlx4_vnic, mlx4_vnic_helper
#Unhash the modprobe for the Mellanox Modules
modprobe += mlx4_core log_num_mtts=20 log_mtts_per_seg=6, ib_srp
Modify some Warewulf provisioning files to use the correct interfaces and some general Warewulf files to allow provisioning to work.
perl -pi -e "s/device = eth1/device = ${sms_eth_internal}/" /etc/warewulf/provision.conf
perl -pi -e "s/^\s+disable\s+= yes/ disable = no/" /etc/xinetd.d/tftp
export MODFILE=/etc/httpd/conf.d/warewulf-httpd.conf
perl -pi -e "s/cgi-bin>\$/cgi-bin>\n Require all granted/" $MODFILE
perl -pi -e "s/Allow from all/Require all granted/" $MODFILE
perl -ni -e "print unless /^\s+Order allow,deny/" $MODFILE
perl -pi -e "s/ControlMachine=\S+/ControlMachine=head.ohpc.net/" /etc/slurm/slurm.conf
systemctl restart xinetd
systemctl enable mariadb.service
systemctl restart mariadb
systemctl enable httpd.service
systemctl restart httpd.service
systemctl restart rpcbind.service
systemctl enable rpcbind.service
systemctl restart nfs-server.service
systemctl enable nfs-server.service
Build and Configure the Chroot
Make Initial VNFS (Chroot, compute Node template) and install some Base components into the chroot operating system
wwmkchroot centos-7 $CHROOT
yum -y --installroot=$CHROOT groupinstall "Base"
yum -y install kernel* grub* sudo ipmitool* epel-release htop nano tk* tcl* tigervnc* ipmitool* freeipmi* cairo* perl* gcc* glibc* screen yum-utils vim ntp libnl \
lsof libxml2-python python mlocate numactl* lmod-ohpc ohpc-slurm-client lmod-ohpc ganglia-gmond-ohpc environment-modules hwloc-libs libfabric libpsm2 intel-clck-ohpc
Setup SSH Keys for the Cluster – this is required for the root user only, /home will be exported so user ssh keys will be available.
wwinit ssh_keys
cat ~/.ssh/cluster.pub >> $CHROOT/root/.ssh/authorized_keys
Setup NFS exports and FSTAB on the compute image
echo "${sms_ip}:/home /home nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0" >> $CHROOT/etc/fstab
echo "${sms_ip}:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0" >> $CHROOT/etc/fstab
echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
exportfs -a
systemctl restart rpcbind
systemctl restart nfs-server.service
Copy over resolv.conf to the chroot and modify the contents to point to the headnode and Google DNS.
cp /etc/resolv.conf $CHROOT/etc/
vi $CHROOT/etc/resolv.conf
Modify Limits to unlimited on headnode and compute nodes.
echo "* soft memlock unlimited" >> /etc/security/limits.conf
echo "* hard memlock unlimited" >> /etc/security/limits.conf
echo "* soft memlock unlimited" >> $CHROOT/etc/security/limits.conf
echo "* hard memlock unlimited" >> $CHROOT/etc/security/limits.conf
Import warewulf files to the database, these will be synced to compute nodes all of the time.
wwsh file import /etc/passwd
wwsh file import /etc/shadow
wwsh file import /etc/group
wwsh file import /etc/slurm/slurm.conf
wwsh file import /etc/munge/munge.key
wwsh file import /opt/ohpc/pub/examples/network/centos/ifcfg-ib0.ww
wwsh -y file set ifcfg-ib0.ww --path=/etc/sysconfig/network-scripts/ifcfg-ib0
Building the bootstrap and vnfs images
wwbootstrap 3.10.0-229.20.1.el7.x86_64
wwvnfs -y --chroot
Building the bootstrap and vnfs images
wwbootstrap 3.10.0-229.20.1.el7.x86_64
wwvnfs -y --chroot
Compute Node Configuration for Stateful Provisioning
Adding the compute nodes to the database with the correct parameters to allow for stateful provisioning
wwsh node new c0 --ipaddr=some.ip.address.here --hwaddr=some.mac.address.here -D eno1
echo "GATEWAYDEV=${eth_provision}" > /tmp/network.$$
wwsh -y file import /tmp/network.$$ --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
wwsh -y provision set c0 --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh -y provision set c0 --vnfs=centos7.1 --bootstrap=kernel.version.here
Setup Bootloader and Partitions
wwsh -y object modify -s bootloader=sda c0
wwsh -y object modify -s diskpartition=sda c0
wwsh -y object modify -s diskformat=sda1,sda2,sda3 c0
wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext4:size=500,dev=sda2:type=swap:size=32768,mountpoint=/:dev=sda3:type=ext4:size=fill" c0
systemctl restart dhcpd
wwsh pxe update
wwsh dhcp update
wwsh node list # Should show all the nodes you just added