Lustre install for Unit.tv

From Define Wiki
Jump to navigation Jump to search

Preparation

Nodes

IML:

  1. CentOS 6.6
  2. Cobbler
    1. set the kickstart file to install the system on the first partition only
  3. IML

MDS

  1. CentOS 6.6
  2. RAID:
    1. small partition for MGS
    2. large partition for MDT

OSS:

  1. CentOS 6.6
  2. RAID:
    1. 2 RAID arrays: r6

Network

node ip mac formatted mac
head 172.28.0.144
iml 172.105.0.10
mds1 172.105.0.11 0CC47A5210EE 0C:C4:7A:52:10:EE
mds2 172.105.0.12 0CC47A520E6E 0C:C4:7A:52:0E:6E
oss1 172.105.0.21 0CC47A6930A8 0C:C4:7A:69:30:A8
oss2 172.105.0.22 0CC47A694D1A 0C:C4:7A:69:4D:1A
oss3 172.105.0.23 0CC47A6930A4 0C:C4:7A:69:30:A4
oss4 172.105.0.24 0CC47A694DE4 0C:C4:7A:69:4D:E4
  1. Switch
    1. enable ETH license
  2. Cards
    1. first port IB, second port eth
  • IML
    • IP: 172.28.0.114

Installation

  • Network
    • all IB cards should be set to port1 -> IB, port2 -> ETH
    • some ports on the IB switch should be ETH
  • for every pair of JBOD, connect network cable directly to each other

Install Cobbler

   17  yum install httpd dhcp bind
   18  yum install syslinux
   19  vim /etc/sysconfig/dhcpd
   20  history
[root@iml-utv ieel-2.2.0.0]# cat /etc/sysconfig/dhcpd
# Command line options here
DHCPDARGS=eth1
 install epel repo
## RHEL/CentOS 6 64-Bit ##
# wget http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
# rpm -ivh epel-release-6-8.noarch.rpm

install cobbler

yum install cobbler cobbler-web
for debian system deployment support 
   36  yum install debmirror 
   37  yum install pykickstart

Set iptables and SELinux for Cobbler (refer to here). When using virt-manager, the interfaces should be br0 and br1.

[root@iml-utv ieel-2.2.0.0]# setenforce 0

setenforce: SELinux is disabled

[root@iml-utv conf.d]# cat /etc/sysconfig/iptables
*nat
:PREROUTING ACCEPT [0:0]
:POSTROUTING ACCEPT [44:2156]
:OUTPUT ACCEPT [48:2352]
-A POSTROUTING -o eth0 -j MASQUERADE 
COMMIT
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [31985:2140861]
-A INPUT -s 172.105.0.0/16 -p udp -m udp --dport 69 -j ACCEPT 
-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT 
-A INPUT -p icmp -j ACCEPT 
-A INPUT -i lo -j ACCEPT 
-A INPUT -p tcp -m state --state NEW -m tcp --dport 22 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 69 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 80 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 443 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 25151 -j ACCEPT 
-A INPUT -p udp -m state --state NEW -m udp --dport 69 -j ACCEPT 
-A FORWARD -i eth1 -o eth0 -m state --state RELATED,ESTABLISHED -j ACCEPT 
-A FORWARD -i eth0 -o eth1 -j ACCEPT 
COMMIT

for tftp:

[root@iml-utv conf.d]# cat /etc/sysconfig/iptables-config 
# Load additional iptables modules (nat helpers)
#   Default: -none-
# Space separated list of nat helpers (e.g. 'ip_nat_ftp ip_nat_irc'), which
# are loaded after the firewall rules are applied. Options for the helpers are
# stored in /etc/modprobe.conf.
IPTABLES_MODULES="ip_conntrack ip_conntrack_tftp"

xinetd:

cat /etc/xinetd.d/rsync
# default: off
# description: The rsync server is a good addition to an ftp server, as it \
#	allows crc checksumming etc.
service rsync
{
	disable	= no
	flags		= IPv6
	socket_type     = stream
	wait            = no
	user            = root
	server          = /usr/bin/rsync
	server_args     = --daemon
	log_on_failure  += USERID
}

Edit /etc/debmirror.conf and comment out the dists and arches lines. Then edit /etc/cobbler/settings to be:

# set to 1 to enable Cobbler's DHCP management features.
# the choice of DHCP management engine is in /etc/cobbler/modules.conf
manage_dhcp: 1

# set to 1 to enable Cobbler's DNS management features.
# the choice of DNS mangement engine is in /etc/cobbler/modules.conf
manage_dns: 1

next_server: 172.105.0.1
server: 172.105.0.1

pxe_just_once: 1  # to avoid reinstall looping
edit /etc/cobbler/dhcp.template:
cat /etc/cobbler/dhcp.template
subnet 172.105.0.0 netmask 255.255.0.0 {
     option routers             172.105.0.1;
     option domain-name-servers 172.105.0.1;
     option subnet-mask         255.255.0.0;
     range dynamic-bootp        172.105.0.100 172.105.0.200;
     default-lease-time         21600;
     max-lease-time             43200;
     next-server                $next_server;
     class "pxeclients" {
          match if substring (option vendor-class-identifier, 0, 9) = "PXEClient";
          if option pxe-system-type = 00:02 {
                  filename "ia64/elilo.efi";
          } else if option pxe-system-type = 00:06 {
                  filename "grub/grub-x86.efi";
          } else if option pxe-system-type = 00:07 {
                  filename "grub/grub-x86_64.efi";
          } else {
                  filename "pxelinux.0";
          }
     }

}

dhcpd.conf

[root@iml-utv ieel-2.2.0.0]# cat /etc/dhcp/dhcpd.conf 
#
# DHCP Server Configuration file.
#   see /usr/share/doc/dhcp*/dhcpd.conf.sample
#   see 'man 5 dhcpd.conf'
#
#
subnet 172.105.0.0 netmask 255.255.0.0 {
     option routers             172.105.0.1;
     option domain-name-servers 172.105.0.1;
     option subnet-mask         255.255.0.0;
     range dynamic-bootp        172.105.0.100 172.105.0.200;
     default-lease-time         21600;
     max-lease-time             43200;
     next-server                172.105.0.1;
     filename 			"pxelinux.0";

}

Disable dhcpd for virt-manager. In this example I wanted to run a virtual DHCP server on the KVM system, but the KVM server was getting in the way because it also runs a DHCP service. To disable it, remove the <dhcp> tags from the file /var/lib/libvirt/network/default.xml. Change from:

<network>
  <name>default</name>
  <uuid>319d4ae5-4b6e-433c-832f-ad20269ed495</uuid>
  <forward mode='nat'/>
  <bridge name='virbr0' stp='on' delay='0' />
  <mac address='52:54:00:09:89:76'/>
  <ip address='192.168.122.1' netmask='255.255.255.0'>
    <dhcp>
      <range start='192.168.122.2' end='192.168.122.254' />
    </dhcp>
  </ip>
</network>

Change to:

<network>
  <name>default</name>
  <uuid>319d4ae5-4b6e-433c-832f-ad20269ed495</uuid>
  <forward mode='nat'/>
  <bridge name='virbr0' stp='on' delay='0' />
  <mac address='52:54:00:09:89:76'/>
  <ip address='192.168.122.1' netmask='255.255.255.0'>
  </ip>
</network>

And then restart libvirtd and dnsmasq:

  service libvirtd restart
  service dnsmasq restart

restart the dhcpd

[root@iml-utv ieel-2.2.0.0]# service dhcpd restart
start services:
[root@iml-utv conf.d]# service named restart
[root@iml-utv ieel-2.2.0.0]# service xinetd restart
Starting xinetd:                                           [  OK  ]
[root@iml-utv ieel-2.2.0.0]# service dhcpd restart
[root@iml-utv ieel-2.2.0.0]# service httpd restart
Starting httpd: 
[root@iml-utv ieel-2.2.0.0]# service cobblerd restart
Starting cobbler daemon:                                   [  OK  ]

cobbler check

[root@iml-utv ieel-2.2.0.0]# cobbler check
[root@iml-utv conf.d]# cobbler sync

Find ISOs:

[root@iml-utv ~]# mount -o loop ./CentOS-6.6-x86_64-bin-DVD1.iso /mnt/
[root@iml-utv ~]# cobbler import --name=CentOS_6.6_x86_64 --path=/mnt --breed=redhat --arch=x86_64
[root@iml-utv ~]# cobbler sync

add systems

cobbler system add --name=mds1 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:52:10:EE --ip-address=172.105.0.11 --hostname=mds1.utv.boston.co.uk --netboot-enabled=Y --dns-name=mds1.utv.boston.co.uk --interface eth0
cobbler system add --name=mds2 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:52:0E:6E --ip-address=172.105.0.12 --hostname=mds2.utv.boston.co.uk --netboot-enabled=Y --dns-name=mds2.utv.boston.co.uk --interface eth0
cobbler system add --name=oss1 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:30:A8 --ip-address=172.105.0.21 --hostname=oss1.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss1.utv.boston.co.uk --interface eth0
cobbler system add --name=oss2 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:4D:1A --ip-address=172.105.0.22 --hostname=oss2.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss2.utv.boston.co.uk --interface eth0
cobbler system add --name=oss3 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:30:A4 --ip-address=172.105.0.23 --hostname=oss3.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss3.utv.boston.co.uk --interface eth0
cobbler system add --name=oss4 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:4D:E4 --ip-address=172.105.0.24 --hostname=oss4.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss4.utv.boston.co.uk --interface eth0
cobbler system add --name=mds1-ipmi --ip-address=172.105.1.11 --mac-address=0C:C4:7A:5A:8A:4A --hostname=mds1-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=mds2-ipmi --ip-address=172.105.1.12 --mac-address=0C:C4:7A:5A:8B:24 --hostname=mds2-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss1-ipmi --ip-address=172.105.1.21 --mac-address=0C:C4:7A:37:69:0C --hostname=oss1-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss2-ipmi --ip-address=172.105.1.22 --mac-address=0C:C4:7A:37:6F:AC --hostname=oss2-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss3-ipmi --ip-address=172.105.1.23 --mac-address=0C:C4:7A:37:69:0A --hostname=oss3-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss4-ipmi --ip-address=172.105.1.24 --mac-address=0C:C4:7A:37:6F:7C --hostname=oss4-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64

stop netboot

cobbler system edit --name=mds2 --netboot-enabled=N
/etc/hosts:
172.105.0.1	head-utv
172.105.0.10	iml
172.105.0.11	mds1.utv.boston.co.uk mds1
172.105.0.12	mds2.utv.boston.co.uk mds2
172.105.0.21	oss1.utv.boston.co.uk oss1
172.105.0.22	oss2.utv.boston.co.uk oss2
172.105.0.23	oss3.utv.boston.co.uk oss3
172.105.0.24	oss4.utv.boston.co.uk oss4

Testing

Install `lustre-iokit` first, then use this script on one oss node:

# Driver for obdfilter-survey (from lustre-iokit); run it on one OSS node.

# Targets to survey, one "<oss-host>:<ost-name>" entry per line.
TARGETS="
oss1.utv.boston.co.uk:lfs1-OST0000
oss1.utv.boston.co.uk:lfs1-OST0002
oss1.utv.boston.co.uk:lfs1-OST0004
oss1.utv.boston.co.uk:lfs1-OST0006
oss2.utv.boston.co.uk:lfs1-OST0001
oss2.utv.boston.co.uk:lfs1-OST0003
oss2.utv.boston.co.uk:lfs1-OST0005
oss2.utv.boston.co.uk:lfs1-OST0007
"

# Lower/upper bounds handed to obdfilter-survey as nobjlo/nobjhi and thrlo/thrhi.
NOBJLO=1
NOBJHI=256
THRLO=1
THRHI=256

# Result directory (handed to obdfilter-survey as rslt_loc).
#OUTPUT="/root/test_results/obd/12lun_00_01"
OUTPUT="/root/test_results/obd/short_peak"

# Test dataset size (MB) for each LUN. The total dataset size must be larger
# than twice the RAM size in order to defeat caching:
#   SIZE = (RAM size * 2) / number of LUNs
# Example: with 24 GB of RAM and 5 LUNs, (24 GB * 2) / 5 ~= 10 GB = 10240 MB.
# Before scheduling a complete test, do a very short trial run to make sure the
# configuration works; SIZE=100 is a good value for that.
#SIZE="46000"
SIZE="64000"
#SIZE="100"

# Create the result directory on each distinct host named in TARGETS. The host
# list is derived from TARGETS so it cannot drift from the targets under test.
# Word-splitting of $TARGETS is intentional: entries are whitespace-separated.
# shellcheck disable=SC2046,SC2086
for host in $(printf '%s\n' $TARGETS | cut -d: -f1 | sort -u); do
  ssh "$host" mkdir -p "$OUTPUT"
done

thrhi=$THRHI thrlo=$THRLO nobjhi=$NOBJHI nobjlo=$NOBJLO size=$SIZE case="disk" targets="$TARGETS" rslt_loc="$OUTPUT" obdfilter-survey