Difference between revisions of "Lustre install for Unit.tv"

From Define Wiki
Jump to navigation Jump to search
 
(One intermediate revision by the same user not shown)
Line 19: Line 19:
  
 
Switch:  
 
Switch:  
IP: 172.105.0.106
+
IP: 10.10.20.250 - mgmt0 | mgmt1  (DHCPv4)
  
 
=== Network ===
 
=== Network ===
Line 30: Line 30:
 
|-
 
|-
 
| head
 
| head
| 172.28.0.114
+
| 10.10.20.1
 
|  
 
|  
 
|  
 
|  
Line 36: Line 36:
 
|-
 
|-
 
| iml
 
| iml
| 172.105.0.10
+
| 10.10.20.10
 
|  
 
|  
 
|  
 
|  
Line 42: Line 42:
 
|-
 
|-
 
| mds1
 
| mds1
| 172.105.0.11
+
| 10.10.20.11
 
| 0CC47A553DB4
 
| 0CC47A553DB4
 
| 0CC47A553DB4
 
| 0CC47A553DB4
Line 48: Line 48:
 
|-
 
|-
 
| mds2
 
| mds2
| 172.105.0.12
+
| 10.10.20.12
 
| 0CC47A520E6E
 
| 0CC47A520E6E
 
| 0C:C4:7A:52:0E:6E
 
| 0C:C4:7A:52:0E:6E
Line 54: Line 54:
 
|-
 
|-
 
| oss1
 
| oss1
| 172.105.0.21
+
| 10.10.20.21
 
| 0CC47A6930A8
 
| 0CC47A6930A8
 
| 0C:C4:7A:69:30:A8
 
| 0C:C4:7A:69:30:A8
Line 60: Line 60:
 
|-
 
|-
 
| oss2
 
| oss2
| 172.105.0.22
+
| 10.10.20.22
 
| 0CC47A694D1A
 
| 0CC47A694D1A
 
| 0C:C4:7A:69:4D:1A
 
| 0C:C4:7A:69:4D:1A
Line 66: Line 66:
 
|-
 
|-
 
| oss3
 
| oss3
| 172.105.0.23
+
| 10.10.20.23
 
| 0CC47A6930A4
 
| 0CC47A6930A4
 
| 0C:C4:7A:69:30:A4
 
| 0C:C4:7A:69:30:A4
Line 72: Line 72:
 
|-
 
|-
 
| oss4
 
| oss4
| 172.105.0.24
+
| 10.10.20.24
 
| 0CC47A694DE4
 
| 0CC47A694DE4
 
| 0C:C4:7A:69:4D:E4
 
| 0C:C4:7A:69:4D:E4

Latest revision as of 06:45, 19 August 2015

Preparation

Nodes

IML:

  1. CentOS 6.6
  2. Cobbler
    1. set the kickstart file to install the system only on the first partition
  3. IML

MDS

  1. CentOS 6.6
  2. RAID:
    1. small partition for MGS
    2. large partition for MDT

OSS:

  1. CentOS 6.6
  2. RAID:
    1. 2 RAID arrays: r6

Switch: IP: 10.10.20.250 - mgmt0 | mgmt1 (DHCPv4)

Network

node ip mac formatted mac
head 10.10.20.1
iml 10.10.20.10
mds1 10.10.20.11 0CC47A553DB4 0C:C4:7A:55:3D:B4
mds2 10.10.20.12 0CC47A520E6E 0C:C4:7A:52:0E:6E
oss1 10.10.20.21 0CC47A6930A8 0C:C4:7A:69:30:A8
oss2 10.10.20.22 0CC47A694D1A 0C:C4:7A:69:4D:1A
oss3 10.10.20.23 0CC47A6930A4 0C:C4:7A:69:30:A4
oss4 10.10.20.24 0CC47A694DE4 0C:C4:7A:69:4D:E4
  1. Switch
    1. enable ETH license
  2. Cards
    1. first port IB, second port eth
  • IML
    • IP: 172.28.0.114

Installation

  • Network
    • all IB cards should be set to port1 -> IB, port2 -> ETH
    • some ports on the IB switch should be set to ETH
  • for every pair of JBODs, connect the two directly to each other with a network cable

Install Cobbler

   17  yum install httpd dhcp bind
   18  yum install syslinux
   19  vim /etc/sysconfig/dhcpd
   20  history
[root@iml-utv ieel-2.2.0.0]# cat /etc/sysconfig/dhcpd
# Command line options here
DHCPDARGS=eth1
 install epel repo
## RHEL/CentOS 6 64-Bit ##
# wget http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
# rpm -ivh epel-release-6-8.noarch.rpm

install cobbler

yum install cobbler cobbler-web
for debian system deployment support 
   36  yum install debmirror 
   37  yum install pykickstart

Set iptables and SELinux for Cobbler (refer to here). When using virt-manager, the interfaces should be br0 and br1.

[root@iml-utv ieel-2.2.0.0]# setenforce 0

setenforce: SELinux is disabled

[root@iml-utv conf.d]# cat /etc/sysconfig/iptables
*nat
:PREROUTING ACCEPT [0:0]
:POSTROUTING ACCEPT [44:2156]
:OUTPUT ACCEPT [48:2352]
-A POSTROUTING -o eth0 -j MASQUERADE 
COMMIT
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [31985:2140861]
-A INPUT -s 172.105.0.0/16 -p udp -m udp --dport 69 -j ACCEPT 
-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT 
-A INPUT -p icmp -j ACCEPT 
-A INPUT -i lo -j ACCEPT 
-A INPUT -p tcp -m state --state NEW -m tcp --dport 22 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 69 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 80 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 443 -j ACCEPT 
-A INPUT -p tcp -m tcp --dport 25151 -j ACCEPT 
-A INPUT -p udp -m state --state NEW -m udp --dport 69 -j ACCEPT 
-A FORWARD -i eth1 -o eth0 -m state --state RELATED,ESTABLISHED -j ACCEPT 
-A FORWARD -i eth0 -o eth1 -j ACCEPT 
COMMIT

for tftp:

[root@iml-utv conf.d]# cat /etc/sysconfig/iptables-config 
# Load additional iptables modules (nat helpers)
#   Default: -none-
# Space separated list of nat helpers (e.g. 'ip_nat_ftp ip_nat_irc'), which
# are loaded after the firewall rules are applied. Options for the helpers are
# stored in /etc/modprobe.conf.
IPTABLES_MODULES="ip_conntrack ip_conntrack_tftp"

xinetd:

cat /etc/xinetd.d/rsync
# default: off
# description: The rsync server is a good addition to an ftp server, as it \
#	allows crc checksumming etc.
service rsync
{
	disable	= no
	flags		= IPv6
	socket_type     = stream
	wait            = no
	user            = root
	server          = /usr/bin/rsync
	server_args     = --daemon
	log_on_failure  += USERID
}

Edit /etc/debmirror.conf and comment out the dists and arches lines. Then edit /etc/cobbler/settings to be:

# set to 1 to enable Cobbler's DHCP management features.
# the choice of DHCP management engine is in /etc/cobbler/modules.conf
manage_dhcp: 1

# set to 1 to enable Cobbler's DNS management features.
# the choice of DNS mangement engine is in /etc/cobbler/modules.conf
manage_dns: 1

next_server: 172.105.0.1
server: 172.105.0.1

pxe_just_once: 1  # to avoid reinstall looping
edit /etc/cobbler/dhcp.template:
cat /etc/cobbler/dhcp.template
subnet 172.105.0.0 netmask 255.255.0.0 {
     option routers             172.105.0.1;
     option domain-name-servers 172.105.0.1;
     option subnet-mask         255.255.0.0;
     range dynamic-bootp        172.105.0.100 172.105.0.200;
     default-lease-time         21600;
     max-lease-time             43200;
     next-server                $next_server;
     class "pxeclients" {
          match if substring (option vendor-class-identifier, 0, 9) = "PXEClient";
          if option pxe-system-type = 00:02 {
                  filename "ia64/elilo.efi";
          } else if option pxe-system-type = 00:06 {
                  filename "grub/grub-x86.efi";
          } else if option pxe-system-type = 00:07 {
                  filename "grub/grub-x86_64.efi";
          } else {
                  filename "pxelinux.0";
          }
     }

}

dhcpd.conf

[root@iml-utv ieel-2.2.0.0]# cat /etc/dhcp/dhcpd.conf 
#
# DHCP Server Configuration file.
#   see /usr/share/doc/dhcp*/dhcpd.conf.sample
#   see 'man 5 dhcpd.conf'
#
#
subnet 172.105.0.0 netmask 255.255.0.0 {
     option routers             172.105.0.1;
     option domain-name-servers 172.105.0.1;
     option subnet-mask         255.255.0.0;
     range dynamic-bootp        172.105.0.100 172.105.0.200;
     default-lease-time         21600;
     max-lease-time             43200;
     next-server                172.105.0.1;
     filename 			"pxelinux.0";

}

Disable dhcpd for virt-manager: In this example I wanted to run a virtual DHCP server on the KVM system. The KVM server was getting in the way, as it also runs a DHCP service. To disable it, remove the <dhcp> tags from the file /var/lib/libvirt/network/default.xml. Change from:

<network>
  <name>default</name>
  <uuid>319d4ae5-4b6e-433c-832f-ad20269ed495</uuid>
  <forward mode='nat'/>
  <bridge name='virbr0' stp='on' delay='0' />
  <mac address='52:54:00:09:89:76'/>
  <ip address='192.168.122.1' netmask='255.255.255.0'>
    <dhcp>
      <range start='192.168.122.2' end='192.168.122.254' />
    </dhcp>
  </ip>
</network>

Change to:

<network>
  <name>default</name>
  <uuid>319d4ae5-4b6e-433c-832f-ad20269ed495</uuid>
  <forward mode='nat'/>
  <bridge name='virbr0' stp='on' delay='0' />
  <mac address='52:54:00:09:89:76'/>
  <ip address='192.168.122.1' netmask='255.255.255.0'>
  </ip>
</network>

And then restart libvirtd and dnsmasq:

  service libvirtd restart
  service dnsmasq restart

restart the dhcpd

[root@iml-utv ieel-2.2.0.0]# service dhcpd restart
start services:
[root@iml-utv conf.d]# service named restart
[root@iml-utv ieel-2.2.0.0]# service xinetd restart
Starting xinetd:                                           [  OK  ]
[root@iml-utv ieel-2.2.0.0]# service dhcpd restart
[root@iml-utv ieel-2.2.0.0]# service httpd restart
Starting httpd: 
[root@iml-utv ieel-2.2.0.0]# service cobblerd restart
Starting cobbler daemon:                                   [  OK  ]

cobbler check

[root@iml-utv ieel-2.2.0.0]# cobbler check
[root@iml-utv conf.d]# cobbler sync

Find ISOs:

[root@iml-utv ~]# mount -o loop ./CentOS-6.6-x86_64-bin-DVD1.iso /mnt/
[root@iml-utv ~]# cobbler import --name=CentOS_6.6_x86_64 --path=/mnt --breed=redhat --arch=x86_64
[root@iml-utv ~]# cobbler sync

add systems

cobbler system add --name=mds1 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:52:10:EE --ip-address=172.105.0.11 --hostname=mds1.utv.boston.co.uk --netboot-enabled=Y --dns-name=mds1.utv.boston.co.uk --interface eth0
cobbler system add --name=mds2 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:52:0E:6E --ip-address=172.105.0.12 --hostname=mds2.utv.boston.co.uk --netboot-enabled=Y --dns-name=mds2.utv.boston.co.uk --interface eth0
cobbler system add --name=oss1 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:30:A8 --ip-address=172.105.0.21 --hostname=oss1.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss1.utv.boston.co.uk --interface eth0
cobbler system add --name=oss2 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:4D:1A --ip-address=172.105.0.22 --hostname=oss2.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss2.utv.boston.co.uk --interface eth0
cobbler system add --name=oss3 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:30:A4 --ip-address=172.105.0.23 --hostname=oss3.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss3.utv.boston.co.uk --interface eth0
cobbler system add --name=oss4 --profile=CentOS_6.6-x86_64 --mac=0C:C4:7A:69:4D:E4 --ip-address=172.105.0.24 --hostname=oss4.utv.boston.co.uk --netboot-enabled=Y --dns-name=oss4.utv.boston.co.uk --interface eth0
cobbler system add --name=mds1-ipmi --ip-address=172.105.1.11 --mac-address=0C:C4:7A:5A:8A:4A --hostname=mds1-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=mds2-ipmi --ip-address=172.105.1.12 --mac-address=0C:C4:7A:5A:8B:24 --hostname=mds2-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss1-ipmi --ip-address=172.105.1.21 --mac-address=0C:C4:7A:37:69:0C --hostname=oss1-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss2-ipmi --ip-address=172.105.1.22 --mac-address=0C:C4:7A:37:6F:AC --hostname=oss2-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss3-ipmi --ip-address=172.105.1.23 --mac-address=0C:C4:7A:37:69:0A --hostname=oss3-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64
cobbler system add --name=oss4-ipmi --ip-address=172.105.1.24 --mac-address=0C:C4:7A:37:6F:7C --hostname=oss4-ipmi --interface=eth0 --profile=CentOS_6.6-x86_64

stop netboot

cobbler system edit --name=mds2 --netboot-enabled=N
/etc/hosts:
172.105.0.1	head-utv
172.105.0.10	iml
172.105.0.11	mds1.utv.boston.co.uk mds1
172.105.0.12	mds2.utv.boston.co.uk mds2
172.105.0.21	oss1.utv.boston.co.uk oss1
172.105.0.22	oss2.utv.boston.co.uk oss2
172.105.0.23	oss3.utv.boston.co.uk oss3
172.105.0.24	oss4.utv.boston.co.uk oss4

To update the repo of IML,

1. copy the packages to folder /var/lib/chroma/repo/lustre

2. use `createrepo`:

createrepo /var/lib/chroma/repo/lustre

Testing

Install `lustre-iokit` first, then use this script on one oss node:

TARGETS="
oss1.utv.boston.co.uk:lfs1-OST0000
oss1.utv.boston.co.uk:lfs1-OST0002
oss1.utv.boston.co.uk:lfs1-OST0004
oss1.utv.boston.co.uk:lfs1-OST0006
oss2.utv.boston.co.uk:lfs1-OST0001
oss2.utv.boston.co.uk:lfs1-OST0003
oss2.utv.boston.co.uk:lfs1-OST0005
oss2.utv.boston.co.uk:lfs1-OST0007
"
NOBJLO=1
NOBJHI=256
THRLO=1
THRHI=256
#OUTPUT="/root/test_results/obd/12lun_00_01"
 OUTPUT="/root/test_results/obd/short_peak"
 
# The test dataset size (MB) for each LUN. The total dataset size must be larger than 2 times of the RAM size in order to avoid the caching.
# The calculation of the size
# (RAM size * 2) / the number of LUNs
# For example, the server RAM is 24GB and there are 5 LUN
# the size should be (24GB * 2 ) / 5 ~= 10 GB = 10240 MB
# However, it is a good idea to run a very short trial run to make sure the test configuration working properly before scheduling a complete test.
# SIZE=100 is a good number for the trial run
 
 
#SIZE="46000"
SIZE="64000"
#SIZE="100"
 
ssh lustre01-oss1 mkdir -p $OUTPUT
ssh lustre01-oss2 mkdir -p $OUTPUT

thrhi=$THRHI thrlo=$THRLO nobjhi=$NOBJHI nobjlo=$NOBJLO size=$SIZE case="disk" targets=$TARGETS rslt_loc=$OUTPUT obdfilter-survey

IEEL Specific Configurations (WIP)

NTP Server

Set "iml.utv.boston.co.uk" during Install

service ntpd stop
ntpdate pool.ntp.org
vi /etc/ntp.conf
# Amend server line for "pool.ntp.org"
Save and Exit
service ntpd restart

MGT / MDT Creation: create a single RAID10 array across the MDS JBOD

storcli64 /call /v0 del force
storcli64 /call add vd type=r10 drives=13:0-23 pdperarray=8
storcli64 /call /v0 start init force

Create the Volumes

#Select the device which corresponds to the JBOD 
parted /dev/sd#
mklabel gpt

#Create 20GB Partition for MGT
mkpart
mgt

0
20000

#Fill the rest for the MDT
mkpart
mdt
20001
3593000

list

Additional Install Notes (IEEL 2.3)

  • Tar file with all IEEL 2.3 packages and additional IB packages for mofed 2.4-1.0.4
    • Kernel-ib packages must be installed manually on MDS's and OSS's
    • Also ensure you install mlnx-ofed-debug