Difference between revisions of "CloudX: Mellanox CloudX Installation"
Jump to navigation
Jump to search
| (31 intermediate revisions by 2 users not shown) | |||
| Line 16: | Line 16: | ||
* CentOS 6.4 Base System | * CentOS 6.4 Base System | ||
* MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64.tgz package copied across | * MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64.tgz package copied across | ||
| + | * Make sure all the external YUM repos are disabled | ||
| + | * Ensure times are all in sync across all nodes | ||
| + | * Setup ssh passwordless access | ||
| + | * Turn off IPtables on the compute nodes (probably not the best security practice! I'll need to come back here and confirm ports to open eventually) | ||
| + | |||
| + | === No External Repos === | ||
| + | <syntaxhighlight> | ||
| + | [root@ft1 ~]# ls /etc/yum.repos.d/ | ||
| + | CentOS-Base.repo CentOS-Debuginfo.repo CentOS-Media.repo CentOS-Vault.repo | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === Package Setup === | ||
<syntaxhighlight> | <syntaxhighlight> | ||
# Install the additional packages | # Install the additional packages | ||
| Line 22: | Line 34: | ||
cd MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64 | cd MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64 | ||
./mlnxofedinstall --force --all | ./mlnxofedinstall --force --all | ||
| + | |||
| + | # setup the adaptors as ethernet if using VPI | ||
| + | connectx_port_config | ||
| + | |||
| + | # Verify the ports | ||
| + | [root@blade2 ~]# connectx_port_config -s | ||
| + | -------------------------------- | ||
| + | Port configuration for PCI device: 0000:07:00.0 is: | ||
| + | eth | ||
| + | eth | ||
| + | -------------------------------- | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === Setup Grub === | ||
| + | * Setup GRUB to boot with SR-IOV support, add <tt>intel_iommu=on</tt> to the <tt>kernel</tt> args | ||
| + | <syntaxhighlight> | ||
| + | title CentOS (2.6.32-358.el6.x86_64) | ||
| + | root (hd0,0) | ||
| + | kernel /vmlinuz-2.6.32-358.el6.x86_64 ro root=/dev/mapper/vg_blade3-lv_root \ | ||
| + | rd_NO_LUKS rd_LVM_LV=vg_blade3/lv_root LANG=en_US.UTF-8 \ | ||
| + | rd_NO_MD SYSFONT=latarcyrheb-sun16 crashkernel=auto rd_LVM_LV=vg_blade3/lv_swap \ | ||
| + | KEYBOARDTYPE=pc KEYTABLE=us rd_NO_DM rhgb quiet intel_iommu=on | ||
| + | initrd /initramfs-2.6.32-358.el6.x86_64.img | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * '''NOTE SETUP THE BIOS''' - come back once confirmed settings | ||
| + | === Verify PCI Speeds === | ||
| + | * Setup PCI Utils, make sure we are running at 8GT/s | ||
| + | |||
| + | * ConnectX2 Sample Output - '''This is only GEN2 output''' | ||
| + | <syntaxhighlight> | ||
| + | [root@blade1 ~]# lspci -d 15b3: -vv | grep LnkSta | ||
| + | LnkSta: Speed 5GT/s, Width x8, TrErr- Train- SlotClk- DLActive- BWMgmt- ABWMgmt- | ||
| + | LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete-, EqualizationPhase1- | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === Setup Control Node with KVM === | ||
| + | * Verify Host Supports KVM | ||
| + | <syntaxhighlight> | ||
| + | egrep '(vmx|svm)' --color=always /proc/cpuinfo | ||
| + | # Should have a vmx flag on each core | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Install KVM | ||
| + | <syntaxhighlight> | ||
| + | yum install xauth | ||
| + | yum groupinstall Virtualization 'Virtualization Client' 'Virtualization Platform' 'Virtualization Tools' | ||
| + | modprobe -a kvm kvm-intel | ||
| + | /etc/init.d/libvirtd start | ||
| + | chkconfig libvirtd on | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Verify the <tt>virbr0</tt> interface is setup and ready | ||
| + | <syntaxhighlight> | ||
| + | [root@blade2 ~]# ifconfig virbr0 | ||
| + | virbr0 Link encap:Ethernet HWaddr 52:54:00:1D:04:7A | ||
| + | inet addr:192.168.122.1 Bcast:192.168.122.255 Mask:255.255.255.0 | ||
| + | UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 | ||
| + | RX packets:0 errors:0 dropped:0 overruns:0 frame:0 | ||
| + | TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 | ||
| + | collisions:0 txqueuelen:0 | ||
| + | RX bytes:0 (0.0 b) TX bytes:0 (0.0 b) | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Setup a network bridge on the host with DHCP | ||
| + | <syntaxhighlight> | ||
| + | # create the following file | ||
| + | [root@blade2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-br0 | ||
| + | DEVICE=br0 | ||
| + | TYPE=Bridge | ||
| + | BOOTPROTO=dhcp | ||
| + | ONBOOT=yes | ||
| + | DELAY=0 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Edit the <tt>eth0</tt> configuration to add a bridge: <tt>BRIDGE=br0</tt> | ||
| + | <syntaxhighlight> | ||
| + | [root@blade2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth0 | ||
| + | DEVICE=eth0 | ||
| + | ONBOOT=yes | ||
| + | HWADDR=00:25:90:C4:E9:8A | ||
| + | TYPE=Ethernet | ||
| + | BOOTPROTO=dhcp | ||
| + | BRIDGE=br0 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Reboot the node once complete | ||
| + | <syntaxhighlight> | ||
| + | reboot | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === Configure the CloudX VM === | ||
| + | * Bring up the KVM manager | ||
| + | <syntaxhighlight> | ||
| + | # X11 fwding required | ||
| + | virt-manager | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Steps in the manager: | ||
| + | # Step 1 of 4 | ||
| + | # Create a new VM | ||
| + | # Select 'Import existing disk image' | ||
| + | # Click 'Forward' | ||
| + | # Step 2 of 4 | ||
| + | # Select the qcow2 image | ||
| + | # OS Type: Linux | ||
| + | # Version: Redhat Enterprise Linux 6 | ||
| + | # Select 'Forward' | ||
| + | # Step 3 of 4 | ||
| + | # RAM: 1024MB | ||
| + | # CPUs: 1 | ||
| + | # Select 'Forward' | ||
| + | # Step 4 of 4 | ||
| + | # Select the advanced options | ||
| + | # Host device should be br0 | ||
| + | # Virt Type: KVM | ||
| + | # Arch: x86_64 | ||
| + | # Finish | ||
| + | |||
| + | * Shut down the VM when it starts, we need to edit the disk format | ||
| + | * Select the 'i' or Information tab, Select the disk option | ||
| + | * Make sure the '''Storage Format''' is <tt>qcow2</tt> | ||
| + | * Make sure the '''Disk Bus''' is <tt>IDE</tt> | ||
| + | * Select the '''Display VNC''' entry | ||
| + | * Make sure the '''Keymap''' is <tt>en-gb</tt> (so the keyboard works in the remote console) | ||
| + | * Power on the VM and let it boot | ||
| + | * Check the VM settings (IP DHCP etc, go through VNC) | ||
| + | <syntaxhighlight> | ||
| + | # Note; Keys were not working correctly through virt-manager, had to use vncviewer instead | ||
| + | yum install tsclient | ||
| + | vncviewer localhost:0 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | You can revert back to ssh once you have the IP address. The default username and password are '''root''' and '''password''' | ||
| + | |||
| + | === CloudX Setup (on the VM) === | ||
| + | * Log in to the VM (root/password) | ||
| + | * Verify the configuration settings in: <tt>/opt/cloudx_install/conf/cloudx.conf</tt> | ||
| + | ** '''Note:''' The first installation we did wasn't with a Mellanox switch, so we had to change the Fabric Preparation setting to False | ||
| + | <syntaxhighlight> | ||
| + | # in the file /opt/cloudx_install/conf/cloudx.conf | ||
| + | fabric_preparation = False | ||
| + | |||
| + | # these are the settings Mellanox recommends using if you have a non-Mellanox switch | ||
| + | # Per switch: | ||
| + | dcb priority-flow-control enable force | ||
| + | dcb priority-flow-control priority 3 enable | ||
| + | interface ethernet 1/1-1/{ports} dcb priority-flow-control mode on force | ||
| + | interface ethernet 1/1-1/{ports} mtu {mtu} force | ||
| + | vlan {min_vlan}-{max_vlan} (default is 10 vlans) | ||
| + | |||
| + | # Per port: | ||
| + | interface ethernet 1/{portnum} switchport mode hybrid | ||
| + | interface ethernet 1/{portnum} switchport hybrid allowed-vlan all | ||
| + | |||
| + | ***Pay attention you configure the switch with the same vlan range you did in cloudx.conf (min_vlan/max_vlan parameters) | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | Another update on VLAN settings | ||
| + | <syntaxhighlight> | ||
| + | I’ve configured the switch according to prerequisites as follows: | ||
| + | Step 1: Enter the switch via CLI | ||
| + | Enter ‘enable’ | ||
| + | Enter ‘Configure Terminal’ | ||
| + | |||
| + | Step 2: Configure the required vlans on the switch ( in our case 2-10) and configure ports as hybrid | ||
| + | Enter ‘vlan 2-10’ | ||
| + | Enter ‘exit’ | ||
| + | Enter ‘interface ethernet 1/<port-num> switchport mode hybrid’ | ||
| + | Enter ‘interface ethernet 1/<port-num> switchport hybrid allowed-vlan all’ | ||
| + | |||
| + | · To verify this you can perform the following commands: | ||
| + | ‘show vlan’ | ||
| + | ‘show interfaces switchport’ | ||
| + | |||
| + | |||
| + | Step 3: Enable DCB priority flow control: | ||
| + | Enter ‘dcb priority-flow-control enable force’ | ||
| + | Enter ‘dcb priority-flow-control priority 3 enable’ | ||
| + | interface ethernet 1/1-1/<max-port> dcb priority-flow-control mode on force | ||
| + | |||
| + | · To verify this you can perform the following command: | ||
| + | ‘ show dcb priority-flow-control’ | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Verify the servers configuration in: <tt>/opt/cloudx_install/conf/servers.csv</tt> | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | # This example is from the blade in the lab with: | ||
| + | # blade1: Storage | ||
| + | # blade2: Operation Node (CloudX Host) | ||
| + | # blade3: Network node | ||
| + | # blade4: Controller (Openstack Controller) | ||
| + | # cloudx: Installer (This is the cloudX VM!) note: The 192.xx address does nothing for this system, default pass is also password from the VM | ||
| + | # blade9/10: Compute | ||
| + | # Notes: IP is the eth0 interface, x for MAC as it doesn't work yet, Inband is the mlnx adaptor etc | ||
| + | |||
| + | # Make sure the file has no comments or anything else!! | ||
| + | # Installer failed with comments in the file!!! | ||
| + | |||
| + | [david@head-boston cloudx]$ cat servers.csv | ||
| + | IP,MAC/GUID,Inband,Username,Password,Card,Port,Role,Exclude | ||
| + | 172.28.15.10,x,192.168.0.10,root,Boston2014,mlx4_0,2,Compute,n | ||
| + | 172.28.15.9,x,192.168.0.9,root,Boston2014,mlx4_0,2,Compute,n | ||
| + | 172.28.15.3,x,192.168.0.3,root,Boston2014,mlx4_0,2,Network,n | ||
| + | 172.28.15.4,x,192.168.0.4,root,Boston2014,mlx4_0,2,Controller,n | ||
| + | 172.28.15.1,x,192.168.0.1,root,Boston2014,mlx4_0,2,Storage,n | ||
| + | 172.28.15.192,x,192.168.0.5,root,password,mlx4_0,2,Installer,n | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Verify the switches configuration in: <tt>/opt/cloudx_install/conf/switches.csv</tt> | ||
| + | <syntaxhighlight> | ||
| + | # Note: Mellanox default username/pass: admin/admin | ||
| + | [root@cloudx ~]# cat /opt/cloudx_install/conf/switches.csv | ||
| + | Role,Hostname,Username,Password | ||
| + | spine_0,172.28.250.103,admin,admin | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Other Host Preparations | ||
| + | Pull down a copy of the <code>cloudx.patches</code> file from the headnode: | ||
| + | <syntaxhighlight> | ||
| + | [david@head-boston cloudx]$ pwd | ||
| + | /home/david/projects/cloudx | ||
| + | [david@head-boston cloudx]$ ls | ||
| + | cloudx.patches conf-blade conf-ft ONE_CLICK_CLOUDX_1.0.0.8-31032014-2146.qcow2 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | # - apply the cloudx.patches file | ||
| + | cd /opt/cloudx_install/conf/ | ||
| + | patch -p0 < cloudx.patches | ||
| + | </syntaxhighlight> | ||
| + | * Make sure the host has a FQDN: eg. cloudx.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | # - update the file: /etc/sysconfig/network - | ||
| + | HOSTNAME=cloudx.boston.co.uk | ||
| + | # Update /etc/hosts | ||
| + | 172.28.0.220 cloudx.boston.co.uk cloudx | ||
| + | # Ensure the following cmd works | ||
| + | [root@cloudx ~]# hostname --fqdn | ||
| + | cloudx.boston.co.uk | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Once all the above is completed, you can launch the installer script. | ||
| + | <syntaxhighlight> | ||
| + | # use screen - installation process can take some time! | ||
| + | yum install screen | ||
| + | screen -S cloudx | ||
| + | /opt/cloudx_install/scripts/cloudx_installer.sh | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | <b>Once installed you can log in with admin/password</b> | ||
| + | |||
| + | == Problems and Resolutions == | ||
| + | |||
| + | === Errors in servers.csv file === | ||
| + | <syntaxhighlight> | ||
| + | [172.28.1.57] executing python fabric_installer.py --log_file=fabric_install.log --conf_file=/opt/cloudx_install/conf/cloudx.conf | ||
| + | Traceback (most recent call last): | ||
| + | File "fabric_installer.py", line 82, in <module> | ||
| + | main() | ||
| + | File "fabric_installer.py", line 70, in main | ||
| + | readCsvFile(conf_file) | ||
| + | File "/opt/cloudx_install/src/installers/common_methods.py", line 178, in readCsvFile | ||
| + | readCsvLine(line) | ||
| + | File "/opt/cloudx_install/src/installers/common_methods.py", line 210, in readCsvLine | ||
| + | host_auth_list = getCsvAuthListFromLine(line) | ||
| + | File "/opt/cloudx_install/src/installers/common_methods.py", line 230, in getCsvAuthListFromLine | ||
| + | l.pop(OpenStack.ROLE_COLUMN) | ||
| + | KeyError: 'Role' | ||
| + | [172.28.1.57] command failed python fabric_installer.py --log_file=fabric_install.log --conf_file=/opt/cloudx_install/conf/cloudx.conf- return code:1, err: None | ||
| + | [172.28.1.57] -------------------------------------------------- | ||
| + | [172.28.1.57] /// *Failed* main_installer.py on: 172.28.1.57 /// | ||
| + | [172.28.1.57] -------------------------------------------------- | ||
| + | </syntaxhighlight> | ||
| + | * Remove any comment lines from the '''<code>servers.csv</code>''' file - the script can't process/ignore comment lines! | ||
| + | |||
| + | === IB/10GbE Switch config issues === | ||
| + | <syntaxhighlight> | ||
| + | INFO - Running command (dcb priority-flow-control enable force) on 172.28.1.74 | ||
| + | INFO - Running command (dcb priority-flow-control priority 3 enable) on 172.28.1.74 | ||
| + | INFO - Running command (interface ethernet 1/1-1/36 dcb priority-flow-control mode on force) on 172.28.1.74 | ||
| + | ERROR - Error while configuring 172.28.1.74: interface ethernet 1/1-1/36 dcb priority-flow-control mode on force | ||
| + | % 2nd interface does not exist | ||
| + | switch-cac5b2 [standalone: master] (config) # | ||
| + | ERROR - Host 172.28.1.74 configuration failed! (Configuration error on 172.28.1.74) | ||
| + | </syntaxhighlight> | ||
| + | * Script is trying to configure more ports than the switch has available! | ||
| + | * Set '''<code>fabric_preparation</code>''' in '''<code>cloudx.conf</code>''' file to '''<code>False</code>''': | ||
| + | <syntaxhighlight> | ||
| + | # in the file /opt/cloudx_install/conf/cloudx.conf | ||
| + | fabric_preparation = False | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === 10GbE Link Status === | ||
| + | <syntaxhighlight> | ||
| + | [172.28.1.57] ###### Inband configuration - UP STATE CHECK - FAILED | ||
| + | |||
| + | [172.28.1.57] ###### Inband configuration - UP STATE CHECK - FAILED | ||
| + | </syntaxhighlight> | ||
| + | * Ensure SR-IOV (virtualisation) is enabled in the BIOS | ||
| + | * Reboot the failing nodes | ||
| + | |||
| + | === Host not listening on port 22 === | ||
| + | <syntaxhighlight> | ||
| + | [172.28.32.3:22] run: packstack --answer-file=/tmp/havana_answer_file | ||
| + | [172.28.32.3:22] out: Welcome to Installer setup utility | ||
| + | [172.28.32.3:22] out: Parameter CONFIG_GLANCE_HOST failed validation: Given host does not listen on port 22: 192.168.0.4,192.168.0.7 | ||
| + | [172.28.32.3:22] out: | ||
| + | [172.28.32.3:22] out: ERROR : Failed handling answer file: Given host does not listen on port 22: 192.168.0.4,192.168.0.7 | ||
| + | [172.28.32.3:22] out: Please check log file /var/tmp/packstack/20140901-104919-ayWqHA/openstack-setup.log for more information | ||
| + | [172.28.32.3:22] out: | ||
| + | |||
| + | |||
| + | Fatal error: run() received nonzero return code 1 while executing! | ||
</syntaxhighlight> | </syntaxhighlight> | ||
| + | * Try stopping <code>iptables</code> on the nodes | ||
| + | * We got rid of this in the end by having just one storage node! | ||
Latest revision as of 13:52, 10 September 2014
- Assumptions: Installed CentOS 6.4 base system, nothing else
- Access the software at:
- http://support.mellanox.com/SupportWeb/solutions/cloudx
- Username: cloudx
- Password: CloudXme
Download the CloudX Image
# Standard
wget http://support.mellanox.com/ftp/versions/current/Solutions/cloudX/1.0.0.8/ONE_CLICK_CLOUDX_1.0.0.8-31032014-2146.qcow2
# Continue WHEN things go wrong
wget -c http://support.mellanox.com/ftp/versions/current/Solutions/cloudX/1.0.0.8/ONE_CLICK_CLOUDX_1.0.0.8-31032014-2146.qcow2
Base System Setup
- CentOS 6.4 Base System
- MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64.tgz package copied across
- Make sure all the external YUM repos are disabled
- Ensure times are all in sync across all nodes
- Setup ssh passwordless access
- Turn off IPtables on the compute nodes (probably not the best security practice! I'll need to come back here and confirm ports to open eventually)
No External Repos
[root@ft1 ~]# ls /etc/yum.repos.d/
CentOS-Base.repo CentOS-Debuginfo.repo CentOS-Media.repo CentOS-Vault.repo
Package Setup
# Install the additional packages
yum install -y tcl gcc-gfortran.x86_64 tk
tar zxvf MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64.tgz
cd MLNX_OFED_LINUX-2.2-0.0.2_20140306_1723-rhel6.4-x86_64
./mlnxofedinstall --force --all
# setup the adaptors as ethernet if using VPI
connectx_port_config
# Verify the ports
[root@blade2 ~]# connectx_port_config -s
--------------------------------
Port configuration for PCI device: 0000:07:00.0 is:
eth
eth
--------------------------------
Setup Grub
- Setup GRUB to boot with SR-IOV support, add intel_iommu=on to the kernel args
title CentOS (2.6.32-358.el6.x86_64)
root (hd0,0)
kernel /vmlinuz-2.6.32-358.el6.x86_64 ro root=/dev/mapper/vg_blade3-lv_root \
rd_NO_LUKS rd_LVM_LV=vg_blade3/lv_root LANG=en_US.UTF-8 \
rd_NO_MD SYSFONT=latarcyrheb-sun16 crashkernel=auto rd_LVM_LV=vg_blade3/lv_swap \
KEYBOARDTYPE=pc KEYTABLE=us rd_NO_DM rhgb quiet intel_iommu=on
initrd /initramfs-2.6.32-358.el6.x86_64.img
- NOTE SETUP THE BIOS - come back once confirmed settings
Verify PCI Speeds
- Setup PCI Utils, make sure we are running at 8GT/s
- ConnectX2 Sample Output - This is only GEN2 output
[root@blade1 ~]# lspci -d 15b3: -vv | grep LnkSta
LnkSta: Speed 5GT/s, Width x8, TrErr- Train- SlotClk- DLActive- BWMgmt- ABWMgmt-
LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete-, EqualizationPhase1-
Setup Control Node with KVM
- Verify Host Supports KVM
egrep '(vmx|svm)' --color=always /proc/cpuinfo
# Should have a vmx flag on each core
- Install KVM
yum install xauth
yum groupinstall Virtualization 'Virtualization Client' 'Virtualization Platform' 'Virtualization Tools'
modprobe -a kvm kvm-intel
/etc/init.d/libvirtd start
chkconfig libvirtd on
- Verify the virbr0 interface is setup and ready
[root@blade2 ~]# ifconfig virbr0
virbr0 Link encap:Ethernet HWaddr 52:54:00:1D:04:7A
inet addr:192.168.122.1 Bcast:192.168.122.255 Mask:255.255.255.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:0 errors:0 dropped:0 overruns:0 frame:0
TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:0 (0.0 b) TX bytes:0 (0.0 b)
- Setup a network bridge on the host with DHCP
# create the following file
[root@blade2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-br0
DEVICE=br0
TYPE=Bridge
BOOTPROTO=dhcp
ONBOOT=yes
DELAY=0
- Edit the eth0 configuration to add a bridge: BRIDGE=br0
[root@blade2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
ONBOOT=yes
HWADDR=00:25:90:C4:E9:8A
TYPE=Ethernet
BOOTPROTO=dhcp
BRIDGE=br0
- Reboot the node once complete
reboot
Configure the CloudX VM
- Bring up the KVM manager
# X11 fwding required
virt-manager
- Steps in the manager:
- Step 1 of 4
- Create a new VM
- Select 'Import existing disk image'
- Click 'Forward'
- Step 2 of 4
- Select the qcow2 image
- OS Type: Linux
- Version: Redhat Enterprise Linux 6
- Select 'Forward'
- Step 3 of 4
- RAM: 1024MB
- CPUs: 1
- Select 'Forward'
- Step 4 of 4
- Select the advanced options
- Host device should be br0
- Virt Type: KVM
- Arch: x86_64
- Finish
- Shut down the VM when it starts, we need to edit the disk format
- Select the 'i' or Information tab, Select the disk option
- Make sure the Storage Format is qcow2
- Make sure the Disk Bus is IDE
- Select the Display VNC entry
- Make sure the Keymap is en-gb (so the keyboard works in the remote console)
- Power on the VM and let it boot
- Check the VM settings (IP DHCP etc, go through VNC)
# Note; Keys were not working correctly through virt-manager, had to use vncviewer instead
yum install tsclient
vncviewer localhost:0
You can revert back to ssh once you have the IP address. The default username and password are root and password
CloudX Setup (on the VM)
- Log in to the VM (root/password)
- Verify the configuration settings in: /opt/cloudx_install/conf/cloudx.conf
- Note: The first installation we did wasn't with a Mellanox switch, so we had to change the Fabric Preparation setting to False
# in the file /opt/cloudx_install/conf/cloudx.conf
fabric_preparation = False
# these are the settings Mellanox recommends using if you have a non-Mellanox switch
# Per switch:
dcb priority-flow-control enable force
dcb priority-flow-control priority 3 enable
interface ethernet 1/1-1/{ports} dcb priority-flow-control mode on force
interface ethernet 1/1-1/{ports} mtu {mtu} force
vlan {min_vlan}-{max_vlan} (default is 10 vlans)
# Per port:
interface ethernet 1/{portnum} switchport mode hybrid
interface ethernet 1/{portnum} switchport hybrid allowed-vlan all
***Pay attention you configure the switch with the same vlan range you did in cloudx.conf (min_vlan/max_vlan parameters)
Another update on VLAN settings
I’ve configured the switch according to prerequisites as follows:
Step 1: Enter the switch via CLI
Enter ‘enable’
Enter ‘Configure Terminal’
Step 2: Configure the required vlans on the switch ( in our case 2-10) and configure ports as hybrid
Enter ‘vlan 2-10’
Enter ‘exit’
Enter ‘interface ethernet 1/<port-num> switchport mode hybrid’
Enter ‘interface ethernet 1/<port-num> switchport hybrid allowed-vlan all’
· To verify this you can perform the following commands:
‘show vlan’
‘show interfaces switchport’
Step 3: Enable DCB priority flow control:
Enter ‘dcb priority-flow-control enable force’
Enter ‘dcb priority-flow-control priority 3 enable’
interface ethernet 1/1-1/<max-port> dcb priority-flow-control mode on force
· To verify this you can perform the following command:
‘ show dcb priority-flow-control’
- Verify the servers configuration in: /opt/cloudx_install/conf/servers.csv
# This example is from the blade in the lab with:
# blade1: Storage
# blade2: Operation Node (CloudX Host)
# blade3: Network node
# blade4: Controller (Openstack Controller)
# cloudx: Installer (This is the cloudX VM!) note: The 192.xx address does nothing for this system, default pass is also password from the VM
# blade9/10: Compute
# Notes: IP is the eth0 interface, x for MAC as it doesn't work yet, Inband is the mlnx adaptor etc
# Make sure the file has no comments or anything else!!
# Installer failed with comments in the file!!!
[david@head-boston cloudx]$ cat servers.csv
IP,MAC/GUID,Inband,Username,Password,Card,Port,Role,Exclude
172.28.15.10,x,192.168.0.10,root,Boston2014,mlx4_0,2,Compute,n
172.28.15.9,x,192.168.0.9,root,Boston2014,mlx4_0,2,Compute,n
172.28.15.3,x,192.168.0.3,root,Boston2014,mlx4_0,2,Network,n
172.28.15.4,x,192.168.0.4,root,Boston2014,mlx4_0,2,Controller,n
172.28.15.1,x,192.168.0.1,root,Boston2014,mlx4_0,2,Storage,n
172.28.15.192,x,192.168.0.5,root,password,mlx4_0,2,Installer,n
- Verify the switches configuration in: /opt/cloudx_install/conf/switches.csv
# Note: Mellanox default username/pass: admin/admin
[root@cloudx ~]# cat /opt/cloudx_install/conf/switches.csv
Role,Hostname,Username,Password
spine_0,172.28.250.103,admin,admin
- Other Host Preparations
Pull down a copy of the cloudx.patches file from the headnode:
[david@head-boston cloudx]$ pwd
/home/david/projects/cloudx
[david@head-boston cloudx]$ ls
cloudx.patches conf-blade conf-ft ONE_CLICK_CLOUDX_1.0.0.8-31032014-2146.qcow2
# - apply the cloudx.patches file
cd /opt/cloudx_install/conf/
patch -p0 < cloudx.patches
- Make sure the host has a FQDN: eg. cloudx.boston.co.uk
# - update the file: /etc/sysconfig/network -
HOSTNAME=cloudx.boston.co.uk
# Update /etc/hosts
172.28.0.220 cloudx.boston.co.uk cloudx
# Ensure the following cmd works
[root@cloudx ~]# hostname --fqdn
cloudx.boston.co.uk
- Once all the above is completed, you can launch the installer script.
# use screen - installation process can take some time!
yum install screen
screen -S cloudx
/opt/cloudx_install/scripts/cloudx_installer.sh
Once installed you can log in with admin/password
Problems and Resolutions
Errors in servers.csv file
[172.28.1.57] executing python fabric_installer.py --log_file=fabric_install.log --conf_file=/opt/cloudx_install/conf/cloudx.conf
Traceback (most recent call last):
File "fabric_installer.py", line 82, in <module>
main()
File "fabric_installer.py", line 70, in main
readCsvFile(conf_file)
File "/opt/cloudx_install/src/installers/common_methods.py", line 178, in readCsvFile
readCsvLine(line)
File "/opt/cloudx_install/src/installers/common_methods.py", line 210, in readCsvLine
host_auth_list = getCsvAuthListFromLine(line)
File "/opt/cloudx_install/src/installers/common_methods.py", line 230, in getCsvAuthListFromLine
l.pop(OpenStack.ROLE_COLUMN)
KeyError: 'Role'
[172.28.1.57] command failed python fabric_installer.py --log_file=fabric_install.log --conf_file=/opt/cloudx_install/conf/cloudx.conf- return code:1, err: None
[172.28.1.57] --------------------------------------------------
[172.28.1.57] /// *Failed* main_installer.py on: 172.28.1.57 ///
[172.28.1.57] --------------------------------------------------
- Remove any comment lines from the servers.csv file - the script can't process/ignore comment lines!
IB/10GbE Switch config issues
INFO - Running command (dcb priority-flow-control enable force) on 172.28.1.74
INFO - Running command (dcb priority-flow-control priority 3 enable) on 172.28.1.74
INFO - Running command (interface ethernet 1/1-1/36 dcb priority-flow-control mode on force) on 172.28.1.74
ERROR - Error while configuring 172.28.1.74: interface ethernet 1/1-1/36 dcb priority-flow-control mode on force
% 2nd interface does not exist
switch-cac5b2 [standalone: master] (config) #
ERROR - Host 172.28.1.74 configuration failed! (Configuration error on 172.28.1.74)
- Script is trying to configure more ports than the switch has available!
- Set fabric_preparation in cloudx.conf file to False:
# in the file /opt/cloudx_install/conf/cloudx.conf
fabric_preparation = False
10GbE Link Status
[172.28.1.57] ###### Inband configuration - UP STATE CHECK - FAILED
[172.28.1.57] ###### Inband configuration - UP STATE CHECK - FAILED
- Ensure SR-IOV (virtualisation) is enabled in the BIOS
- Reboot the failing nodes
Host not listening on port 22
[172.28.32.3:22] run: packstack --answer-file=/tmp/havana_answer_file
[172.28.32.3:22] out: Welcome to Installer setup utility
[172.28.32.3:22] out: Parameter CONFIG_GLANCE_HOST failed validation: Given host does not listen on port 22: 192.168.0.4,192.168.0.7
[172.28.32.3:22] out:
[172.28.32.3:22] out: ERROR : Failed handling answer file: Given host does not listen on port 22: 192.168.0.4,192.168.0.7
[172.28.32.3:22] out: Please check log file /var/tmp/packstack/20140901-104919-ayWqHA/openstack-setup.log for more information
[172.28.32.3:22] out:
Fatal error: run() received nonzero return code 1 while executing!
- Try stopping iptables on the nodes
- We got rid of this in the end by having just one storage node!