Piston:Landing page

From Define Wiki
Revision as of 09:54, 6 March 2015 by David (talk | contribs)
Jump to navigation Jump to search
Error creating thumbnail: File missing

Piston Cloud

Installation

Operation Guides

Component Guides

Notes

  • Notes taken while installing the system
  • Accessing the nodes via ssh
# access the nodes, do this from the boot node: 
sudo su - 
(password)
dev ssh 
ssh <IP> # ip from the dashboard, access the private address of the nodes
  • Reinstall a system without a full reboot
# on the boot node and as root
# edit the conf file 
vi /mnt/flash/conf/pentos.conf.used 
# then :
dev reinit
  • Check status via CLI (if you can't access the web interface)
# on boot node 
[root@boot-172-16-0-2 ~]# piston-dev.py cluster-info 
{u'control': {u'state': u'initialize:wait-for-nodes'},
 u'hosts': {u'172.16.1.3': {u'blessed': False,
                            u'context': {},
                            u'diskdata': None,

# or use the short version: 
[root@boot-172-16-0-2 ~]# piston-dev.py cluster-info -s
{u'control': {u'state': u'optimal'},
 u'hosts': {u'172.16.1.2': {u'host_ip': u'172.16.0.13',
                            u'progress': [],
                            u'status': u'ready'},
            u'172.16.1.3': {u'host_ip': u'172.16.0.14',
                            u'progress': [],
                            u'status': u'ready'},
            u'172.16.1.4': {u'host_ip': u'172.16.0.15',
                            u'progress': [],
                            u'status': u'ready'},
            u'172.16.1.5': {u'host_ip': None,
                            u'progress': [],
                            u'status': u'stop'},
            u'172.16.1.6': {u'host_ip': None,
                            u'progress': [],
                            u'status': u'booting'},
            u'172.16.1.7': {u'host_ip': None,
                            u'progress': [],
                            u'status': u'booting'}}}
  • Force reinstall
# create the file destroy-data on the USB root 
# or on the boot node: 
touch /mnt/usb1/destroy-data
  • Problems with IPMI (timeouts, commands should complete in <2 seconds)
# this is the command piston will use on the IPMI module - test out on the boot node to diagnose IPMI issues
[root@boot-172-16-0-2 log]# ipmi-chassis --session-timeout 1999 --retransmission-timeout 1000 -u admin -p admin -D LAN_2_0 -h 172.16.1.5 --get-status
ipmi_ctx_open_outofband_2_0: session timeout
# above is bad! below is what you want to see
[root@boot-172-16-0-2 log]# ipmi-chassis -u admin -p admin -D LAN_2_0 -h 172.16.1.5 --get-status
System Power                        : off
Power overload                      : false
Interlock                           : inactive
Power fault                         : false
Power control fault                 : false
Power restore policy                : Always off
Last Power Event                    : power on via ipmi command

### If you are seeing IPMI timeouts, it's probably because eth0/eth1 are trying to bond; note that the MACs below are the same (we have only one cable plugged in in this instance)

eth0: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        ether 00:25:90:4b:7a:85  txqueuelen 1000  (Ethernet)

eth1: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        ether 00:25:90:4b:7a:85  txqueuelen 1000  (Ethernet)

[root@boot-172-16-0-2 log]# ifconfig eth1 down 
[root@boot-172-16-0-2 log]# ipmi-chassis --session-timeout 1999 --retransmission-timeout 1000 -u admin -p admin -D LAN_2_0 -h 172.16.1.5 --get-status
System Power                        : off
Power overload                      : false
Interlock                           : inactive
Power fault                         : false
Power control fault                 : false
Power restore policy                : Always off
Last Power Event                    : power on via ipmi command
Chassis intrusion                   : inactive
Front panel lockout                 : inactive
Drive Fault                         : false
Cooling/fan fault                   : false
Chassis Identify state              : off
Power off button                    : enabled
Reset button                        : enabled
Diagnostic Interrupt button         : enabled
Standby button                      : enabled
Power off button disable            : unallowed
Reset button disable                : unallowed
Diagnostic interrupt button disable : unallowed
Standby button disable              : unallowed
  • Monitoring the install from the boot node
tail -f /var/log/cmessages | egrep -i '(error|trace|critical|fail)'

# Good command to check that IPMI DHCP is OK and that the nodes then pass the status check:
tail -f /var/log/cmessages | egrep -i '(dhcp|ipmi)'
# cloud.conf with all the comment lines removed:
[root@boot-172-16-0-2 usb1]# grep "^[^#;]" cloud.conf 




[role profile BootNodeAdmin]
management api enabled = false
boot node enabled = true

[role profile ClusterAdmin]
management api enabled = true


[user profile admin]
role = BootNodeAdmin, ClusterAdmin
secret = $6$Mjc/QBXGf2Y$/l9f2jVxbeKkkk5KiyPxMD4k0MQggVLhZvjLI9NWD1CO2Fwzs1dyDsyKJ7RewfSG9nBipLMO0ySq7IlTvC5C2.

[user profile piston-admin]
role = ClusterAdmin
secret = $6$rounds=60000$Example.Jkvnr3vC$wRiggCNQhj/qthYCLqFTFPOs2eil.0DsAe8qGw.UyQEejk9u6qk/hhWdwrYFIdArbmY4RGxVw7


[network]
host_net=172.16.0.0/24


host_bootnode_ip=172.16.0.2



management_net=172.16.1.0/24




services_net=172.16.2.0/26



services_vlan=2


cloud_net=172.16.3.0/24



cloud_vlan=3


public_net=172.16.4.0/24



public_vlan=4


ntp_servers=pool.ntp.org

dns_servers=8.8.8.8,8.8.4.4


type=nova-network

[disk profile ceph]
count_min=1
size_min=100GB
ssd=always
partitions=ceph_journal,ceph_journal,ceph_monitor,identity,ceph_data
priority=1

[disk profile ephemeral]
count_min=1
size_min=500GB
ssd=never
partitions=identity,ceph_data,ephemeral:500GB
priority=2


[auth]
type=local

[local_auth]
admin_username=admin
admin_password=$6$2EFLpDNp$Example.reAyhjN90s.qORBBABvA0CExsiVcrKgZwz5uOwlLW7rRrCZJXjA5dQfHlA7L11c2n37nhcRav0aaa1

[ldap_auth]
url=ldap://ldap.example.com
user=CN=ldapadmin,CN=Users,DC=example,DC=com
password=BadPassword
suffix=DC=example,DC=com
tenant_tree_dn=OU=Piston,DC=example,DC=com
tenant_objectclass=organizationalUnit
tenant_id_attribute=ou
tenant_name_attribute=displayName
user_tree_dn=CN=Users,DC=example,DC=com
user_objectclass=person
user_id_attribute=cn
user_name_attribute=cn
user_attribute_ignore=password,tenant_id,tenants
user_enabled_attribute=userAccountControl
user_enabled_default=512
user_enabled_mask=2
role_tree_dn=OU=Piston,DC=example,DC=com
role_objectclass=group
role_id_attribute=cn
role_name_attribute=displayName
role_member_attribute=member


[snmp]
enabled=no
community=piston


[dashboard]



[servers]
server_count=5

image_cache_size=204800

ipmi_user=admin
ipmi_pass=admin