GIGAIO:Lab Setup
Latest revision as of 07:35, 21 October 2020
[root@vcontroller test]# cat test.sh
#!/bin/bash
#SBATCH -N 1                 # number of nodes
#SBATCH --gres=gpu:k80:8
hostname
lspci | grep NVIDIA
nvidia-smi
sleep 18
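The job script is submitted in the usual way and its progress watched from the queue (standard Slurm commands; the output is not captured here):

[root@vcontroller test]# sbatch test.sh
[root@vcontroller test]# squeue -o "%A|%N|%T"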
[root@vcontroller sbin]# cat slurm_resume
#!/usr/bin/python3
import sys
import subprocess
import json
def run_command(command):
    # Split into argv and run without a shell; decode the output so
    # callers can split on real newlines.
    cmd_arr = command.split(' ')
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return stdout.decode()

enabled_ports = [9, 17]
nodelist = ["node0001", "node0002"]

def main():
    # Slurm passes the node(s) to resume as the first argument; this
    # script assumes a single, uncompressed node name.
    nodename = str(sys.argv[1])
    print(str(sys.argv))
    if nodename in nodelist:
        print("Nodename is " + nodename)
    else:
        exit(0)

    # Map node name to fabric partition id
    if nodename == 'node0001':
        part_id = '0'
    elif nodename == 'node0002':
        part_id = '1'
    else:
        exit(0)

    # Find job id assigned to this node
    cmd_out = run_command("squeue -o %A|%N")
    print(cmd_out)
    jobid = '-1'
    for line in cmd_out.split('\n'):
        if nodename in line:
            jobid = line.split('|')[0]
            break
    if jobid == '-1':
        exit(0)
    print("Job id is " + jobid)

    # Find num of gpus requested by this job
    cmd_output = run_command("scontrol show job " + str(jobid))
    num_req_gpus = cmd_output.split('TresPerNode=gpu:k80:')[1].split()[0]
    print("Num of req gpus is " + num_req_gpus)
    if num_req_gpus == "8" or num_req_gpus == "16":
        pass
    else:
        print("Invalid number of gpus requested by job " + num_req_gpus)
        # Cancel job
        exit(1)

    # Get current state of ports
    cmd_output = run_command("fmtool -s virgo12a")
    print("Getting unbound ports")
    cmd_json = cmd_output.split('Response:')[1].split('Success')[0]
    cmd_obj = json.loads(cmd_json)
    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    if len(unbound) == 0:
        print("No unbound ports available")
        # scancel jobid assigned to this node
        exit(1)
    print("Unbound ports")
    print(unbound)

    # If num of requested gpus are unbound, then bind them, else fail
    if num_req_gpus == "8":
        if 9 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
        elif 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
        else:
            print("9 or 17 port not present in unbound")
            exit(1)
    elif num_req_gpus == "16":
        if 9 in unbound and 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
        else:
            print("9 and 17 port not present in unbound")
            exit(1)

    # Reboot the node so it enumerates the newly attached GPUs
    run_command("ssh " + nodename + " -t reboot")

main()
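Slurm invokes ResumeProgram with the names of the nodes to resume; the script assumes a single, uncompressed node name. A manual test run (hypothetical invocation) looks like:

[root@vcontroller sbin]# ./slurm_resume node0001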
[root@vcontroller sbin]# cat slurm_suspend
#!/bin/bash
# Example SuspendProgram
echo "`date` Suspend invoked $0 $*" >>/var/log/power_save.log
echo "Power Save Module"
fmtool -U switch:virgo12a,port_id:9 virgo12a >>/var/log/power_save.log
fmtool -U switch:virgo12a,port_id:17 virgo12a >>/var/log/power_save.log
exit 0
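Both fmtool unbind calls append to /var/log/power_save.log, so suspend activity can be followed with:

[root@vcontroller sbin]# tail -f /var/log/power_save.log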
[root@vcontroller ~]# cat slurm.py
import subprocess
import json
def run_command(command):
    # Split into argv and run without a shell; decode the output so
    # callers can split on real newlines.
    cmd_arr = command.split(' ')
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return stdout.decode()

enabled_ports = [9, 17]

def main():
    # Find if there is a pending job waiting on node resources
    cmd_output = run_command("squeue -t PD -o %i|%r -h")
    job_arr = cmd_output.split('\n')
    if len(job_arr) == 0:
        print("No jobs present in squeue")
        exit(1)
    job_id = "-1"
    for job in job_arr:
        if "ReqNodeNotAvail" in job:
            job_id = job.split('|')[0]
            print("Job id is " + str(job_id))
            break
    if job_id == "-1":
        print("No PENDING jobs present in squeue")
        exit(1)

    # Find how many gpus it requests
    cmd_output = run_command("scontrol show job " + str(job_id))
    num_req_gpus = cmd_output.split('TresPerNode=gpu:k80:')[1].split()[0]
    print("Num of req gpus is " + num_req_gpus)
    if num_req_gpus == "8" or num_req_gpus == "16":
        pass
    else:
        print("Invalid number of gpus requested by job " + num_req_gpus)
        # Cancel job
        exit(1)

    # Get current state of ports
    cmd_output = run_command("fmtool -s virgo12a")
    cmd_json = cmd_output.split('Response:')[1].split('Success')[0]
    cmd_obj = json.loads(cmd_json)
    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    if len(unbound) == 0:
        print("No unbound ports available")
        exit(1)
    print("Unbound ports")
    print(unbound)

    # Find available slurm node and corresponding partition id
    cmd_output = run_command('sinfo')
    cmd_arr = cmd_output.split('\n')[1:]
    part_id = '-1'
    nodename = 'dontexist'
    for node in cmd_arr:
        if 'idle' in node:
            if 'n0001' in node:
                part_id = '0'
                nodename = 'n0001'
            elif 'n0002' in node:
                part_id = '1'
                nodename = 'n0002'
            break
    if part_id == '-1':
        print("No idle slurm nodes available")
        exit(1)
    print("Partition ID selected : " + part_id)

    # Check if requested gpus are available and bind them to the node
    if num_req_gpus == "8":
        if 9 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
        elif 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
        else:
            print("9 or 17 port not present in unbound")
            exit(1)
    else:
        if 9 in unbound and 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
        else:
            print("9 and 17 port not present in unbound")
            exit(1)

    # Update gres number with scontrol for slurm node
    run_command('scontrol update Nodename=' + nodename + ' gres=gpu:k80:' + str(num_req_gpus))
    # scontrol reboot
    #run_command('scontrol reboot ' + nodename)

main()
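How slurm.py is triggered is not shown on this page; since it polls squeue for pending ReqNodeNotAvail jobs, one option (an assumption, not part of the captured setup) is to run it from cron every minute:

* * * * * /usr/bin/python3 /root/slurm.py >> /var/log/slurm/gigaio_poll.log 2>&1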
[root@vcontroller slurm]# cat slurm.conf
#
# Ansible managed
#
#GresTypes=gpu
ClusterName=cluster
ControlMachine=vcontroller
Include /etc/slurm/slurm-nodes.conf
Include /etc/slurm/slurm-partitions.conf
Include /etc/slurm/slurm-user.conf
#Include /etc/slurm/gres.conf
#Include /etc/slurm/slurm-health.conf
TopologyPlugin=topology/tree
SwitchType=switch/none
TaskPlugin=task/none
MpiDefault=none
ReturnToService=2
# Accounting.
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=vcontroller
AccountingStorageUser=slurm
#AccountingStorageEnforce=qos,limits
#AccountingStoragePass=
#AccountingStoragePort=
# Logging
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdLogFile=/var/log/slurm/slurmd.log
StateSaveLocation=/vscaler/local/var/spool/slurm
AuthType=auth/munge
ControlAddr=10.6.44.152
#Name=gpu Type=k80 File=/dev/nvidia[0-15]
DebugFlags=Gres
RebootProgram="/sbin/shutdown -r now"
GresTypes=gpu
#SelectType=select/cons_res
#SelectTypeParameters=CR_CORE_Memory
SuspendProgram=/usr/sbin/slurm_suspend
ResumeProgram=/usr/sbin/slurm_resume
SuspendTime=15
NodeName=node0001 Weight=8 Feature=cloud State=CLOUD Gres=gpu:k80:16
NodeName=node0002 Weight=8 Feature=cloud State=CLOUD Gres=gpu:k80:16
ResumeTimeout=300
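Note that the PrologSlurmctld flow shown later (Prologctld.py) keys off SLURM_JOB_LICENSES, which this dump does not define. Presumably the controller also carries something like the following (hypothetical values; Licenses and PrologSlurmctld are standard slurm.conf parameters):

Licenses=GIOGPU:16
PrologSlurmctld=/etc/slurm/Prologctld.py

A job would then request fabric GPUs with, e.g., sbatch -L GIOGPU:8 test.sh.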
[root@vcontroller site]# cat hosts
[controllers]
vcontroller

[staticcomputes]
node0001
node0002

[ood]
#portal

[vncnodes]

[logins]
[root@seashell kolla]# cat admin-openrc.sh
export OS_PROJECT_DOMAIN_NAME=Default
export OS_USER_DOMAIN_NAME=Default
export OS_PROJECT_NAME=admin
export OS_TENANT_NAME=admin
export OS_USERNAME=admin
export OS_PASSWORD=CSNOm67PJTEdOHOLrcWnPsa3d4ydxHWRsz4RA862
export OS_AUTH_URL=http://192.168.88.233:35357/v3
export OS_INTERFACE=internal
export OS_IDENTITY_API_VERSION=3
export OS_REGION_NAME=RegionOne
export OS_AUTH_PLUGIN=password
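To use these credentials, source the file and verify the Ironic endpoint responds (standard OpenStack CLI; the node list is empty until nodes are enrolled):

[root@seashell kolla]# source admin-openrc.sh
[root@seashell kolla]# openstack baremetal node list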
[root@seashell kolla]# grep -v "#" globals.yml
---
openstack_release: "stein"
kolla_internal_vip_address: "192.168.88.233"
docker_registry: "registry.vscaler.com:5000"
network_interface: "eno1"
neutron_external_interface: "eno2"
enable_haproxy: "no"
enable_cinder: "yes"
enable_cinder_backend_lvm: "yes"
enable_ironic: "yes"
enable_ironic_ipxe: "yes"
enable_ironic_neutron_agent: "yes"
glance_enable_rolling_upgrade: "no"
ironic_dnsmasq_interface: "{{ network_interface }}"
ironic_dnsmasq_dhcp_range: "192.168.88.50,192.168.88.60"
ironic_dnsmasq_boot_file: "pxelinux.0"
ironic_inspector_kernel_cmdline_extras: ['ipa-lldp-timeout=90.0', 'ipa-collect-lldp=1']
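With globals.yml in place, the deployment itself follows the standard kolla-ansible sequence (the inventory path is an assumption; adjust to the local checkout):

[root@seashell kolla]# kolla-ansible -i ./multinode bootstrap-servers
[root@seashell kolla]# kolla-ansible -i ./multinode prechecks
[root@seashell kolla]# kolla-ansible -i ./multinode deploy
[root@seashell kolla]# kolla-ansible -i ./multinode post-deploy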
[root@seashell ironic]# grep -v "#" ironic-conductor.conf
[DEFAULT]
debug = True
log_dir = /var/log/kolla/ironic
transport_url = rabbit://openstack:6HFpy2oNyez4PbpTsjY29TSTRnP7IjBOD8y06vwL@192.168.88.233:5672//
my_ip = 192.168.88.233

[oslo_messaging_notifications]
transport_url = rabbit://openstack:6HFpy2oNyez4PbpTsjY29TSTRnP7IjBOD8y06vwL@192.168.88.233:5672//

[conductor]
api_url = http://192.168.88.233:6385
automated_clean = false

[database]
connection = mysql+pymysql://ironic:70mOUN3Gpi7ikPD7uzZBiaU7an1jS4hqOjXv3LPZ@192.168.88.233:3306/ironic
max_retries = -1

[keystone_authtoken]
www_authenticate_uri = http://192.168.88.233:5000
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
memcache_security_strategy = ENCRYPT
memcache_secret_key = DDTRcI9cLlneNaUEqx4DJMtLhhsqdYjeJ2OxpdAz
memcached_servers = 192.168.88.233:11211

[glance]
glance_api_servers = http://192.168.88.233:9292
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ

[neutron]
url = http://192.168.88.233:9696
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
cleaning_network =

[inspector]
enabled = true
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
endpoint_override = http://192.168.88.233:5050

[agent]
deploy_logs_local_path = /var/log/kolla/ironic
deploy_logs_storage_backend = local
deploy_logs_collect = on_failure

[pxe]
pxe_append_params = nofb nomodeset vga=normal console=tty0 console=ttyS0,115200n8 console=tty0 sshkey="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQChMdR4SZE2IZazLg2COA8uo6oPzbgXpSJ1QoHsNIKDivYwQiuBruBLZDdUErcm5KnZ7DmxxoBmN3dNBwpeQEVDn4CennaGmirZJC8cUfnq8rKRLK1MwamZZD7y9sS4bXJs7pqaeI1921l2uthBDsQ4r1XZ5f+t0c8ZeTDr9mwdhgOUDglrved5fjefJXwZUmR3EBlelWeqWFSk0/oazAzGWg+IVEgwEk4F6XIpmOPRdrDKU7tT3zXCQSf2cKPsB5ajTxTW66i94+wJnRRf/c/Yf7QLKymeGsw6d/ZoPqI/LVZRkh42kNN9sml3x6SkzZP8dnQU91xoQdQPCqc09XJ3Wfv7ERhgLXx6D/kN7STKXXTHKb/+Ab/HK96sbzuksuz2dbiBv/37kkXL9Oayr0IArDvUeW6dv2/Vg409G+Xf/tESOmm3hO9+WcVrCwBCMIuiKSTVrSrvmI19v5VuVeO0Ujb+6MwRSP/qCY7cTINt1y+lZmai6OG3ek+IcJc274U= vipul@localhost.localdomain" ipa-debug=1 coreos.autologin
ipxe_enabled = True
pxe_bootfile_name = undionly.kpxe
uefi_pxe_bootfile_name = ipxe.efi
pxe_config_template = $pybasedir/drivers/modules/ipxe_config.template
uefi_pxe_config_template = $pybasedir/drivers/modules/ipxe_config.template
tftp_root = /httpboot
tftp_master_path = /httpboot/master_images
tftp_server = 192.168.88.233

[deploy]
http_url = http://192.168.88.233:8089
default_boot_option = local

[oslo_middleware]
enable_proxy_headers_parsing = True
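Enrolling a bare-metal node against this conductor follows the usual Ironic workflow (illustrative values only; the actual driver details and node addresses are not captured on this page):

[root@seashell kolla]# openstack baremetal node create --driver ipmi --name node0001
[root@seashell kolla]# openstack baremetal node manage node0001
[root@seashell kolla]# openstack baremetal node provide node0001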
[root@seashell neutron]# cat ml2_conf.ini
[ml2]
type_drivers = flat,vlan,vxlan
tenant_network_types = vxlan
mechanism_drivers = openvswitch,l2population
extension_drivers = port_security

[ml2_type_vlan]
network_vlan_ranges = physnet1:300:400

[ml2_type_flat]
flat_networks = physnet1

[ml2_type_vxlan]
vni_ranges = 1:1000
vxlan_group = 239.1.1.1

[securitygroup]
firewall_driver = neutron.agent.linux.iptables_firewall.OVSHybridIptablesFirewallDriver

[agent]
tunnel_types = vxlan
l2_population = true
arp_responder = true

[ovs]
bridge_mappings = physnet1:br-ex
datapath_type = system
ovsdb_connection = tcp:127.0.0.1:6640
local_ip = 192.168.88.233
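For Ironic provisioning over physnet1, a flat provider network matching the flat_networks entry above is typically created (illustrative names and ranges; the actual provisioning/cleaning network used in this lab is not shown):

[root@seashell kolla]# openstack network create --provider-network-type flat --provider-physical-network physnet1 provisioning
[root@seashell kolla]# openstack subnet create --network provisioning --subnet-range 192.168.88.0/22 --allocation-pool start=192.168.88.61,end=192.168.88.99 provisioning-subnet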
init-runonce vscaler-key

[root@seashell kolla]# ip r
default via 192.168.88.254 dev eno1 proto dhcp metric 100
10.6.44.0/24 via 192.168.88.253 dev eno1
172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown
192.168.88.0/22 dev eno1 proto kernel scope link src 192.168.88.233 metric 100
Prologctld.py
#!/usr/bin/env python3
import subprocess
import json
import os
import configparser
# Set up config handling
def run_command(command):
    # Split into argv and run without a shell; decode the output so
    # callers can split on real newlines.
    cmd_arr = command.split(' ')
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return stdout.decode()

def read_config():
    config = configparser.ConfigParser()
    config.read('/etc/slurm/gigaio.conf')
    return config

def write_log(msg):
    f = open("/var/log/slurm/prologctld.log", "a")
    f.write(str(msg))
    f.close()

def main():
    # Read environment variables set by slurmctld for the prolog
    env_var = os.environ

    # Check if GPUs were requested via the GIOGPU license
    if 'SLURM_JOB_LICENSES' in env_var and 'GIOGPU' in env_var['SLURM_JOB_LICENSES']:
        pass
    else:
        exit(0)

    # Obtain Global lock

    # Read nodelist (assumes a plain comma-separated list, not a
    # compressed hostlist expression such as node[0001-0002])
    if 'SLURM_JOB_NODELIST' in env_var:
        nodelist = env_var['SLURM_JOB_NODELIST'].split(',')
        node_count = len(nodelist)
        print("Nodelist is " + str(nodelist))
    else:
        exit(1)

    # Read and parse config
    cnf = read_config()
    print(cnf)

    # Read how many GPUs are requested, e.g. SLURM_JOB_LICENSES=GIOGPU:8
    licenses = env_var['SLURM_JOB_LICENSES'].split(',')
    gpu_count = 0
    for lic in licenses:
        if 'GIOGPU' in lic:
            gpu_count = int(lic.split(':')[1])
            break

    if gpu_count < 1:
        exit(1)

    if 'SLURM_JOBID' in env_var:
        job_id = env_var['SLURM_JOBID']

    gpu_per_node = gpu_count // node_count

    # Find out which unbound GPU ports can be assigned, from fmtool and
    # the conf file, based on availability and size

    cmd_output = run_command("fmtool -s virgo12a")
    cmd_json = cmd_output.split('Response:')[1].split('Success')[0]
    print(cmd_json)
    cmd_obj = json.loads(cmd_json)

    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    print(unbound)
    if len(unbound) == 0:
        print("No unbound ports available")
        exit(1)

    allocation = {}
    unbound_start = 0

    for node in nodelist:
        allocation[node] = {}
        # Get partition id of the node from the conf file
        allocation[node]['part_id'] = cnf.get('NODES', node)
        allocation[node]['ports'] = []
        node_budget = gpu_per_node

        for i in range(unbound_start, len(unbound)):
            if node_budget <= 0:
                break
            print(unbound[i])
            # Get size (GPU count) of the port from the conf file
            size = int(cnf.get('PORTS', str(unbound[i])))

            if (node_budget - size) >= 0:
                node_budget = node_budget - size
                allocation[node]['ports'].append(unbound[i])

            # Advance past this port either way; a skipped port is not
            # reconsidered for later nodes
            unbound_start = unbound_start + 1

        if node_budget > 0:
            print("Unbound ports not available")
            exit(1)

    print(allocation)

    # Assign unbound ports to partition ids
    for node in allocation:
        for port in allocation[node]['ports']:
            # Bind port to the node's part_id
            run_command('fmtool -B switch:virgo12a,part_id:' + str(allocation[node]['part_id']) + ',port_id:' + str(port) + ' virgo12a')

    # Reboot nodes so they enumerate the newly attached GPUs
    for node in allocation:
        run_command('scontrol reboot ' + node)

    # Release Lock

main()
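Prologctld.py expects /etc/slurm/gigaio.conf to carry a NODES section mapping node names to fabric partition ids and a PORTS section mapping switch port ids to the number of GPUs behind each port. A minimal sketch (the section and key names come from the cnf.get() calls above; the specific ids and sizes are illustrative only):

[NODES]
node0001 = 0
node0002 = 1

[PORTS]
9 = 8
17 = 8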