GIGAIO:Lab Setup

[root@vcontroller test]# cat test.sh 
#!/bin/bash
#SBATCH -N 1 # number of nodes
#SBATCH --gres=gpu:k80:8

hostname
lspci | grep NVIDIA
nvidia-smi

sleep 18


[root@vcontroller sbin]# cat slurm_resume 
#!/usr/bin/python3

import sys

import subprocess
import json
def run_command(command):
    cmd_arr = command.split(' ') 
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return str(stdout)

enabled_ports = [9, 17]
nodelist = ["node0001", "node0002"]
def main():

    nodename = str(sys.argv[1])
    print(str(sys.argv))
    
    if nodename in nodelist:
       print("Nodename is " + nodename)
    else:
       exit(0) 
    

    if nodename == 'node0001':
        part_id = '0'
    elif nodename == 'node0002':
        part_id = '1'
    else:
        exit(0)

    # Find job id assigned to this node
    cmd_out = run_command("squeue -o %A|%N")
    
    print(cmd_out)
    jobid = '-1'
    for line in cmd_out.split("\\n"):
        if nodename in line:
            jobid = line.split('|')[0]
            break
    if jobid == '-1':
        exit(0)

    print("Job id is " + jobid)

    # Find num of gpus requested by this job
    cmd_output = run_command("scontrol show job " + str(jobid))
    num_req_gpus = cmd_output.split('TresPerNode=gpu:k80:')[1].split('\\n')[0]
   
    print("Num of req gpus is " + str(num_req_gpus).split('\\n')[0])
    if num_req_gpus == "8" or num_req_gpus == "16":
        pass
    else:
        print("Invalid number of gpus requested by job " + num_req_gpus)
        # Cancel job    
        exit(1)




    # Get current state of ports
    cmd_output = run_command("fmtool -s virgo12a")
    print("Getting unbound ports")

    cmd_json = (cmd_output.split('Response:')[1].split('Success')[0]).replace("\\n"," ")
    #print(cmd_json)
    cmd_obj = json.loads(cmd_json)
    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    
    if len(unbound) == 0:
        print("No unbound ports available")
        #scancel jobid assigned to this node
        exit(1)
    print("Unbound ports")
    print(unbound)
    
    # If num of requested gpus are unbound, then bind them, else scancel the jobid

    if num_req_gpus == "8":
        if 9 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            pass
        elif 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
            pass
        else:
            print("9 or 17 port not present in unbound")
            exit(1)
    elif num_req_gpus == "16":
        if 9 in unbound and 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
            pass
        else:
            print("9 and 17 port not present in unbound")
            exit(1)


        

    # Reboot the node
    run_command("ssh " + nodename + " -t reboot")
main()
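
Note on run_command above: it returns str(stdout), which is the repr of a bytes object (b'...\n...'), so the callers split on the literal "\\n" sequence rather than on real newlines. Below is a minimal sketch of a decode-based alternative; run_command_decoded is a hypothetical name, and callers using it would split on '\n' directly instead.

# Sketch only (not part of slurm_resume): decode-based variant of run_command.
import subprocess

def run_command_decoded(command):
    cmd_arr = command.split(' ')
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if stderr:
        print(stderr.decode(errors='replace'))
    # Return real text so downstream code can split on '\n' and parse JSON directly
    return stdout.decode(errors='replace')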


[root@vcontroller sbin]# cat slurm_suspend 
#!/bin/bash
# Example SuspendProgram
echo "`date` Suspend invoked $0 $*" >>/var/log/power_save.log
echo "Power Save Module"
fmtool -U switch:virgo12a,port_id:9 virgo12a >>/var/log/power_save.log
fmtool -U switch:virgo12a,port_id:17 virgo12a >>/var/log/power_save.log
exit 0


[root@vcontroller ~]# cat slurm.py 
import subprocess
import json
def run_command(command):
    cmd_arr = command.split(' ') 
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return str(stdout)

enabled_ports = [9, 17]
def main():
    # Find if there is a job available
    cmd_output = run_command("squeue -t PD -o %i|%r -h")
    job_arr = cmd_output.split('\\n')
    if len(job_arr) == 0:
        print("No jobs present in squeue")
        exit(1)
    job_id = "-1"
    for job in job_arr:
        if "ReqNodeNotAvail" in job:
            job_id = job.split('|')[0]
            print ("Job id  is "  + str(job_id))
            break
    if job_id == "-1":
        print("No PENDING jobs present in squeue")
        exit(1)
    # Find how many gpus it requests
    cmd_output = run_command("scontrol show job " + str(job_id))
    num_req_gpus = cmd_output.split('TresPerNode=gpu:k80:')[1].split('\\n')[0]

    print("Num of req gpus is " + num_req_gpus)
    if num_req_gpus == "8" or num_req_gpus == "16":
        pass
    else:
        print("Invalid number of gpus requested by job " + num_req_gpus)
        # Cancel job    
        exit(1)

    # Get current state of ports
    cmd_output = run_command("fmtool -s virgo12a")
    cmd_json = cmd_output.split('Response:')[1].split('Success')[0].replace("\\n"," ")
    cmd_obj = json.loads(cmd_json)
    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    
    if len(unbound) == 0:
        print("No unbound ports available")
        exit(1)
    print("Unbound ports")
    print(unbound)
    # Find available slurm node and corresponding partition id
    cmd_output = run_command('sinfo')
    cmd_arr = cmd_output.split('\\n')[1:]
    part_id = '-1'
    nodename = 'dontexist'
    for node in cmd_arr:
        if 'idle' in node:
            if 'n0001' in node:
                part_id = '0'
                nodename = 'n0001'
            elif 'n0002' in node:
                part_id = '1'
                nodename = 'n0002'
            break

    if part_id == '-1':
        print("No idle slurm nodes available")
        exit(1)
    print("Partition ID selected : " + part_id)
    # Check if requested gpus are available
    # Bind gpus to available node
    if num_req_gpus == "8":
        if 9 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            pass
        elif 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
            pass
        else:
            print("9 or 17 port not present in unbound")
            exit(1)
    else:
        if 9 in unbound and 17 in unbound:
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:9 virgo12a')
            run_command('fmtool -B switch:virgo12a,part_id:' + part_id + ',port_id:17 virgo12a')
            pass
        else:
            print("9 and 17 port not present in unbound")
            exit(1)

    
    # Update gres number with scontrol for slurm node
    run_command('scontrol update Nodename=' + nodename + ' gres=gpu:k80:' + str(num_req_gpus))
    # scontrol reboot
    #run_command('scontrol reboot '+ nodename)
    

main()
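
All three Python scripts on this page slice the JSON out of the fmtool -s output by splitting on 'Response:' and 'Success'. The helper below is a sketch of the same extraction with a couple of guards; it assumes the same "Response: <json> ... Success" framing and the str(bytes) output of run_command, and parse_unbound_ports is a hypothetical name, not an fmtool or Slurm API.

# Sketch only: defensive extraction of the unbound port list from `fmtool -s` output.
import json

def parse_unbound_ports(raw_output):
    if 'Response:' not in raw_output or 'Success' not in raw_output:
        raise ValueError("unexpected fmtool output: " + raw_output[:200])
    body = raw_output.split('Response:')[1].split('Success')[0].replace("\\n", " ")
    obj = json.loads(body)
    try:
        return obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    except (KeyError, IndexError):
        return []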



[root@vcontroller slurm]# cat slurm.conf 
#
# Ansible managed
#
#GresTypes=gpu
ClusterName=cluster
ControlMachine=vcontroller
Include /etc/slurm/slurm-nodes.conf
Include /etc/slurm/slurm-partitions.conf
Include /etc/slurm/slurm-user.conf
#Include /etc/slurm/gres.conf
#Include /etc/slurm/slurm-health.conf


TopologyPlugin=topology/tree

SwitchType=switch/none
TaskPlugin=task/none
MpiDefault=none
ReturnToService=2


# Accounting.
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=vcontroller
AccountingStorageUser=slurm
#AccountingStorageEnforce=qos,limits
#AccountingStoragePass=
#AccountingStoragePort=

# Logging
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdLogFile=/var/log/slurm/slurmd.log

StateSaveLocation=/vscaler/local/var/spool/slurm
AuthType=auth/munge
ControlAddr=10.6.44.152


#Name=gpu Type=k80 File=/dev/nvidia[0-15]

DebugFlags=Gres
RebootProgram="/sbin/shutdown -r now"
GresTypes=gpu

#SelectType=select/cons_res
#SelectTypeParameters=CR_CORE_Memory

SuspendProgram=/usr/sbin/slurm_suspend
ResumeProgram=/usr/sbin/slurm_resume
SuspendTime=15
NodeName=node0001 Weight=8 Feature=cloud State=CLOUD Gres=gpu:k80:16
NodeName=node0002 Weight=8 Feature=cloud State=CLOUD Gres=gpu:k80:16

ResumeTimeout=300


[root@vcontroller site]# cat hosts 
[controllers]
vcontroller
[staticcomputes]
node0001
node0002
[ood]
#portal
[vncnodes]
[logins]


[root@seashell kolla]# cat admin-openrc.sh 
export OS_PROJECT_DOMAIN_NAME=Default
export OS_USER_DOMAIN_NAME=Default
export OS_PROJECT_NAME=admin
export OS_TENANT_NAME=admin
export OS_USERNAME=admin
export OS_PASSWORD=CSNOm67PJTEdOHOLrcWnPsa3d4ydxHWRsz4RA862
export OS_AUTH_URL=http://192.168.88.233:35357/v3
export OS_INTERFACE=internal
export OS_IDENTITY_API_VERSION=3
export OS_REGION_NAME=RegionOne
export OS_AUTH_PLUGIN=password


[root@seashell kolla]# grep -v "#" globals.yml 
---
openstack_release: "stein"
kolla_internal_vip_address: "192.168.88.233"
docker_registry: "registry.vscaler.com:5000"

network_interface: "eno1"

neutron_external_interface: "eno2"

enable_haproxy: "no"

enable_cinder: "yes"
enable_cinder_backend_lvm: "yes"
enable_ironic: "yes"
enable_ironic_ipxe: "yes"
enable_ironic_neutron_agent: "yes"
glance_enable_rolling_upgrade: "no"

ironic_dnsmasq_interface: "{{ network_interface }}"
ironic_dnsmasq_dhcp_range: "192.168.88.50,192.168.88.60"
ironic_dnsmasq_boot_file: "pxelinux.0"
ironic_inspector_kernel_cmdline_extras: ['ipa-lldp-timeout=90.0', 'ipa-collect-lldp=1']



[root@seashell ironic]# grep -v "#" ironic-conductor.conf 
[DEFAULT]
debug = True
log_dir = /var/log/kolla/ironic
transport_url = rabbit://openstack:6HFpy2oNyez4PbpTsjY29TSTRnP7IjBOD8y06vwL@192.168.88.233:5672//
my_ip = 192.168.88.233

[oslo_messaging_notifications]
transport_url = rabbit://openstack:6HFpy2oNyez4PbpTsjY29TSTRnP7IjBOD8y06vwL@192.168.88.233:5672//

[conductor]
api_url = http://192.168.88.233:6385
automated_clean = false

[database]
connection = mysql+pymysql://ironic:70mOUN3Gpi7ikPD7uzZBiaU7an1jS4hqOjXv3LPZ@192.168.88.233:3306/ironic
max_retries = -1

[keystone_authtoken]
www_authenticate_uri = http://192.168.88.233:5000
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
memcache_security_strategy = ENCRYPT
memcache_secret_key = DDTRcI9cLlneNaUEqx4DJMtLhhsqdYjeJ2OxpdAz
memcached_servers = 192.168.88.233:11211

[glance]
glance_api_servers = http://192.168.88.233:9292
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ

[neutron]
url = http://192.168.88.233:9696
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
cleaning_network =

[inspector]
enabled = true
auth_url = http://192.168.88.233:35357
auth_type = password
project_domain_id = default
user_domain_id = default
project_name = service
username = ironic
password = p03kBxk6gelVGK79ZFDhQvDpoWyAGKW01DqruUZJ
endpoint_override = http://192.168.88.233:5050

[agent]
deploy_logs_local_path = /var/log/kolla/ironic
deploy_logs_storage_backend = local
deploy_logs_collect = on_failure

[pxe]
pxe_append_params = nofb nomodeset vga=normal console=tty0 console=ttyS0,115200n8 console=tty0 sshkey="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQChMdR4SZE2IZazLg2COA8uo6oPzbgXpSJ1QoHsNIKDivYwQiuBruBLZDdUErcm5KnZ7DmxxoBmN3dNBwpeQEVDn4CennaGmirZJC8cUfnq8rKRLK1MwamZZD7y9sS4bXJs7pqaeI1921l2uthBDsQ4r1XZ5f+t0c8ZeTDr9mwdhgOUDglrved5fjefJXwZUmR3EBlelWeqWFSk0/oazAzGWg+IVEgwEk4F6XIpmOPRdrDKU7tT3zXCQSf2cKPsB5ajTxTW66i94+wJnRRf/c/Yf7QLKymeGsw6d/ZoPqI/LVZRkh42kNN9sml3x6SkzZP8dnQU91xoQdQPCqc09XJ3Wfv7ERhgLXx6D/kN7STKXXTHKb/+Ab/HK96sbzuksuz2dbiBv/37kkXL9Oayr0IArDvUeW6dv2/Vg409G+Xf/tESOmm3hO9+WcVrCwBCMIuiKSTVrSrvmI19v5VuVeO0Ujb+6MwRSP/qCY7cTINt1y+lZmai6OG3ek+IcJc274U= vipul@localhost.localdomain" ipa-debug=1 coreos.autologin
ipxe_enabled = True
pxe_bootfile_name = undionly.kpxe
uefi_pxe_bootfile_name = ipxe.efi
pxe_config_template = $pybasedir/drivers/modules/ipxe_config.template
uefi_pxe_config_template = $pybasedir/drivers/modules/ipxe_config.template
tftp_root = /httpboot
tftp_master_path = /httpboot/master_images
tftp_server = 192.168.88.233

[deploy]
http_url = http://192.168.88.233:8089
default_boot_option = local

[oslo_middleware]
enable_proxy_headers_parsing = True



[root@seashell neutron]# cat ml2_conf.ini 
[ml2]
type_drivers = flat,vlan,vxlan
tenant_network_types = vxlan
mechanism_drivers = openvswitch,l2population
extension_drivers = port_security

[ml2_type_vlan]
network_vlan_ranges = physnet1:300:400

[ml2_type_flat]
flat_networks = physnet1

[ml2_type_vxlan]
vni_ranges = 1:1000
vxlan_group = 239.1.1.1

[securitygroup]
firewall_driver = neutron.agent.linux.iptables_firewall.OVSHybridIptablesFirewallDriver

[agent]
tunnel_types = vxlan
l2_population = true
arp_responder = true

[ovs]
bridge_mappings = physnet1:br-ex
datapath_type = system
ovsdb_connection = tcp:127.0.0.1:6640
local_ip = 192.168.88.233



[root@seashell kolla]# ip r
default via 192.168.88.254 dev eno1 proto dhcp metric 100 
10.6.44.0/24 via 192.168.88.253 dev eno1 
172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown 
192.168.88.0/22 dev eno1 proto kernel scope link src 192.168.88.233 metric 100 


Prologctld.py

#!/usr/bin/env python3

import subprocess
import json
import os
import configparser

# Set up config handling

def run_command(command):
    cmd_arr = command.split(' ') 
    process = subprocess.Popen(cmd_arr, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(cmd_arr)
    stdout, stderr = process.communicate()
    print(stdout)
    print(stderr)
    return str(stdout)

def read_config():
    config = configparser.ConfigParser()

    ret = config.read('/etc/slurm/gigaio.conf')
    return config
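
# Note: /etc/slurm/gigaio.conf is not reproduced on this page. Judging from the
# cnf.get('NODES', node) and cnf.get('PORTS', str(unbound[i])) calls in main()
# below, it is an INI file roughly like this (values illustrative only):
#
#   [NODES]        ; node name -> fabric partition id
#   node0001 = 0
#   node0002 = 1
#
#   [PORTS]        ; switch port id -> number of GPUs behind that port
#   9 = 8
#   17 = 8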

def write_log(msg):
    f = open("/var/log/slurm/prologctld.log", "a")
    f.write(str(msg))
    f.close()

def main():

    # Read environment variables
    env_var = os.environ


    # Check if GPU requested
    if 'SLURM_JOB_LICENSES' in env_var and 'GIOGPU' in env_var['SLURM_JOB_LICENSES']:
        pass
    else:
        exit(0)

    


    # Obtain Global lock

    # Read nodelist 
    if 'SLURM_JOB_NODELIST' in env_var:
        # SLURM_JOB_NODELIST can be a compressed hostlist (e.g. node[0001-0002]);
        # this plain split(',') assumes an uncompressed, comma-separated list
        nodelist = env_var['SLURM_JOB_NODELIST'].split(',')
        node_count = len(nodelist)
        print("Nodelist is " + str(nodelist)) 
    else:
        exit(1)

    # Read and Parse config 
    cnf = read_config()
    print(cnf)

    # Read how many GPUs are requested
    licenses = env_var['SLURM_JOB_LICENSES'].split(',')
    gpu_count = 0
    for lic in licenses:
        if 'GIOGPU' in lic:
            gpu_count = int(lic.split(':')[1])
            break

    if gpu_count < 1:
        exit(1)

    if 'SLURM_JOBID' in env_var:
        job_id = env_var['SLURM_JOBID']

    gpu_per_node = gpu_count / node_count
    
    # Find out which GPU unbound ports can be assigned from fmtool and conf
    # based on availability and size
    
    cmd_output = run_command("fmtool -s virgo12a")
    cmd_json = (cmd_output.split('Response:')[1].split('Success')[0]).replace("\\n"," ") 
    print(str(cmd_json))
    cmd_obj = json.loads(str(cmd_json))

    unbound = cmd_obj["body"]["fabric"]["switches"][0]["binding"]["unbound"]
    print(unbound) 
    if len(unbound) == 0:
        print("No unbound ports available")
        exit(1)
    
    allocation = {}
    unbound_start = 0

    for node in nodelist:
        allocation[node] = {}
        # Get partition id of nodes from conf file
        allocation[node]['part_id'] = cnf.get('NODES', node)
        allocation[node]['ports'] = []
        node_budget = gpu_per_node

        for i in range(unbound_start, len(unbound)):

            if node_budget <= 0:
                break
            print(unbound[i])
            # Get size of port from conf file
            size = int(cnf.get('PORTS', str(unbound[i])))

            if (node_budget - size) >= 0:
                node_budget = node_budget - size
                allocation[node]['ports'].append(unbound[i])

            unbound_start = unbound_start + 1

        if node_budget > 0:
            print("Unbound ports not available")
            exit(1)

    print(allocation)

    # Assign unbound ports to partition ids
    for node in allocation:
        for port in allocation[node]['ports']:
            # Bind port to node part_id
            run_command('fmtool -B switch:virgo12a,part_id:' + str(allocation[node]['part_id']) + ',port_id:' + str(port) + ' virgo12a')

    #Reboot Nodes
    for node in allocation:
        run_command('scontrol reboot '+ node)        

    #Release Lock




main()
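
The "# Obtain Global lock" and "#Release Lock" comments in main() above are placeholders with no implementation. Below is a minimal sketch of one way to do it with a file lock, so that concurrent prolog runs do not hand the same unbound ports to two jobs; the lock file path and function names are assumptions, not defined anywhere else on this page.

# Sketch only: global file lock for the prolog's "Obtain Global lock" / "Release Lock" steps.
import fcntl

def acquire_global_lock(path="/var/run/gigaio_prolog.lock"):   # hypothetical path
    f = open(path, "w")
    fcntl.flock(f, fcntl.LOCK_EX)   # block until no other prolog instance holds the lock
    return f

def release_global_lock(f):
    fcntl.flock(f, fcntl.LOCK_UN)
    f.close()

main() would call acquire_global_lock() right after the license check and release_global_lock() after the scontrol reboot loop.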