Set up HSM on Lustre

From Define Wiki
Jump to navigation Jump to search

On the node hosting the HSM agent:

  1. add the node as an HSM Agent Node
  2. enable HSM coordinator on Lustre Dashboard
    1. Config > Filesystem > Advanced > hsm_control: enabled
  3. provision an HSM agent with POSIX
    1. config > HSM > Add Copytool

set up network

    2  cat /etc/hosts
    3  hostname lHsm.boston.co.uk
    4  vim /etc/sysconfig/network
    5  exit
    6  ip a
7  lsblk

create archive storage

    8  mkdir -p /archive/demo
    9  mkfs.ext4 -L ARC1 /dev/sdb
   10  vim /etc/fstab 
   11  mkdir -p /archive/demo
   12  mount -L ARC1
   13  dmesg | tail 
   14  ls
   15  df
   16  lsblk
   17  mkfs.ext4 /dev/sdb1
   18  mkfs.ext4 /dev/sdb
   19  smartctl -H /dev/sdb
   20  df
   21  mount /dev/sdb /archive/demo/
   22  df
   23  umount /dev/sdb
   24  df
   25  vim /etc/fstab 
   26  printf "LABEL=ARC1\t\t/archive/demo\text4\tdefaults\t0 0\n" >>/etc/fstab
   27  vim /etc/fstab 
   28  printf "LABEL=ARC1\t\t/archive/demo\text4\tdefaults\t0 0\n" >>/etc/fstab
   29  vim /etc/fstab 
   30  mount -L ARC1
   31  df
   32  top
   33  uname -r
install packages for the Lustre client
   34  rpm -qa|grep -i lustre
   35  uname 0r
   36  uname -r
   37  ls
   38  unzip archive.zip 
   39  ls
   40  cd artifacts/
   41  ls
   42  ls *
   43  cd RPMS/
   44  ls
   45  ls *
   46  cd x86_64/
   47  ls
   48  uname -r
   49  yum search kernel
   50  ls
   51  yum install kernel-ib-*
   52  yum install lustre-client-2.5.34-2.6.32_504.12.2.el6.x86_64.x86_64.rpm lustre-client-modules-2.5.34-2.6.32_504.12.2.el6.x86_64.x86_64.rpm mlnx-ofa_kernel-debuginfo-2.4-2.6.32_504.12.2.el6.x86_64.x86_64.rpm 
   53  uname -r
   54  vim /etc/grub.conf 
   55  ls
   56  yum install lustre-client-tests-2.5.34-2.6.32_504.12.2.el6.x86_64.x86_64.rpm 
   57  reboot
   58  ls
install mlnx driver
   59  tar xzvf MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64.tgz 
   60  cd MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64
   61  ls
   62  ./mlnx_add_kernel_support.sh -m /root/MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64 --make-tgz
   63  ls
   64  cd
   65  ls
   66  cd  ar
   67  cd ar
   68  cd artifacts/
   69  ls
   70  ls *
   71  tree
   72  find .|grep lustre
   73  find .|grep kernel#
   74  find .|grep kernel
   75  uname -r
   76  yum install kernel-devel
   77  uname -0r
   78  uname -r
   79  sudo yum install "kernel-devel-uname-r == $(uname -r)"
   80  ls
   81  cd 
   82  cd ml
   83  ls
   84  cd MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64
   85  ls
   86  ./mlnx_add_kernel_support.sh -m /root/MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64 --make-tgz
   87  cat /tmp/mlnx_ofed_iso.14503.log 
   88  yum install python-devel rpm-build gcc
   89  ./mlnx_add_kernel_support.sh -m /root/MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64 --make-tgz
   90  cd /tmp/
   91  ls
   92  tar xzvf MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64-ext.tgz 
   93  ls
   94  uanme -r
   95  uname -r
   96  cd MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64-ext
   97  ls
   98  ./mlnxofedinstall 
   99  yum install gtk2 atk cairo tcl gcc-gfortran tk
  100  ./mlnxofedinstall 
  101  /etc/init.d/openibd restart
  102  cd /etc/sysconfig/network-scripts/
  103  ls
set up the InfiniBand (ib0) network and check the Lustre config
  104  vim ifcfg-ib0 
  105  ifup ib0
  106  cat /etc/modprobe.d/iml_lnet_module_parameters.conf 
  107  lctl list_nids
  110  top
  111  tail /tmp/mlnx_ofed_iso.14
  112  tail /tmp/mlnx_ofed_iso.14897.log 
  113  watch tail /tmp/mlnx_ofed_iso.14897.log 
  114  exit
  115  ls
  116  scp archive.zip root@172.28.1.39:~
  117  vim /etc/sysconfig/network-scripts/ifcfg-ib0 
  118  df
  119  ps -ef|grep hsm
  120  lctl get_param mdt.*.hsm.agents
  121  df 
  122  df -ht lustre
check if the HSM is working by manually issuing commands
  123  cd /mnt/lfs1/
  124  ls
  125  dd if=/dev/zero of=f001 bs=1M count=1
  126  lfs hsm_state f001
  127  lfs hsm_archive --archive 1 f001
  128  lfs hsm_state f001
  129  find /archive/demo/
  130  df
  131  lfs hsm_*
  132  lctl get_param mdt.*.hsm.default_archive_id
  133  lfs hsm_state f001
  134  dd if=/dev/zero of=f002 bs=1M count=200
  135  dd if=/dev/zero of=f002 bs=1M count=2048
  136  lfs hsm_archive --archive 1 f002
  137  lfs hsm_state f002
  138  df -ht lustre
  139  lfs hsm_release f002
  140  lfs hsm_state f002
  141  lfs hsm_release f002
  142  lfs hsm_state f002
  143  lfs hsm_restore f002

On the node hosting the Robinhood policy engine:

  • add the node as a Robinhood Policy Engine Server
# install packages needed by the Lustre client
   14  yum install kernel-ib-* lustre-client-*  mlnx-ofa_kernel-debuginfo-2.4-2.6.32_504.12.2.el6.x86_64.x86_64.rpm lustre-iokit-2.5.34-2.6.32_504.12.2.el6.x86_64.x86_64.rpm 
   15  reboot
# set up network and install kernel devel
   16  hostname lRob.boston.co.uk
   17  vim /etc/sysconfig/network
   18  exit
   19  uname -r
   20  ip a
   21  sudo yum install "kernel-devel-uname-r == $(uname -r)"
   22  ls
   23  tar xzvf 2.6.32-504.12.2.el6.x86_64.tgz 
   24  cd MLNX_OFED_LINUX-2.4-1.0.0-rhel6.6-x86_64-ext/
   25  ls
   26  ./mlnxofedinstall 
   27  yum install gtk2 atk cairo gcc-gfortran tk
   28  ./mlnxofedinstall 
   29  /etc/init.d/openibd restart
   30  cd /etc/sysconfig/network-scripts/
   31  ls
   32  vim ifcfg-ib0 
   33  ifup ib0
   34  ip a
   35  ping 192.168.0.1
   36  ping 192.168.0.5
   37  ping 192.168.0.2
   38  p a
   39  ip a
   40  cat /etc/hosts
   41  top
   42  ls
   43  cd
   44  cat /etc/modprobe.d/iml_lnet_module_parameters.conf 
   45  mkdir -p /mnt/lfs1
   46  mount -t lustre 192.168.0.1@o2ib0:192.168.0.2@o2ib0:/lfs1 /mnt/lfs1
   47  df
   48  slblk
   49  lsblk
   50  df
# set up robinhood
   51  rpm -ql robinhood-adm
   52  which rbh-config
   53  service mysqld start
   54  /usr/bin/mysql_secure_installation 
   55  chkconfig mysqld on
   56  rbh-config precheck_db
   57  rbh-config create_db
   58  mysql -p -h localhost -u robinhood
# store the password in .dbpassword file
   59  vim /etc/robinhood.d/.dbpassword
   60  chmod 600 /etc/robinhood.d/.dbpassword 
   61  cd /etc/robinhood.d/lhsm/
   62  ls
   63  cd templates/
   64  ls
   65  cd /etc/robinhood.d/lhsm/
   66  ls
   67  cd templates/
   68  ls
   69  cp lhsm_detailed.conf ..
   70  cd 
   71  cd -
   72  cd ..
   73  ls
   74  rm lhsm_detailed.conf 
   75  touch lfs1-lustre-hsm.conf  # file content is given at the end of this page
   76  vim lfs1-lustre-hsm.conf
   77  df
   78  mkdir -m 0700 -p /var/log/robinhood
   79  rbh-lhsm --scan --once -f /etc/robinhood.d/lhsm/lfs1-lustre-hsm.conf 
# start robinhood
   80  service robinhood-lhsm start
   81  service robinhood-lhsm status

lfs1-lustre-hsm.conf:

[root@lRob lhsm]# cat lfs1-lustre-hsm.conf 
# Robinhood policy-engine configuration for Lustre HSM on filesystem lfs1.
# NOTE(review): thresholds and intervals below are site-specific demo values —
# tune before production use.
General 
{
# file system to be monitored 
	fs_path = "/mnt/lfs1" ;
}
Log 
{
# Log file
	log_file = "/var/log/robinhood/lustre_hsm_lfs1.log" ;
# File for reporting purge events 
	report_file = "/var/log/robinhood/lustre_hsm_lfs1_reports.log" ;
# Alerts file
	alert_file = "/var/log/robinhood/lustre_hsm_lfs1_alerts.log" ;
}
# Database backend used to track filesystem entries.
ListManager 
{
	commit_behavior = transaction ;
	MySQL 
	{
		server = "localhost" ;
		db = "robinhood_lfs1" ;
		user = "robinhood" ;
		# password kept out of this file (created earlier, mode 0600)
		password_file = "/etc/robinhood.d/.dbpassword" ;
		engine = InnoDB ;
	}
}
# MDT changelog reader: keeps the database in sync with the filesystem
# without requiring repeated full scans.
ChangeLog
{
	MDT 
	{
		mdt_name = "MDT0000" ;
		reader_id = "cl1" ;
	} 
	# NOTE(review): presumably forces periodic polling of the changelog
	# rather than relying on notifications — confirm against Robinhood docs
	force_polling = ON ;
}

Filesets {
	# files of at most 1KB anywhere under the mount point;
	# referenced by Migration_policies below to skip tiny files
	FileClass small_files {
		definition { 
			tree == "/mnt/lfs1" 
			and
			size <= 1KB 
		}
	} 
}

Migration_parameters { 
	nb_threads_migration = 4;
	runtime_interval = 15min;
	# per-run caps on how many files / how much data get archived
	max_migration_count = 10000; 
	max_migration_volume = 10TB;
	check_copy_status_on_startup = TRUE;
}

Migration_policies { 
	# tiny files are not worth the archive overhead
	ignore_fileclass = small_files; 

	policy default { 
		# archive when unmodified for 4h, or when the last archive
		# copy is older than 12h
		condition {
			last_mod > 4h 
			or
			last_archive > 12h 
		} 
	}
}

Purge_parameters { 
	nb_threads_purge = 4; 
	# delay before re-checking df after a purge run
	post_purge_df_latency = 1min;
	check_purge_status_on_startup = TRUE;
}

Purge_Policies { 
	# empty files free no space — never release them
	ignore {
		size == 0
	}

	policy default { 
		# only release files idle for 12h and unmodified for a day
		condition {
			last_access > 12h 
			and
			last_mod > 1d
		} 
	}
}

# Release triggers: fire when usage crosses the high threshold and
# release archived files until the low threshold is reached.
Purge_trigger { 
	trigger_on = global_usage;
	high_threshold_pct = 90%; 
	low_threshold_pct = 85%; 
	check_interval = 15min;
}
Purge_trigger { 
	trigger_on = OST_usage;
	high_threshold_pct = 85%; 
	low_threshold_pct = 80%;
	check_interval = 15min;
}

Purge_trigger { 
	trigger_on = user_usage;
	high_threshold_vol = 1TB; 
	low_threshold_vol = 750GB;
	check_interval = 4h;
}
# Clean up archive copies of files deleted from Lustre,
# after a 24h grace period.
hsm_remove_policy { 
	hsm_remove = TRUE;
	deferred_remove_delay = 24h;
}
hsm_remove_parameters { 
	nb_threads_rm = 4; 
	max_rm_count = 10000; 
	runtime_interval = 15min;
}