Difference between revisions of "Lustre: Parallel FileSystem"
| Line 19: | Line 19: | ||
* [[Lustre: Using lustre-iokit]] | * [[Lustre: Using lustre-iokit]] | ||
* [[Lustre: Using IOR to Benchmark]] | * [[Lustre: Using IOR to Benchmark]] | ||
| + | |||
| + | == Lustre Configuration for MRC Notes == | ||
| + | 1. On all the servers create this configuration file: | ||
| + | for mds: | ||
| + | <syntaxhighlight> | ||
| + | #cat /etc/modprobe.d/lustre.conf | ||
| + | options lnet networks=tcp0(eth2) | ||
| + | </syntaxhighlight> | ||
| + | for oss | ||
| + | <syntaxhighlight> | ||
| + | #cat /etc/modprobe.d/lustre.conf | ||
| + | options lnet networks=tcp0(bond0) | ||
| + | </syntaxhighlight> | ||
| + | load the lustre module | ||
| + | <syntaxhighlight> | ||
| + | modprobe -v lustre | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | 2. create lustre filesystem | ||
| + | on MDS1 | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --mgs --reformat --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 /dev/sda | ||
| + | |||
| + | mkfs.lustre --mdt --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | on MDS2 | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --mdt --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --mkfsoptions="-J size=2048" /dev/sdc | ||
| + | </syntaxhighlight> | ||
| + | on lustre01-oss1.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.200@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre01-oss2.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs1 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.201@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre01-oss3.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs1 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.202@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre01-oss4.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs1 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.203@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre02-oss1.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.210@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre02-oss2.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs2 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.211@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre02-oss3.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs2 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.212@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on lustre02-oss4.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mkfs.lustre --ost --fsname=lfs2 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.213@tcp0 --mkfsoptions="-J size=2048" /dev/sdb | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on all the nodes: | ||
| + | |||
| + | <syntaxhighlight> | ||
| + | mkdir -p /lustre/mgt | ||
| + | mkdir -p /lustre/lfs1-mdt | ||
| + | mkdir -p /lustre/lfs2-mdt | ||
| + | mkdir -p /lustre/lfs1-ost00 | ||
| + | mkdir -p /lustre/lfs1-ost01 | ||
| + | mkdir -p /lustre/lfs1-ost02 | ||
| + | mkdir -p /lustre/lfs1-ost03 | ||
| + | mkdir -p /lustre/lfs2-ost00 | ||
| + | mkdir -p /lustre/lfs2-ost01 | ||
| + | mkdir -p /lustre/lfs2-ost02 | ||
| + | mkdir -p /lustre/lfs2-ost03 | ||
| + | mkdir -p /lustre/lfs2-ost04 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | disable iptables and selinux | ||
| + | |||
| + | mounting the targets | ||
| + | |||
| + | on MDS1 | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sda /lustre/mgt | ||
| + | |||
| + | mount -t lustre /dev/sdb /lustre/lfs1-mdt | ||
| + | </syntaxhighlight> | ||
| + | on lustre01-oss1.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs1-ost00 | ||
| + | </syntaxhighlight> | ||
| + | on lustre01-oss2.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs1-ost01 | ||
| + | </syntaxhighlight> | ||
| + | on lustre01-oss3.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs1-ost02 | ||
| + | </syntaxhighlight> | ||
| + | on lustre01-oss4.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs1-ost03 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | on MDS2 | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdc /lustre/lfs2-mdt | ||
| + | </syntaxhighlight> | ||
| + | on lustre02-oss1.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs2-ost00 | ||
| + | </syntaxhighlight> | ||
| + | on lustre02-oss2.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs2-ost01 | ||
| + | </syntaxhighlight> | ||
| + | on lustre02-oss3.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs2-ost02 | ||
| + | </syntaxhighlight> | ||
| + | on lustre02-oss4.boston.co.uk | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre /dev/sdb /lustre/lfs2-ost03 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | mount client: | ||
| + | <syntaxhighlight> | ||
| + | mount -t lustre 10.10.17.193@tcp0:10.10.17.194@tcp0:/lfs1 /mnt/lfs1 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | Configuration file to create on MDS1/2 for HA | ||
| + | |||
| + | install pacemaker, corosync and pcs | ||
| + | |||
| + | |||
| + | cat /etc/corosync/corosync.conf for MDS pair | ||
| + | <syntaxhighlight> | ||
| + | # Please read the corosync.conf.5 manual page | ||
| + | compatibility: whitetank | ||
| + | |||
| + | totem { | ||
| + | version: 2 | ||
| + | secauth: off | ||
| + | interface { | ||
| + | member { | ||
| + | memberaddr: 172.28.50.193 | ||
| + | } | ||
| + | member { | ||
| + | memberaddr: 172.28.50.194 | ||
| + | } | ||
| + | ringnumber: 0 | ||
| + | bindnetaddr: 172.28.50.0 | ||
| + | mcastport: 5405 | ||
| + | ttl: 1 | ||
| + | } | ||
| + | interface { | ||
| + | member { | ||
| + | memberaddr: 10.0.0.1 | ||
| + | } | ||
| + | member { | ||
| + | memberaddr: 10.0.0.2 | ||
| + | } | ||
| + | ringnumber: 1 | ||
| + | bindnetaddr: 10.0.0.0 | ||
| + | mcastport: 5405 | ||
| + | ttl: 1 | ||
| + | } | ||
| + | |||
| + | transport: udpu | ||
| + | token: 17000 | ||
| + | rrp_mode: passive | ||
| + | } | ||
| + | |||
| + | logging { | ||
| + | fileline: off | ||
| + | to_logfile: yes | ||
| + | to_syslog: yes | ||
| + | debug: on | ||
| + | logfile: /var/log/cluster/corosync.log | ||
| + | debug: off | ||
| + | timestamp: on | ||
| + | logger_subsys { | ||
| + | subsys: AMF | ||
| + | debug: off | ||
| + | } | ||
| + | } | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | |||
| + | cat /etc/corosync/service.d/pacemaker | ||
| + | <syntaxhighlight> | ||
| + | service { | ||
| + | # Load the Pacemaker Cluster Resource Manager | ||
| + | name: pacemaker | ||
| + | ver: 1 | ||
| + | } | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | |||
| + | |||
| + | cat /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS | ||
| + | chmod 755 /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS | ||
| + | <syntaxhighlight> | ||
| + | #!/bin/sh | ||
| + | # | ||
| + | # Lustre-LDISKFS | ||
| + | # Description: Manages a Lustre target on a shared storage medium. | ||
| + | # | ||
| + | # usage: ./Lustre-LDISKFS {start|stop|status|monitor|validate-all|meta-data} | ||
| + | # | ||
| + | # OCF parameters are as below: | ||
| + | # OCF_RESKEY_label | ||
| + | # OCF_RESKEY_mountpoint | ||
| + | # | ||
| + | # OCF_RESKEY_label : label of the target the script should operate on | ||
| + | # OCF_RESKEY_mountpoint : name of the target the script should operate on | ||
| + | |||
| + | : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} | ||
| + | . ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs | ||
| + | |||
| + | usage() { | ||
| + | echo "usage: $0 {start|stop|status|monitor|meta-data}" | ||
| + | } | ||
| + | |||
| + | meta_data() { | ||
| + | cat <<END | ||
| + | <?xml version="1.0"?> | ||
| + | <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> | ||
| + | <resource-agent name="Lustre-LDISKFS"> | ||
| + | <version>1.0</version> | ||
| + | |||
| + | <longdesc lang="en"> | ||
| + | Resource script for a Lustre LDISKFS Target. | ||
| + | </longdesc> | ||
| + | |||
| + | <shortdesc lang="en">Manages Lustre LDISKFS Targets</shortdesc> | ||
| + | |||
| + | <parameters> | ||
| + | <parameter name="label" required="1"> | ||
| + | <longdesc lang="en"> | ||
| + | The label of the target. blkid to display. | ||
| + | </longdesc> | ||
| + | <shortdesc lang="en">label</shortdesc> | ||
| + | <content type="string" default="" /> | ||
| + | </parameter> | ||
| + | |||
| + | <parameter name="mountpoint" required="1"> | ||
| + | <longdesc lang="en"> | ||
| + | The mount point for the target | ||
| + | </longdesc> | ||
| + | <shortdesc lang="en">mountpoint</shortdesc> | ||
| + | <content type="string" default="" /> | ||
| + | </parameter> | ||
| + | |||
| + | |||
| + | |||
| + | </parameters> | ||
| + | |||
| + | <actions> | ||
| + | <action name="start" timeout="60" /> | ||
| + | <action name="stop" timeout="60" /> | ||
| + | <action name="notify" timeout="60" /> | ||
| + | <action name="monitor" depth="0" timeout="40" interval="20" /> | ||
| + | <action name="validate-all" timeout="5" /> | ||
| + | <action name="meta-data" timeout="5" /> | ||
| + | </actions> | ||
| + | </resource-agent> | ||
| + | END | ||
| + | } | ||
| + | |||
| + | Target_start() { | ||
| + | # See if the device is already mounted. | ||
| + | if Target_status >/dev/null 2>&1; then | ||
| + | ocf_log info "Target $TARGET is already started." | ||
| + | return $OCF_SUCCESS | ||
| + | fi | ||
| + | |||
| + | # this is not necessary, mount should start modules | ||
| + | # if ! grep -e 'lustre$' /proc/filesystems >/dev/null; then | ||
| + | # ocf_log err "Couldn't find the lustre module in /proc/filesystems" | ||
| + | # return $OCF_ERR_ARGS | ||
| + | # fi | ||
| + | |||
| + | # start the target | ||
| + | # if ! chroma-agent mount_target --uuid $TARGET; then | ||
| + | if ! mount -t lustre LABEL=$TARGET $MOUNT_POINT; then | ||
| + | ocf_log err "Couldn't start target $TARGET" | ||
| + | return $OCF_ERR_GENERIC | ||
| + | fi | ||
| + | return $OCF_SUCCESS | ||
| + | } | ||
| + | |||
| + | Target_notify() { | ||
| + | return $OCF_SUCCESS | ||
| + | } | ||
| + | |||
| + | Target_stop() { | ||
| + | # started already? | ||
| + | Target_status >/dev/null 2>&1 | ||
| + | if [ $? -eq $OCF_NOT_RUNNING ]; then | ||
| + | # woo! nothing to do. | ||
| + | rc=$OCF_SUCCESS | ||
| + | else | ||
| + | # chroma-agent unmount_target --uuid $TARGET | ||
| + | umount $MOUNT_POINT | ||
| + | fi | ||
| + | |||
| + | return $rc | ||
| + | } | ||
| + | |||
| + | Target_status() { | ||
| + | # call the agent to see if it's running | ||
| + | |||
| + | |||
| + | # if chroma-agent target_running --uuid $TARGET >/dev/null 2>&1; then | ||
| + | if cat /proc/mounts |grep $MOUNT_POINT >/dev/null 2>&1; then | ||
| + | rc=$OCF_SUCCESS | ||
| + | msg="$TARGET is started (running)" | ||
| + | else | ||
| + | rc=$OCF_NOT_RUNNING | ||
| + | msg="$TARGET is stopped" | ||
| + | fi | ||
| + | |||
| + | if [ "$OP" = "status" ]; then | ||
| + | ocf_log info "$msg" | ||
| + | fi | ||
| + | |||
| + | return $rc | ||
| + | } | ||
| + | |||
| + | Target_validate_all() { | ||
| + | return $OCF_SUCCESS | ||
| + | } | ||
| + | |||
| + | if [ $# -ne 1 ]; then | ||
| + | usage | ||
| + | exit $OCF_ERR_ARGS | ||
| + | fi | ||
| + | |||
| + | TARGET=$OCF_RESKEY_label | ||
| + | MOUNT_POINT=$OCF_RESKEY_mountpoint | ||
| + | OP=$1 | ||
| + | |||
| + | # These operations do not require instance parameters | ||
| + | case $OP in | ||
| + | meta-data) meta_data | ||
| + | exit $OCF_SUCCESS | ||
| + | ;; | ||
| + | usage) usage | ||
| + | exit $OCF_SUCCESS | ||
| + | ;; | ||
| + | status) Target_status | ||
| + | exit $? | ||
| + | ;; | ||
| + | monitor) Target_status | ||
| + | exit $? | ||
| + | ;; | ||
| + | validate-all) Target_validate_all | ||
| + | exit $? | ||
| + | ;; | ||
| + | stop) Target_stop | ||
| + | exit $? | ||
| + | ;; | ||
| + | start) Target_start | ||
| + | ;; | ||
| + | *) usage | ||
| + | exit $OCF_ERR_UNIMPLEMENTED | ||
| + | ;; | ||
| + | esac | ||
| + | exit $? | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | On MDS1 | ||
| + | <syntaxhighlight> | ||
| + | pcs resource create MGT ocf:heartbeat:Lustre-LDISKFS label="MGS" mountpoint="/lustre/mgt" | ||
| + | pcs resource create lfs1_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs1-MDT0000" mountpoint="/lustre/lfs1-mdt" | ||
| + | </syntaxhighlight> | ||
| + | On MDS2 | ||
| + | <syntaxhighlight> | ||
| + | pcs resource create lfs2_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs2-MDT0000" mountpoint="/lustre/lfs2-mdt" | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | More notes: | ||
| + | notes: | ||
| + | # no need to add mount point on /etc/fstab | ||
| + | # no need for pcsd and etc on oss nodes | ||
| + | # before, failover | ||
| + | ##make sure data movement through | ||
| + | ### pcs resource move mgt to lustre02 | ||
| + | ### pcs resource move lfs1 mgt ... | ||
| + | ### pcs resource ... | ||
| + | ### pcs property set no-quorum-policy=ignore | ||
| + | ### need to enlarge the time interval to 300 second through pcs resource show --full | ||
| + | #### <syntaxhighlight> pcs resource update Lustre_mdt op stop interval=0 timeout=300 </syntaxhighlight> | ||
| + | #### <syntaxhighlight> pcs resource update Lustre_mdt op monitor interval=5 timeout=60 </syntaxhighlight> | ||
| + | ### when using infiniband, we need to make sure lustre module start before infiniband module | ||
| + | ### set rules for priority to let resource on two nodes(not recommended). Normally, a resource should just on one node. | ||
| + | ### need to keep an eye on /var/log message make sure no more error messages coming out. | ||
Revision as of 14:55, 24 March 2015
Lustre
Installation
Configuration / Operation
- Lustre: Lustre Acronyms
- Lustre: Verify OST df usage
- Lustre: Migrate data from a full OST
- Lustre: Using lfs to change strip size
- Lustre: Verify Lustre FS creation parameters
- Lustre: Configuraing and Managing Quotas
Benchmarking
Lustre Configuration for MRC Notes
1. On all the servers create this configuration file:
for mds:
#cat /etc/modprobe.d/lustre.conf
options lnet networks=tcp0(eth2)for oss
#cat /etc/modprobe.d/lustre.conf
options lnet networks=tcp0(bond0)load the lustre module
modprobe -v lustre2. create lustre filesystem on MDS1
mkfs.lustre --mgs --reformat --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 /dev/sda
mkfs.lustre --mdt --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon MDS2
mkfs.lustre --mdt --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --mkfsoptions="-J size=2048" /dev/sdcon lustre01-oss1.boston.co.uk
mkfs.lustre --ost --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.200@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre01-oss2.boston.co.uk
mkfs.lustre --ost --fsname=lfs1 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.201@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre01-oss3.boston.co.uk
mkfs.lustre --ost --fsname=lfs1 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.202@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre01-oss4.boston.co.uk
mkfs.lustre --ost --fsname=lfs1 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.203@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre02-oss1.boston.co.uk
mkfs.lustre --ost --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.210@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre02-oss2.boston.co.uk
mkfs.lustre --ost --fsname=lfs2 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.211@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre02-oss3.boston.co.uk
mkfs.lustre --ost --fsname=lfs2 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.212@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon lustre02-oss4.boston.co.uk
mkfs.lustre --ost --fsname=lfs2 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.213@tcp0 --mkfsoptions="-J size=2048" /dev/sdbon all the nodes:
mkdir -p /lustre/mgt
mkdir -p /lustre/lfs1-mdt
mkdir -p /lustre/lfs2-mdt
mkdir -p /lustre/lfs1-ost00
mkdir -p /lustre/lfs1-ost01
mkdir -p /lustre/lfs1-ost02
mkdir -p /lustre/lfs1-ost03
mkdir -p /lustre/lfs2-ost00
mkdir -p /lustre/lfs2-ost01
mkdir -p /lustre/lfs2-ost02
mkdir -p /lustre/lfs2-ost03
mkdir -p /lustre/lfs2-ost04disable iptables and selinux
mounting the targets
on MDS1
mount -t lustre /dev/sda /lustre/mgt
mount -t lustre /dev/sdb /lustre/lfs1-mdton lustre01-oss1.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs1-ost00on lustre01-oss2.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs1-ost01on lustre01-oss3.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs1-ost02on lustre01-oss4.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs1-ost03on MDS2
mount -t lustre /dev/sdc /lustre/lfs2-mdton lustre02-oss1.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs2-ost00on lustre02-oss2.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs2-ost01on lustre02-oss3.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs2-ost02on lustre02-oss4.boston.co.uk
mount -t lustre /dev/sdb /lustre/lfs2-ost03mount client:
mount -t lustre 10.10.17.193@tcp0:10.10.17.194@tcp0:/lfs1 /mnt/lfs1
Configuration file to create on MDS1/2 for HA
install pacemaker, corosync and pcs
cat /etc/corosync/corosync.conf for MDS pair
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {
version: 2
secauth: off
interface {
member {
memberaddr: 172.28.50.193
}
member {
memberaddr: 172.28.50.194
}
ringnumber: 0
bindnetaddr: 172.28.50.0
mcastport: 5405
ttl: 1
}
interface {
member {
memberaddr: 10.0.0.1
}
member {
memberaddr: 10.0.0.2
}
ringnumber: 1
bindnetaddr: 10.0.0.0
mcastport: 5405
ttl: 1
}
transport: udpu
token: 17000
rrp_mode: passive
}
logging {
fileline: off
to_logfile: yes
to_syslog: yes
debug: on
logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
}
}
cat /etc/corosync/service.d/pacemaker
service {
# Load the Pacemaker Cluster Resource Manager
name: pacemaker
ver: 1
}
cat /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS
chmod 755 /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS
#!/bin/sh
#
# Lustre-LDISKFS
# Description: Manages a Lustre target on a shared storage medium.
#
# usage: ./Lustre-LDISKFS {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_label
# OCF_RESKEY_mountpoint
#
# OCF_RESKEY_label : label of the target the script should operate on
# OCF_RESKEY_mountpoint : name of the target the script should operate on
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs
# Print the list of actions this resource agent understands.
usage() {
  printf '%s\n' "usage: $0 {start|stop|status|monitor|meta-data}"
}
# Emit the OCF resource-agent metadata XML on stdout.
# Pacemaker calls this via the "meta-data" action to discover the
# agent's parameters (label, mountpoint) and supported actions.
# NOTE: the heredoc body is the agent's literal output — do not edit
# its text without checking it still validates against ra-api-1.dtd.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Lustre-LDISKFS">
<version>1.0</version>
<longdesc lang="en">
Resource script for a Lustre LDISKFS Target.
</longdesc>
<shortdesc lang="en">Manages Lustre LDISKFS Targets</shortdesc>
<parameters>
<parameter name="label" required="1">
<longdesc lang="en">
The label of the target. blkid to display.
</longdesc>
<shortdesc lang="en">label</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="mountpoint" required="1">
<longdesc lang="en">
The mount point for the target
</longdesc>
<shortdesc lang="en">mountpoint</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="monitor" depth="0" timeout="40" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Mount the Lustre target whose filesystem label is $TARGET on
# $MOUNT_POINT.
# Globals:  TARGET (read), MOUNT_POINT (read)
# Returns:  OCF_SUCCESS if already mounted or mount succeeds;
#           OCF_ERR_GENERIC if the mount fails.
Target_start() {
  # Nothing to do if the target is already mounted.
  if Target_status >/dev/null 2>&1; then
    ocf_log info "Target $TARGET is already started."
    return $OCF_SUCCESS
  fi

  # No explicit module check needed: mount(8) loads the lustre
  # modules on demand (original chroma-agent call kept for history):
  # if ! chroma-agent mount_target --uuid $TARGET; then
  # Quote label and mount point so values containing spaces cannot
  # word-split and mount the wrong thing.
  if ! mount -t lustre "LABEL=$TARGET" "$MOUNT_POINT"; then
    ocf_log err "Couldn't start target $TARGET"
    return $OCF_ERR_GENERIC
  fi
  return $OCF_SUCCESS
}
# Notification hook required by the advertised "notify" action;
# this agent has nothing to do on peer events.
Target_notify() {
  return $OCF_SUCCESS
}
# Unmount the target if it is currently mounted.
# Globals:  MOUNT_POINT (read)
# Returns:  OCF_SUCCESS when the target is already stopped;
#           otherwise umount's exit status.
Target_stop() {
  # Already stopped?
  Target_status >/dev/null 2>&1
  if [ $? -eq $OCF_NOT_RUNNING ]; then
    # Nothing to do.
    rc=$OCF_SUCCESS
  else
    # (was: chroma-agent unmount_target --uuid $TARGET)
    umount "$MOUNT_POINT"
    # BUGFIX: the original never assigned rc on this path, so
    # "return $rc" returned a stale or empty value. Propagate
    # umount's actual exit status instead.
    rc=$?
  fi

  return $rc
}
# Report whether the target is mounted at $MOUNT_POINT by scanning
# /proc/mounts.
# Globals:  TARGET, MOUNT_POINT, OP (all read)
# Outputs:  logs the state via ocf_log when OP is "status"
# Returns:  OCF_SUCCESS when mounted, OCF_NOT_RUNNING otherwise.
Target_status() {
  # (was: chroma-agent target_running --uuid $TARGET)
  # grep the file directly instead of "cat | grep"; quote the mount
  # point and use -- so a value with spaces or a leading dash cannot
  # be misparsed as grep options.
  if grep -q -- "$MOUNT_POINT" /proc/mounts 2>/dev/null; then
    rc=$OCF_SUCCESS
    msg="$TARGET is started (running)"
  else
    rc=$OCF_NOT_RUNNING
    msg="$TARGET is stopped"
  fi

  if [ "$OP" = "status" ]; then
    ocf_log info "$msg"
  fi

  return $rc
}
# Validation hook for the "validate-all" action; the required
# parameters are checked implicitly when start mounts the target,
# so this always reports success.
Target_validate_all() {
  return $OCF_SUCCESS
}
# Exactly one action argument is required.
if [ $# -ne 1 ]; then
  usage
  exit $OCF_ERR_ARGS
fi

TARGET=$OCF_RESKEY_label
MOUNT_POINT=$OCF_RESKEY_mountpoint
OP=$1

# Dispatch the requested action; meta-data and usage need no
# instance parameters. status and monitor share one handler.
case "$OP" in
  meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
  usage)
    usage
    exit $OCF_SUCCESS
    ;;
  status|monitor)
    Target_status
    exit $?
    ;;
  validate-all)
    Target_validate_all
    exit $?
    ;;
  stop)
    Target_stop
    exit $?
    ;;
  start)
    # no exit here: the script's trailing "exit $?" propagates
    # Target_start's status
    Target_start
    ;;
  *)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
exit $?
On MDS1
pcs resource create MGT ocf:heartbeat:Lustre-LDISKFS label="MGS" mountpoint="/lustre/mgt"
pcs resource create lfs1_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs1-MDT0000" mountpoint="/lustre/lfs1-mdt"
On MDS2
pcs resource create lfs2_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs2-MDT0000" mountpoint="/lustre/lfs2-mdt"
More notes:
- no need to add mount point on /etc/fstab
- no need for pcsd and etc on oss nodes
- before, failover
- make sure data movement through
- pcs resource move mgt to lustre02
- pcs resource move lfs1 mgt ...
- pcs resource ...
- pcs property set no-quorum-policy=ignore
- need to enlarge the time interval to 300 second through pcs resource show --full
pcs resource update Lustre_mdt op stop interval=0 timeout=300
pcs resource update Lustre_mdt op monitor interval=5 timeout=60
- when using infiniband, we need to make sure lustre module start before infiniband module
- set rules for priority to let resource on two nodes(not recommended). Normally, a resource should just on one node.
- need to keep an eye on /var/log message make sure no more error messages coming out.
- make sure data movement through
