Lustre intel: installing IEEL 2.2
Lustre Configuration for IEEL 2.2
1. On all the servers, create this configuration file.

For the MDS:

# cat /etc/modprobe.d/lustre.conf
options lnet networks=tcp0(eth2)

For the OSS:

# cat /etc/modprobe.d/lustre.conf
options lnet networks=tcp0(bond0)

Load the Lustre module:

modprobe -v lustre
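To confirm LNET picked up the intended interface, the local NIDs can be listed after the module loads (a quick sanity check; the address shown is MDS1's IP from this setup):

lctl list_nids
# on MDS1 this should print something like:
# 10.10.17.193@tcp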
2. Create the Lustre filesystems.

On MDS1:

mkfs.lustre --mgs --reformat --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 /dev/sda
mkfs.lustre --mdt --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --servicenode=10.10.17.194@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On MDS2:

mkfs.lustre --mdt --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.194@tcp0 --servicenode=10.10.17.193@tcp0 --mkfsoptions="-J size=2048" /dev/sdc

On lustre01-oss1.boston.co.uk:

mkfs.lustre --ost --fsname=lfs1 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.200@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre01-oss2.boston.co.uk:

mkfs.lustre --ost --fsname=lfs1 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.201@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre01-oss3.boston.co.uk:

mkfs.lustre --ost --fsname=lfs1 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.202@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre01-oss4.boston.co.uk:

mkfs.lustre --ost --fsname=lfs1 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.203@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre02-oss1.boston.co.uk:

mkfs.lustre --ost --fsname=lfs2 --reformat --index=0 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.210@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre02-oss2.boston.co.uk:

mkfs.lustre --ost --fsname=lfs2 --reformat --index=1 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.211@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre02-oss3.boston.co.uk:

mkfs.lustre --ost --fsname=lfs2 --reformat --index=2 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.212@tcp0 --mkfsoptions="-J size=2048" /dev/sdb

On lustre02-oss4.boston.co.uk:

mkfs.lustre --ost --fsname=lfs2 --reformat --index=3 --mgsnid=10.10.17.193@tcp0 --mgsnid=10.10.17.194@tcp0 --servicenode=10.10.17.213@tcp0 --mkfsoptions="-J size=2048" /dev/sdb
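The parameters written by mkfs.lustre can be double-checked non-destructively before going further; tunefs.lustre with --dryrun only prints them (a sketch, run against the formatted device on each server):

tunefs.lustre --dryrun /dev/sdb
# prints the target name (e.g. lfs1-OST0000), flags, and the
# mgsnode/failover parameters without touching the disk

On all the nodes, create the mount points: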
mkdir -p /lustre/mgt
mkdir -p /lustre/lfs1-mdt
mkdir -p /lustre/lfs2-mdt
mkdir -p /lustre/lfs1-ost00
mkdir -p /lustre/lfs1-ost01
mkdir -p /lustre/lfs1-ost02
mkdir -p /lustre/lfs1-ost03
mkdir -p /lustre/lfs2-ost00
mkdir -p /lustre/lfs2-ost01
mkdir -p /lustre/lfs2-ost02
mkdir -p /lustre/lfs2-ost03
mkdir -p /lustre/lfs2-ost04

Disable iptables and SELinux.
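On the RHEL/CentOS 6 systems IEEL 2.2 targets, that typically means (a sketch; the sed edit keeps SELinux disabled across reboots):

service iptables stop
chkconfig iptables off
setenforce 0                                                  # immediate, until reboot
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config  # persistent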
Mount the targets.

On MDS1:
mount -t lustre /dev/sda /lustre/mgt
mount -t lustre /dev/sdb /lustre/lfs1-mdt

On lustre01-oss1.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs1-ost00

On lustre01-oss2.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs1-ost01

On lustre01-oss3.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs1-ost02

On lustre01-oss4.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs1-ost03

On MDS2:

mount -t lustre /dev/sdc /lustre/lfs2-mdt

On lustre02-oss1.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs2-ost00

On lustre02-oss2.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs2-ost01

On lustre02-oss3.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs2-ost02

On lustre02-oss4.boston.co.uk:

mount -t lustre /dev/sdb /lustre/lfs2-ost03

Mount the client:

mount -t lustre 10.10.17.193@tcp0:10.10.17.194@tcp0:/lfs1 /mnt/lfs1
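Once the client mount succeeds, a quick sanity check is to confirm every target is visible and up (the output shapes below are indicative):

lfs df -h /mnt/lfs1
# should list lfs1-MDT0000 and lfs1-OST0000..0003 with their usage
lctl dl
# should show the local Lustre devices in the UP state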
Configuration files to create on MDS1/2 for HA. Install pacemaker, corosync and pcs.

Contents of /etc/corosync/corosync.conf for the MDS pair:
# Please read the corosync.conf.5 manual page
compatibility: whitetank

totem {
    version: 2
    secauth: off
    interface {
        member {
            memberaddr: 172.28.50.193
        }
        member {
            memberaddr: 172.28.50.194
        }
        ringnumber: 0
        bindnetaddr: 172.28.50.0
        mcastport: 5405
        ttl: 1
    }
    interface {
        member {
            memberaddr: 10.0.0.1
        }
        member {
            memberaddr: 10.0.0.2
        }
        ringnumber: 1
        bindnetaddr: 10.0.0.0
        mcastport: 5405
        ttl: 1
    }
    transport: udpu
    token: 17000
    rrp_mode: passive
}

logging {
    fileline: off
    to_logfile: yes
    to_syslog: yes
    logfile: /var/log/cluster/corosync.log
    debug: off
    timestamp: on
    logger_subsys {
        subsys: AMF
        debug: off
    }
}
Contents of /etc/corosync/service.d/pacemaker:

service {
    # Load the Pacemaker Cluster Resource Manager
    name: pacemaker
    ver: 1
}
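With both files in place on the two MDS nodes, start corosync and then pacemaker (pacemaker runs as its own service because of ver: 1) and verify the rings; a sketch using the RHEL 6 init scripts:

service corosync start
service pacemaker start
corosync-cfgtool -s   # both rings should report no faults
pcs status            # both MDS nodes should show as online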
Create the resource agent /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS with the contents below, then make it executable:

chmod 755 /usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS
#!/bin/sh
#
# Lustre-LDISKFS
# Description: Manages a Lustre target on a shared storage medium.
#
# usage: ./Lustre-LDISKFS {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_label
# OCF_RESKEY_mountpoint
#
# OCF_RESKEY_label      : label of the target the script should operate on
# OCF_RESKEY_mountpoint : mount point of the target the script should operate on

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs

usage() {
    echo "usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
}

meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Lustre-LDISKFS">
<version>1.0</version>
<longdesc lang="en">
Resource script for a Lustre LDISKFS target.
</longdesc>
<shortdesc lang="en">Manages Lustre LDISKFS targets</shortdesc>
<parameters>
<parameter name="label" required="1">
<longdesc lang="en">
The label of the target; use blkid to display it.
</longdesc>
<shortdesc lang="en">label</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="mountpoint" required="1">
<longdesc lang="en">
The mount point for the target.
</longdesc>
<shortdesc lang="en">mountpoint</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="monitor" depth="0" timeout="40" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

Target_start() {
    # See if the device is already mounted.
    if Target_status >/dev/null 2>&1; then
        ocf_log info "Target $TARGET is already started."
        return $OCF_SUCCESS
    fi
    # This is not necessary; mount should load the modules.
    # if ! grep -e 'lustre$' /proc/filesystems >/dev/null; then
    #     ocf_log err "Couldn't find the lustre module in /proc/filesystems"
    #     return $OCF_ERR_ARGS
    # fi
    # Start the target.
    # if ! chroma-agent mount_target --uuid $TARGET; then
    if ! mount -t lustre LABEL=$TARGET $MOUNT_POINT; then
        ocf_log err "Couldn't start target $TARGET"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}

Target_notify() {
    return $OCF_SUCCESS
}

Target_stop() {
    # Started already?
    Target_status >/dev/null 2>&1
    if [ $? -eq $OCF_NOT_RUNNING ]; then
        # Nothing to do.
        rc=$OCF_SUCCESS
    else
        # chroma-agent unmount_target --uuid $TARGET
        if umount $MOUNT_POINT; then
            rc=$OCF_SUCCESS
        else
            rc=$OCF_ERR_GENERIC
        fi
    fi
    return $rc
}

Target_status() {
    # See if the target is mounted.
    # if chroma-agent target_running --uuid $TARGET >/dev/null 2>&1; then
    if grep "$MOUNT_POINT" /proc/mounts >/dev/null 2>&1; then
        rc=$OCF_SUCCESS
        msg="$TARGET is started (running)"
    else
        rc=$OCF_NOT_RUNNING
        msg="$TARGET is stopped"
    fi
    if [ "$OP" = "status" ]; then
        ocf_log info "$msg"
    fi
    return $rc
}

Target_validate_all() {
    return $OCF_SUCCESS
}

if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

TARGET=$OCF_RESKEY_label
MOUNT_POINT=$OCF_RESKEY_mountpoint
OP=$1

# These operations do not require instance parameters.
case $OP in
    meta-data)    meta_data
                  exit $OCF_SUCCESS
                  ;;
    usage)        usage
                  exit $OCF_SUCCESS
                  ;;
    status)       Target_status
                  exit $?
                  ;;
    monitor)      Target_status
                  exit $?
                  ;;
    validate-all) Target_validate_all
                  exit $?
                  ;;
    stop)         Target_stop
                  exit $?
                  ;;
    start)        Target_start
                  ;;
    *)            usage
                  exit $OCF_ERR_UNIMPLEMENTED
                  ;;
esac

exit $?

On MDS1:
pcs resource create MGT ocf:heartbeat:Lustre-LDISKFS label="MGS" mountpoint="/lustre/mgt"
pcs resource create lfs1_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs1-MDT0000" mountpoint="/lustre/lfs1-mdt"

On MDS2:

pcs resource create lfs2_mgt ocf:heartbeat:Lustre-LDISKFS label="lfs2-MDT0000" mountpoint="/lustre/lfs2-mdt"
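The agent can also be exercised by hand before pacemaker drives it, by exporting the OCF variables it expects; a sketch for the MGT, run on the MDS that currently owns /dev/sda:

export OCF_ROOT=/usr/lib/ocf
export OCF_RESKEY_label="MGS"
export OCF_RESKEY_mountpoint="/lustre/mgt"
/usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS start
/usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS monitor; echo $?   # 0 = running
/usr/lib/ocf/resource.d/heartbeat/Lustre-LDISKFS stop

More notes: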
- No need to add the mount points to /etc/fstab.
- No need for pcsd etc. on the OSS nodes.
- Before relying on failover, make sure data movement still goes through after moving resources (see the sketch after this list), e.g.:
- pcs resource move mgt to lustre02
- pcs resource move lfs1 mgt ...
- pcs resource ...
- pcs property set no-quorum-policy=ignore
- Enlarge the timeout interval to 300 seconds; check the current values with pcs resource show --full:
pcs resource update Lustre_mdt op stop interval=0 timeout=300
pcs resource update Lustre_mdt op monitor interval=5 timeout=60
- When using InfiniBand, make sure the Lustre module starts before the InfiniBand module.
- Priority rules can be set to let a resource run on two nodes (not recommended); normally a resource should run on just one node.
- Keep an eye on /var/log/messages to make sure no more error messages appear.
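A minimal failover test tying the notes above together (the target node name is hypothetical; note that pcs resource move leaves a location constraint behind):

pcs resource move MGT lustre01-mds2   # hypothetical hostname of the standby MDS
pcs status                            # MGT should now be running on the other node
pcs constraint                        # shows the location constraint the move created
pcs resource show --full              # confirm the enlarged stop/monitor timeouts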