Lightbits - cheatsheet and using the lbcli tool
Get Cluster Status
[root@stu-prr-nmv1-mlnx ~]# lbcli get cluster
UUID                                  Subsystem NQN                                                             Current max replicas  Supported max replicas  MinVersionInCluster  MinAllowedVersion  MaxAllowedVersion
d90ac496-133e-4459-bcee-7ee8e6ee902c  nqn.2016-01.com.lightbitslabs:uuid:453e6f39-fef3-4a7e-bdc5-1ec308440558  0                     1                       2.1.2                2.1.X              2.2.X
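To grab just the cluster UUID for use in other commands, the table output above can be parsed directly (a small sketch that assumes the column layout shown above):
# cluster UUID is the first column of the first data line
lbcli get cluster | awk 'NR==2 {print $1}'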
Get Cluster Nodes
[root@stu-prr-nmv1-mlnx ~]# lbcli list nodes
Name        UUID                                  State     NVMe endpoint     Failure domains  Local rebuild progress
server00-0  304bc373-b1e6-53ae-bc3a-a6f2a709740b  Inactive  10.1.144.17:4420  [server00]       None
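A quick way to spot trouble is to filter for nodes that are not Active (a sketch that assumes State is the third column, as in the table above):
lbcli list nodes | awk 'NR>1 && $3 != "Active"'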
Show the NVMe devices
[root@stu-prr-nmv1-mlnx ~]# lbcli list nvme-device
Name      Size     NUMA ID  Serial              State    Server UUID                           Node UUID
nvme7n1   7.3 TiB  0        BTLJ048202558P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme6n1   7.3 TiB  0        BTLJ048202568P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme4n1   7.3 TiB  0        BTLJ048202588P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme5n1   7.3 TiB  0        BTLJ0482025M8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme0n1   7.3 TiB  0        BTLJ0482026C8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme3n1   7.3 TiB  0        BTLJ048202BF8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme11n1  7.3 TiB  0        BTLJ048202BQ8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme2n1   7.3 TiB  0        BTLJ048202QR8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme8n1   7.3 TiB  0        PHLJ047400358P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme9n1   7.3 TiB  0        PHLJ0474006T8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme1n1   7.3 TiB  0        PHLJ047400UN8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme10n1  7.3 TiB  0        PHLJ047500HQ8P0HGN  Healthy  b25279a2-1153-59df-9480-c35c264894b8  304bc373-b1e6-53ae-bc3a-a6f2a709740b
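To spot failed drives quickly, filter out the healthy ones (assumes the default table output shown above; anything left needs attention):
lbcli list nvme-device | grep -v -e Healthy -e '^Name'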
Show the volumes
[root@stu-prr-nmv1-mlnx ~]# lbcli list volumes | head -n 4
Name                                             UUID                                  State      Protection State  NSID  Size     Replicas  Compression  ACL                                                                             Rebuild Progress
vol_volume-d87f9518-5563-4149-8c3c-a76fdb6e6162  028b1e0a-9dc0-4e4a-93ee-2971698fdad0  Available  NotAvailable      45    200 GiB  1         false        values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"  None
vol_volume-c2ab8fbe-6b42-4e4b-9900-f01249bb3b71  03a9f310-e01f-4387-b678-2fa75b490d50  Available  NotAvailable      12    200 GiB  1         false        values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"  None
vol_volume-6b4712f9-842a-48a9-8bb1-22441f027412  070f19e4-3d61-4ad5-a8d4-d949fd98f401  Available  NotAvailable      36    1.0 TiB  1         false        values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"  None
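To see which volumes are exported to a particular client, grep the list by that client's host NQN (sketch using the NQN from the output above):
lbcli list volumes | grep 'nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1'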
Check services and status
systemctl status node-manager
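For a quick pass over the units mentioned on this page (unit names taken from this cluster; the duroslight instances are numbered, e.g. duroslight-0, and your install may have more units):
for svc in node-manager duroslight-0; do
    echo -n "$svc: "; systemctl is-active "$svc"
done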
Show that the pmem device is working
[root@stu-prr-nmv1-mlnx log]# ndctl list
[
  {
    "dev":"namespace0.0",
    "mode":"raw",
    "size":68719476736,
    "sector_size":512,
    "blockdev":"pmem0"
  }
]
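With jq installed (an assumption, it may need to be added), the namespace mode can be pulled out in one line; per the create-namespace output below, the configured state on this cluster is devdax:
ndctl list | jq -r '.[].mode'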
# or, if the namespace does not exist, create it. WARNING: do not run this without a Lightbits engineer watching; it can destroy the entire write cache.
[root@stu-prr-nmv1-mlnx ~]# ndctl create-namespace -f -e namespace0.0 --type=pmem --mode=dax --no-autolabel
{
  "dev":"namespace0.0",
  "mode":"devdax",
  "map":"dev",
  "size":"63.00 GiB (67.64 GB)",
  "uuid":"60d736b7-6e2b-4a2b-8ed5-db512fd4c19c",
  "daxregion":{
    "id":0,
    "size":"63.00 GiB (67.64 GB)",
    "align":2097152,
    "devices":[
      {
        "chardev":"dax0.0",
        "size":"63.00 GiB (67.64 GB)"
      }
    ]
  },
  "align":2097152
}
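After the namespace is recreated in devdax mode, the matching character device from the output above should exist:
ls -l /dev/dax0.0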
Check the logs
[root@stu-prr-nmv1-mlnx ~]# journalctl -u duroslight-0.service -f
-- Logs begin at Fri 2021-04-09 16:45:32 UTC. --
Apr 18 15:01:09 stu-prr-nmv1-mlnx.dt.internal duroslight.sh[6192]: INFO 2021-04-18 15:01:09,559 [shard 22] frontend - Accepted connection from: 10.1.144.11:39466 con 0x616000270610
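To cut the noise down to problems only, the same unit can be queried for warnings and errors since the last boot:
journalctl -u duroslight-0.service -b -p warning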
Debugging the nvme devices
# debugging
nvme id-ctrl /dev/nvme0
nvme list-subsys
nvme id-ns /dev/nvme0n2
cat /sys/block/nvme0n4/uuid
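A connected namespace can be traced back to its Lightbits volume through its UUID (a sketch; it assumes the volume UUID is exposed as the namespace UUID, so verify against lbcli list volumes before relying on it):
uuid=$(cat /sys/block/nvme0n4/uuid)
lbcli list volumes | grep -i "$uuid"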
Scribbles of a madman to be cleaned up
# things broke - this is what we had to do to recover
The device paths changed.
docker cp <file with sleep> nova_compute:/usr/local/bin/kolla_start
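The "<file with sleep>" above is not recorded on this page; a hypothetical stand-in would be a kolla_start replacement that just idles, so the container stays up while you edit files inside it:
# hypothetical placeholder for <file with sleep>
cat > /tmp/kolla_start_sleep <<'EOF'
#!/bin/bash
sleep infinity
EOF
chmod +x /tmp/kolla_start_sleep
docker cp /tmp/kolla_start_sleep nova_compute:/usr/local/bin/kolla_start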
sudo vi /usr/lib/python3.6/site-packages/nova/virt/libvirt/volume/lightos.py (line 82)
try:
    db[vol['connection_info']['data']['device_path']] = vol['connection_info']['data']
except:
    # blanket catch on purpose: attachments without a device_path in their
    # connection_info are skipped instead of crashing the compute agent
    pass
Clear out the discovery service's internal dir (or check for a consistent hostnqn - a quick check follows these steps).
Restart the discovery service (it should be moved to a systemd service instead of running in tmux).
Recreate all the .conf files in /etc/discovery-client/discovery.d.
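A quick hostnqn consistency check before recreating the .conf files (assumes nvme-cli keeps the host NQN in /etc/nvme/hostnqn, its default location):
# every connection file should reference the same hostnqn as the host itself
cat /etc/nvme/hostnqn
grep -r 'nqn' /etc/discovery-client/discovery.d/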
# update the Lightbits volumes with the correct host NQN in the ACL (the --uuid flag selects the volume to update)
lbcli update volume --uuid=15497b40-b314-4af9-abcc-6f1bd2e2f75f --acl=nqn.2014-08.org.nvmexpress:uuid:d91377d9-8544-4af9-9b0b-36a91265acee
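If every volume needs the same host NQN, the update can be looped over the volume list (a sketch that parses the UUID out of column 2 of the table output shown earlier; sanity-check the list before running it against a live cluster):
hostnqn=nqn.2014-08.org.nvmexpress:uuid:d91377d9-8544-4af9-9b0b-36a91265acee
for vol in $(lbcli list volumes | awk 'NR>1 {print $2}'); do
    lbcli update volume --uuid="$vol" --acl="$hostnqn"
done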
Stop / start the VM via OpenStack.