Lightbits - cheatsheet and using lbcli tool

From Define Wiki
Revision as of 10:10, 23 November 2021 by David (talk | contribs) (Created page with "== Get Cluster Status == <pre> [root@stu-prr-nmv1-mlnx ~]# lbcli get cluster UUID Subsystem NQN...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Get Cluster Status

[root@stu-prr-nmv1-mlnx ~]# lbcli get cluster 
UUID                                   Subsystem NQN                                                             Current max replicas   Supported max replicas   MinVersionInCluster   MinAllowedVersion   MaxAllowedVersion
d90ac496-133e-4459-bcee-7ee8e6ee902c   nqn.2016-01.com.lightbitslabs:uuid:453e6f39-fef3-4a7e-bdc5-1ec308440558   0                      1                        2.1.2                 2.1.X               2.2.X

Get Cluster Nodes

[root@stu-prr-nmv1-mlnx ~]# lbcli list nodes
Name         UUID                                   State      NVMe endpoint      Failure domains   Local rebuild progress
server00-0   304bc373-b1e6-53ae-bc3a-a6f2a709740b   Inactive   10.1.144.17:4420   [server00]        None

Show the NVMe devices

[root@stu-prr-nmv1-mlnx ~]# lbcli list nvme-device
Name       Size      NUMA ID   Serial               State     Server UUID                            Node UUID
nvme7n1    7.3 TiB   0         BTLJ048202558P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme6n1    7.3 TiB   0         BTLJ048202568P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme4n1    7.3 TiB   0         BTLJ048202588P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme5n1    7.3 TiB   0         BTLJ0482025M8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme0n1    7.3 TiB   0         BTLJ0482026C8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme3n1    7.3 TiB   0         BTLJ048202BF8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme11n1   7.3 TiB   0         BTLJ048202BQ8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme2n1    7.3 TiB   0         BTLJ048202QR8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme8n1    7.3 TiB   0         PHLJ047400358P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme9n1    7.3 TiB   0         PHLJ0474006T8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme1n1    7.3 TiB   0         PHLJ047400UN8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b
nvme10n1   7.3 TiB   0         PHLJ047500HQ8P0HGN   Healthy   b25279a2-1153-59df-9480-c35c264894b8   304bc373-b1e6-53ae-bc3a-a6f2a709740b

Show the volumes

[root@stu-prr-nmv1-mlnx ~]# lbcli list volumes | head -n 4
Name                                              UUID                                   State       Protection State   NSID      Size      Replicas   Compression   ACL                                                                              Rebuild Progress
vol_volume-d87f9518-5563-4149-8c3c-a76fdb6e6162   028b1e0a-9dc0-4e4a-93ee-2971698fdad0   Available   NotAvailable       45        200 GiB   1          false         values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"    None
vol_volume-c2ab8fbe-6b42-4e4b-9900-f01249bb3b71   03a9f310-e01f-4387-b678-2fa75b490d50   Available   NotAvailable       12        200 GiB   1          false         values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"    None
vol_volume-6b4712f9-842a-48a9-8bb1-22441f027412   070f19e4-3d61-4ad5-a8d4-d949fd98f401   Available   NotAvailable       36        1.0 TiB   1          false         values:"nqn.2014-08.org.nvmexpress:uuid:0f0cf8f4-f206-4799-87c6-75a3118a56d1"    None

Check services and status

systemctl status node-manager 

Check that the pmem device is working

[root@stu-prr-nmv1-mlnx log]# ndctl list
[
  {
    "dev":"namespace0.0",
    "mode":"raw",
    "size":68719476736,
    "sector_size":512,
    "blockdev":"pmem0"
  }
]

# Or, if it is not working, create it - WARNING: do not try this without a Lightbits engineer watching; it can destroy the entire cache
[root@stu-prr-nmv1-mlnx ~]# ndctl create-namespace -f -e namespace0.0 --type=pmem --mode=dax --no-autolabel
{
  "dev":"namespace0.0",
  "mode":"devdax",
  "map":"dev",
  "size":"63.00 GiB (67.64 GB)",
  "uuid":"60d736b7-6e2b-4a2b-8ed5-db512fd4c19c",
  "daxregion":{
    "id":0,
    "size":"63.00 GiB (67.64 GB)",
    "align":2097152,
    "devices":[
      {
        "chardev":"dax0.0",
        "size":"63.00 GiB (67.64 GB)"
      }
    ]
  },
  "align":2097152
}

Check the logs

[root@stu-prr-nmv1-mlnx ~]# journalctl -u duroslight-0.service  -f
-- Logs begin at Fri 2021-04-09 16:45:32 UTC. --
Apr 18 15:01:09 stu-prr-nmv1-mlnx.dt.internal duroslight.sh[6192]: INFO  2021-04-18 15:01:09,559 [shard 22] frontend - Accepted connection from: 10.1.144.11:39466 con 0x616000270610

Debugging the nvme devices

# debugging 
nvme id-ctrl /dev/nvme0
nvme id-ctrl /dev/nvme0
nvme list-subsys
nvme id-ns  /dev/nvme0n2
cat /sys/block/nvme0n4/uuid

Scribbles of a madman, to be cleaned up

# Things broke - this is what we had to do to recover

Device paths changed

docker cp <file with sleep> nova_compute:/usr/local/bin/kolla_start

sudo vi /usr/lib/python3.6/site-packages/nova/virt/libvirt/volume/lightos.py (line 82)
                try:
                    db[vol['connection_info']['data']['device_path']] = vol['connection_info']['data']
                except:
                    pass

clear out discovery service internal dir (or check for consistent hostnqn)

restart the discovery service (it should be moved to run as a systemd service rather than inside tmux)

recreate all the .conf files in /etc/discovery-client/discovery.d

# Update the volume's ACL in Lightbits with the correct host NQN (uuid)
lbcli update volume --uuid=15497b40-b314-4af9-abcc-6f1bd2e2f75f  --acl=nqn.2014-08.org.nvmexpress:uuid:d91377d9-8544-4af9-9b0b-36a91265acee

stop / start the VM via openstack