Ceph Nautilus : Add or Remove OSDs (2019/06/12)

This is how to add OSDs to, or remove OSDs from, an existing Cluster.

                                         |
        +--------------------+           |
        |   [dlp.srv.world]  |10.0.0.30  |
        |     Ceph Client    +-----------+
        |                    |           |
        +--------------------+           |
            +----------------------------+----------------------------+
            |                            |                            |
            |10.0.0.51                   |10.0.0.52                   |10.0.0.53 
+-----------+-----------+    +-----------+-----------+    +-----------+-----------+
|   [node01.srv.world]  |    |   [node02.srv.world]  |    |   [node03.srv.world]  |
|     Object Storage    +----+     Object Storage    +----+     Object Storage    |
|     Monitor Daemon    |    |                       |    |                       |
|     Manager Daemon    |    |                       |    |                       |
+-----------------------+    +-----------------------+    +-----------------------+

[1]	As an example, add a new [node04] node as an OSD from the Admin Node. In this example, the block device [/dev/sdb] on the new [node04] node is used.

# transfer public key

[root@node01 ~]#

ssh-copy-id node04

# if Firewalld is running, allow service

[root@node01 ~]#

ssh node04 "firewall-cmd --add-service=ceph --permanent; firewall-cmd --reload"

# install required packages

[root@node01 ~]#

ssh node04 "yum -y install centos-release-ceph-nautilus; yum -y install ceph"

# transfer required files

[root@node01 ~]#

scp /etc/ceph/ceph.conf node04:/etc/ceph/ceph.conf

[root@node01 ~]#

scp /etc/ceph/ceph.client.admin.keyring node04:/etc/ceph

[root@node01 ~]#

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node04:/var/lib/ceph/bootstrap-osd

# configure OSD

[root@node01 ~]# ssh node04 \
"chown ceph. /etc/ceph/ceph.* /var/lib/ceph/bootstrap-osd/*; \
parted --script /dev/sdb 'mklabel gpt'; \
parted --script /dev/sdb "mkpart primary 0% 100%"; \
ceph-volume lvm create --data /dev/sdb1" 
Running command: /usr/bin/ceph-authtool --gen-print-key
Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new a3bb6021-ec1c-45f9-967a-1ab5dac07e18

.....
.....

Running command: /usr/bin/systemctl start ceph-osd@3
--> ceph-volume lvm activate successful for osd ID: 3
--> ceph-volume lvm create successful for: /dev/sdb1

[root@node01 ~]# ceph -s 
  cluster:
    id:     ad3abac5-ad31-48bd-abbd-2f95dded394d
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 105m)
    mgr: node01(active, since 74m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 0.660506s), 4 in (since 0.660506s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   7 pools, 240 pgs
    objects: 215 objects, 44 KiB
    usage:   3.1 GiB used, 237 GiB / 240 GiB avail
    pgs:     240 active+clean

  io:
    client:   5.5 KiB/s rd, 0 B/s wr, 5 op/s rd, 3 op/s wr

[2]	To remove an OSD node from the existing Cluster, run commands as follows. For example, remove the [node04] node.

[root@node01 ~]#

ceph -s

  cluster:
    id:     ad3abac5-ad31-48bd-abbd-2f95dded394d
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 106m)
    mgr: node01(active, since 75m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 65s), 4 in (since 65s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   7 pools, 240 pgs
    objects: 215 objects, 44 KiB
    usage:   4.2 GiB used, 316 GiB / 320 GiB avail
    pgs:     240 active+clean

[root@node01 ~]#

ceph osd tree

ID CLASS WEIGHT  TYPE NAME       STATUS REWEIGHT PRI-AFF
-1       0.31238 root default
-3       0.07809     host node01
 0   hdd 0.07809         osd.0       up  1.00000 1.00000
-5       0.07809     host node02
 1   hdd 0.07809         osd.1       up  1.00000 1.00000
-7       0.07809     host node03
 2   hdd 0.07809         osd.2       up  1.00000 1.00000
-9       0.07809     host node04
 3   hdd 0.07809         osd.3       up  1.00000 1.00000

# specify the ID of the OSD you'd like to remove

[root@node01 ~]#

ceph osd out 3

marked out osd.3.

# live watch cluster status

# after running [ceph osd out ***], rebalancing is executed automatically

# to quit live watch, press [Ctrl + c]

[root@node01 ~]#

ceph -w

  cluster:
    id:     ad3abac5-ad31-48bd-abbd-2f95dded394d
    health: HEALTH_WARN
            Reduced data availability: 2 pgs inactive
            Degraded data redundancy: 169/645 objects degraded (26.202%), 47 pgs degraded

  services:
    mon: 1 daemons, quorum node01 (age 107m)
    mgr: node01(active, since 76m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 106s), 3 in (since 7s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   7 pools, 240 pgs
    objects: 215 objects, 44 KiB
    usage:   3.1 GiB used, 237 GiB / 240 GiB avail
    pgs:     2.083% pgs not active
             169/645 objects degraded (26.202%)
             186 active+clean
             45  active+recovery_wait+degraded
             4   active+recovery_wait
             3   activating
             2   activating+degraded

  io:
    recovery: 5.0 KiB/s, 0 objects/s

  progress:
    Rebalancing after osd.3 marked out
      [=============.................]

.....
.....

# after status turns to [HEALTH_OK], disable OSD service on the target node

[root@node01 ~]#

ssh node04 "systemctl disable --now ceph-osd@3.service"

Removed /run/systemd/system/ceph-osd.target.wants/ceph-osd@3.service.

# remove the OSD by specifying the target OSD ID

[root@node01 ~]#

ceph osd purge 3 --yes-i-really-mean-it

purged osd.3

[root@node01 ~]#

ceph -s

  cluster:
    id:     ad3abac5-ad31-48bd-abbd-2f95dded394d
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 108m)
    mgr: node01(active, since 77m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 3 osds: 3 up (since 18s), 3 in (since 91s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   7 pools, 240 pgs
    objects: 215 objects, 44 KiB
    usage:   3.1 GiB used, 237 GiB / 240 GiB avail
    pgs:     240 active+clean

  io:
    client:   670 B/s rd, 0 op/s rd, 0 op/s wr

Matched Content