1 Remove an OSD device
[root@ceph04 ~]# service ceph stop osd.3        // stop the OSD daemon
=== osd.3 ===
Stopping ceph osd.3 on ceph04...kill 12012...done
[root@ceph04 ~]# ceph osd crush remove osd.3    // remove it from the CRUSH map
removed item id 3 name 'osd.3' from crush map
[root@ceph04 ~]# ceph osd tree
# id    weight  type name       up/down reweight
-1      3       root default
-3      3               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      0                       host ceph04
3       0                               osd.3   down    1
[root@ceph04 ~]# ceph auth del osd.3            // delete its authentication key
updated
[root@ceph04 ~]# ceph osd rm 3                  // remove the OSD from the OSD map
removed osd.3
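The same procedure, written as a general sketch for an arbitrary OSD id. The $ID variable is a placeholder, not part of the transcript above, and the `ceph osd out` step is an optional addition: on a cluster that still holds data you would normally mark the OSD out first and let rebalancing finish before stopping it.

ID=3                               # placeholder: id of the OSD being removed
ceph osd out $ID                   # optional: drain data off the OSD before stopping it
service ceph stop osd.$ID          # stop the daemon
ceph osd crush remove osd.$ID      # remove it from the CRUSH map
ceph auth del osd.$ID              # delete its authentication key
ceph osd rm $ID                    # remove it from the OSD map
ceph osd tree                      # verify the OSD is gone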
2 Add a new OSD device
[root@ceph04 data]# mkfs.xfs -f /dev/sdb           // format the partition
[root@ceph04 data]# mount /dev/sdb /data/osd.3/    // mount the partition
[root@ceph04 data]# ceph-osd -i 3 --mkfs --mkkey   // initialize the OSD data directory and create its key
2014-02-26 11:17:48.014785 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.049559 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.059596 7f94ef4947a0 -1 filestore(/data/osd.3) could not find 23c2fcde/osd_superblock/0//-1 in index: (2) No such file or directory
2014-02-26 11:17:48.150783 7f94ef4947a0 -1 created object store /data/osd.3 journal /data/osd.3/journal for osd.3 fsid c9871314-3f0b-42c5-8bc7-ad14d41977a0
2014-02-26 11:17:48.150840 7f94ef4947a0 -1 auth: error reading file: /data/osd.3/keyring: can't open /data/osd.3/keyring: (2) No such file or directory
2014-02-26 11:17:48.150949 7f94ef4947a0 -1 created new key in keyring /data/osd.3/keyring
[root@ceph04 osd.3]# ceph auth add osd.3 osd 'allow *' mon 'allow rwx' -i /data/osd.3/keyring   // register the key with the allow rules
2014-02-26 11:19:26.004404 7f46b7ee7760 -1 read 56 bytes from /data/osd.3/keyring
added key for osd.3
[root@ceph01 ceph]# ceph osd getcrushmap -o map    // export the compiled CRUSH map
got crush map from osdmap epoch 12
[root@ceph01 ceph]# ls
fetch_config  map  ceph.conf  ceph.keyring
[root@ceph01 ceph]# crushtool -d map               // decompile the CRUSH map into readable text
# begin crush map

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2

# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root

# buckets
host ceph01 {
        id -2           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 1.000
}
host ceph02 {
        id -4           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.1 weight 1.000
}
host ceph03 {
        id -5           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.2 weight 1.000
}
host ceph04 {
        id -6           # do not change unnecessarily
        # weight 0.000
        alg straw
        hash 0  # rjenkins1
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 0.000
}
root default {
        id -1           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item unknownrack weight 3.000
}

# rules
rule data {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule metadata {
        ruleset 1
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule rbd {
        ruleset 2
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}

[root@ceph04 osd.3]# ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04   // place osd.3 in the CRUSH map
set item id 3 name 'osd.3' weight 1 at location {host=ceph04,rack=unknownrack,root=default} to crush map

The affected buckets in the CRUSH map now look like:

host ceph04 {
        id -6           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.3 weight 1.000
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 4.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 1.000
}

************************
An alternative to `ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04`:

1. Edit map.txt
vi map.txt
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3

host osd3 {
        id -1
        alg straw
        hash 0
        item osd.3 weight 1.00
}

2. Compile the CRUSH map
crushtool -c /root/map.txt -o map

3. Import the compiled map
ceph osd setcrushmap -i map
*******************************

[root@ceph04 osd.3]# service ceph start osd.3      // start osd.3
=== osd.3 ===
Mounting xfs on ceph04:/data/osd.3
create-or-move updated item id 3 name 'osd.3' weight 0.02 at location {host=ceph04,root=default} to crush map
Starting Ceph osd.3 on ceph04...
starting osd.3 at :/0 osd_data /data/osd.3 /data/osd.3/journal
[root@ceph04 osd.3]# ceph -s
  health HEALTH_WARN 1 pgs recovery_wait; 1 pgs stuck unclean; recovery 1/42 degraded (2.381%); recovering 4 o/s, 3553B/s; clock skew detected on mon.ceph02, mon.ceph03
  monmap e1: 3 mons at {ceph01=192.168.9.62:6789/0,ceph02=192.168.9.63:6789/0,ceph03=192.168.9.73:6789/0}, election epoch 6, quorum 0,1,2 ceph01,ceph02,ceph03
  osdmap e16: 4 osds: 4 up, 4 in
  pgmap v459: 960 pgs: 14 active, 945 active+clean, 1 active+recovery_wait; 9518 bytes data, 4154 MB used, 77725 MB / 81880 MB avail; 1001B/s wr, 0op/s; 1/42 degraded (2.381%); recovering 4 o/s, 3553B/s
  mdsmap e5: 1/1/1 up {0=ucms01=up:active}, 1 up:standby
[root@ceph04 osd.3]# ceph osd tree
# id    weight  type name       up/down reweight
-1      4       root default
-3      4               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      1                       host ceph04
3       1                               osd.3   up      1
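The edit-the-map alternative above can be spelled out end to end. A minimal sketch, assuming the decompiled map is edited so that the new device and host entries are also referenced from the rack/root hierarchy (otherwise the placement rules cannot reach osd.3):

ceph osd getcrushmap -o map        # export the compiled CRUSH map
crushtool -d map -o map.txt        # decompile it into editable text
vi map.txt                         # add "device 3 osd.3" and an "item osd.3 weight 1.00" entry under the host bucket
crushtool -c map.txt -o map.new    # recompile the edited map
ceph osd setcrushmap -i map.new    # inject it back into the cluster
ceph osd tree                      # confirm osd.3 now appears under its host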
Possible problems:
The MDS stays stuck in the replay state:
ceph mds stat
mdsmap e63: 1/1/1 up {0=ceph02=up:replay}, 1 up:standby
The OSD log shows:
2014-02-26 10:42:15.386552 7f33b5e40700 0 -- 192.168.9.63:6802/13005 >> 192.168.9.62:6803/18894 pipe(0x3fa1900 sd=29 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state connecting
2014-02-26 10:42:16.394540 7f33b5639700 0 -- 192.168.9.63:6802/13005 >> 192.168.9.39:6802/11369 pipe(0x3fa1680 sd=33 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state wait
2014-02-26 10:42:17.029623 7f33b5033700 0 -- 192.168.9.63:6801/13005 >> 192.168.9.62:0/3872604662 pipe(0x3fa4d80 sd=35 :6801 s=0 pgs=0 cs=0 l=0).accept peer addr is really 192.168.9.62:0/3872604662 (socket is 192.168.9.62:39504/0)
The MDS log shows:
2014-02-26 10:42:12.824284 7f57ba9de700 0 -- 192.168.9.63:6800/12877 >> 192.168.9.62:6801/17332 pipe(0x2450000 sd=17 :37551 s=1 pgs=0 cs=0 l=1).connect claims to be 0.0.0.0:6801/18894 not 192.168.9.62:6801/17332 - wrong node!
A likely cause of this error: the OSD has not been added to the CRUSH map.
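A quick way to confirm this is to check whether the new OSD actually appears under a host bucket with a non-zero weight, and to place it there if it does not. A minimal check sketch (the bucket names match the example cluster above):

ceph osd tree                      # the new OSD should be listed under its host with a weight > 0
ceph osd dump | grep osd.3         # and it should be reported as up/in
# if it is missing from the tree, place it in the CRUSH hierarchy:
ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04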