-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathadd-db-to-osd.sh
252 lines (223 loc) · 9.7 KB
/
add-db-to-osd.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/bin/bash
# Brett Kelly Oct 2021
# 45Drives
# Version 1.3 stable
# Print the command-line help text to stdout and terminate the script
# with exit status 0.
usage() {
  printf '%s\n' \
    'Usage:' \
    '[-b] Block DB size. Required. Allowed suffixes K,M,G,T' \
    '[-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y' \
    '[-f] Bypass osd per db warning' \
    '[-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>' \
    '[-h] Displays this message'
  exit 0
}
# Tag the OSD's block LV and its new block.db LV so that each carries the
# ceph.* LVM tags describing the pair.
# Globals read:
#   BLOCK_LV_DEVICE - path of the OSD's existing block LV
#   DB_LV_DEVICE    - path of the newly created block.db LV
# Side effects: defines the helper get_lv_uuid; runs lvchange on both LVs.
# Returns: non-zero (before changing any tag) if the db LV's UUID cannot be read.
add_lv_tags() {
  # Print the whitespace-trimmed LVM UUID of the LV whose path is given as $1.
  get_lv_uuid() {
    lvs -o uuid --no-headings "$1" | awk '{$1=$1};1'
  }

  local db_lv_uuid block_tags_string tag
  local -a block_tags=()

  # ceph.db_uuid must be the UUID of the db LV, so query DB_LV_DEVICE itself.
  db_lv_uuid=$(get_lv_uuid "$DB_LV_DEVICE")
  if [[ -z "$db_lv_uuid" ]]; then
    echo "Error: could not read the LV uuid of $DB_LV_DEVICE" >&2
    return 1
  fi

  # Add block.db tags to existing block device
  lvchange --addtag "ceph.db_device=$DB_LV_DEVICE" "$BLOCK_LV_DEVICE"
  lvchange --addtag "ceph.db_uuid=$db_lv_uuid" "$BLOCK_LV_DEVICE"

  # Copy every tag of the block LV onto the db LV. Both LVs should end up with
  # the same tags except for ceph.type (block vs. db), which is swapped below.
  block_tags_string=$(lvs -o lv_tags --no-headings "$BLOCK_LV_DEVICE" | awk '{$1=$1};1')
  IFS=',' read -r -a block_tags <<< "$block_tags_string"
  for tag in "${block_tags[@]}"; do
    lvchange --addtag "$tag" "$DB_LV_DEVICE"
  done
  lvchange --deltag "ceph.type=block" "$DB_LV_DEVICE"
  lvchange --addtag "ceph.type=db" "$DB_LV_DEVICE"
}
# Verify that every command named in the SCRIPT_DEPENDANCIES array can be
# found by the shell. On the first one that cannot, print a message and
# terminate the script with exit status 1.
check_dependancies() {
  local tool
  for tool in "${SCRIPT_DEPENDANCIES[@]}"; do
    if command -v $tool >/dev/null 2>&1; then
      continue
    fi
    echo "cli utility: $tool is not installed"
    echo "jq, and bc are required"
    exit 1
  done
}
# Abort on the first failing command so a half-finished run does not make a mess.
set -e

# Commands verified by check_dependancies before any work is done.
SCRIPT_DEPENDANCIES=(bc jq)
# "true" when -f is given; bypasses the OSD-per-db limit check.
FORCE="false"
# Extent size used to convert the requested db size into LVM extents (4 MiB).
# NOTE(review): presumably matches the vgcreate default extent size - confirm.
PHYSICAL_EXTENT_SIZE_BYTES=4194304
# Maximum number of OSD dbs allowed on one db device unless -f is given.
OSD_PER_DB_LIMIT=5

# Parse command-line options (see usage for their meaning).
while getopts 'b:fo:d:h' OPTION; do
  case "${OPTION}" in
    b)
      BLOCK_DB_SIZE="${OPTARG}"
      # Convert a size with an IEC suffix (K,M,G,T) into bytes.
      BLOCK_DB_SIZE_BYTES=$(numfmt --from=iec "$BLOCK_DB_SIZE")
      ;;
    d)
      DB_DEVICE="${OPTARG}"
      if [[ ! -b "$DB_DEVICE" ]]; then
        echo "Error: DB_DEVICE=$DB_DEVICE is not a block device"
        exit 1
      fi
      ;;
    f)
      FORCE="true"
      ;;
    o)
      OSD_LIST_="${OPTARG}"
      # Split the comma separated id list into the OSD_LIST array.
      IFS=',' read -r -a OSD_LIST <<< "$OSD_LIST_"
      ;;
    h)
      usage
      ;;
    *)
      # Unknown option or missing option argument; getopts has already
      # printed the specific error, so stop instead of carrying on.
      echo "Invalid option. See ./$(basename "$0") -h for usage details"
      exit 1
      ;;
  esac
done

# Check if correct input was given: -o, -d and -b are all required.
if [[ -z "${OSD_LIST[0]}" || -z "$DB_DEVICE" || -z "$BLOCK_DB_SIZE_BYTES" ]]; then
  echo "Input required. See ./$(basename "$0") -h for usage details"
  exit 1
fi
# Check cli dependencies before using bc and jq below.
check_dependancies

# Requested db size expressed in LVM extents (bc integer division truncates).
BLOCK_DB_SIZE_EXTENTS=$(bc <<< "$BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES")
# Number of OSDs given with -o.
OSD_COUNT="${#OSD_LIST[@]}"
# Space needed for all the new dbs together.
TOTAL_DB_SIZE_BYTES=$(bc <<< "$BLOCK_DB_SIZE_BYTES*$OSD_COUNT")
# Raw size of the db device in bytes.
DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 "$DB_DEVICE")

# Ask wipefs for the type of the first signature found on DB_DEVICE.
# The result is empty when no signature is reported.
DB_DEVICE_SIGNATURE=$(wipefs "$DB_DEVICE" --json | jq -r '.signatures | .[0].type // empty')

# If the signature is empty the disk is assumed new.
# If it is LVM2_member the disk is assumed to already have a db lv present on it.
# Anything else means the disk holds something else and should be wiped first.
if [[ -n "$DB_DEVICE_SIGNATURE" && "$DB_DEVICE_SIGNATURE" != "LVM2_member" ]]; then
  echo "Disk is not empty nor a LVM device, wipe device first and run again"
  exit 1
fi
# Get PVS info (sizes in plain bytes, JSON format) and keep only the entries
# whose pv_name equals the db device we were given.
LVM_JSON=$(pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json)
LVM_JSON_DEVICE=$(jq --arg disk "$DB_DEVICE" '.[] |.[].pv | .[] | select(.pv_name==$disk)' <<< "$LVM_JSON")

# Check we are using the correct device name.
# If DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the
# wrong disk name was used (sd name instead of alias). Quit with warning.
if [[ "$DB_DEVICE_SIGNATURE" == "LVM2_member" && -z "$LVM_JSON_DEVICE" ]]; then
  echo "WARNING: device selected ($DB_DEVICE) has a LVM signature, but could not get LVM info."
  echo "Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
  exit 1
fi

# Are we using an existing db device or a new one? If both LVM_JSON_DEVICE and
# DB_DEVICE_SIGNATURE are empty we have a new disk and pick a fresh VG name.
if [[ -z "$LVM_JSON_DEVICE" && -z "$DB_DEVICE_SIGNATURE" ]]; then
  DB_VG_NAME="ceph-$(uuidgen)"
else
  # Existing device: take the volume group name from the first pvs entry.
  DB_VG_NAME="$(jq -r '.vg_name' <<< "$LVM_JSON_DEVICE" | awk 'NR==1')"
  # The disk has a LVM2_member signature but no volume group.
  # Quit with warning; the disk must be wiped before running again.
  if [[ -z "$DB_VG_NAME" ]]; then
    echo "WARNING: Device selected ($DB_DEVICE) has a LVM2_member signature, but no volume group"
    echo "Wipe disk and run again"
    exit 1
  fi
  # Count how many lv dbs are already present and add them to the requested
  # OSDs so the total can be compared against OSD_PER_DB_LIMIT.
  EXISTING_DB_COUNT=$(jq -r '.lv_count' <<< "$LVM_JSON_DEVICE" | awk 'NR==1')
  echo "WARNING: device currently has $EXISTING_DB_COUNT db's present"
  OSD_COUNT=$(bc <<< "${#OSD_LIST[@]}+$EXISTING_DB_COUNT")
  # Only the free bytes of the volume group are available for new dbs, so
  # that value replaces the raw device size in DB_DEVICE_SIZE_BYTES, which is
  # what the "will it fit" check reads.
  DB_DEVICE_SIZE_BYTES=$(jq -r '.vg_free' <<< "$LVM_JSON_DEVICE" | awk 'NR==1')
fi
# Refuse to put more than OSD_PER_DB_LIMIT dbs on one device.
# The -f flag (FORCE="true") skips this limit.
if [ "$FORCE" == "false" ] && [ "$OSD_COUNT" -gt "$OSD_PER_DB_LIMIT" ]; then
  echo "Warning: OSD_COUNT is greater than OSD_PER_DB_LIMIT=$OSD_PER_DB_LIMIT. Use -f to bypass"
  exit 1
fi

# Refuse to continue when the dbs to be created need more bytes than
# DB_DEVICE_SIZE_BYTES provides.
if [ "$TOTAL_DB_SIZE_BYTES" -gt "$DB_DEVICE_SIZE_BYTES" ]; then
  echo "Warning: total size of db will not fit on device $DB_DEVICE"
  exit 1
fi
# Pre-flight checks for every requested OSD:
#   - it must be present on this host
#   - it must not already have a db device
#   - its current bluefs db usage must be smaller than the chosen db size
# ceph-volume is slow, so its output is gathered once before the loop.
CEPH_VOLUME_JSON=$(ceph-volume lvm list --format json)
for OSD_ID in "${OSD_LIST[@]}"; do
  # Entry of the ceph-volume listing whose ceph.osd_id tag equals this id.
  OSD_JSON=$(echo $CEPH_VOLUME_JSON | jq -r --arg id "$OSD_ID" '.[] | .[] | select(.tags["ceph.osd_id"]==$id)')
  if [ -z "$OSD_JSON" ]; then
    echo "Can't find osd.$OSD_ID on this host"
    exit 1
  fi

  # Non-empty only when the entry carries a ceph.db_device tag.
  DB_CHECK=$(echo $OSD_JSON | jq 'select(.tags["ceph.db_device"])')
  if [ -n "$DB_CHECK" ]; then
    echo "Warning: osd.$OSD_ID already has a db device attached"
    exit 1
  fi

  # db_used_bytes from the bluefs section of the OSD daemon's perf dump.
  CURRENT_BLOCK_DB_USED_BYTES=$(ceph daemon osd.$OSD_ID perf dump | jq '.bluefs | .db_used_bytes')
  if [[ "$CURRENT_BLOCK_DB_USED_BYTES" -ge "$BLOCK_DB_SIZE_BYTES" ]]; then
    echo "Warning: osd.$OSD_ID has CURRENT_BLOCK_DB_USED_BYTES($(numfmt --to=iec $CURRENT_BLOCK_DB_USED_BYTES)). This must be less than BLOCK_DB_SIZE_BYTES($(numfmt --to=iec $BLOCK_DB_SIZE_BYTES))"
    exit 1
  fi
done

# Make sure the ceph admin keyring is present and has correct permissions.
# The exit code of "ceph status" is captured through "||" so that "set -e"
# does not end the script before the code can be inspected.
rc=0
ceph status > /dev/null 2>&1 || rc=$?
if [[ "$rc" -ne 0 ]]; then
  echo "Warning: permisson denied accessing cluster, admin keyring must be present"
  exit 1
fi
# If we got this far then all checks have passed.
# Start migration process: one OSD at a time, each OSD is stopped, given a new
# block.db LV on the db volume group, re-tagged, re-activated and waited for.
# NOTE(review): "set -e" is active, so any command failing after
# "ceph osd set noout" ends the script with noout still set and the OSD
# still stopped - confirm this is acceptable or add cleanup.

# A brand new disk (no LVM info, no signature) needs a PV and the VG created first.
if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
pvcreate $DB_DEVICE
vgcreate $DB_VG_NAME $DB_DEVICE
fi
for i in "${!OSD_LIST[@]}"; do
OSD_ID=${OSD_LIST[i]}
# Look up this OSD's entry in the ceph-volume listing gathered earlier
OSD_JSON=$(echo $CEPH_VOLUME_JSON | jq -r --arg id "$OSD_ID" '.[] | .[] | select(.tags["ceph.osd_id"]==$id)')
OSD_FSID=$(echo $OSD_JSON | jq -r '.tags["ceph.osd_fsid"]')
# Name the new db LV osd-db-<fresh uuid> inside the db volume group
DB_LV_UUID=$(uuidgen)
DB_LV_DEVICE="/dev/$DB_VG_NAME/osd-db-$DB_LV_UUID"
# Path of the OSD's existing block LV, as reported by ceph-volume
BLOCK_LV_DEVICE="$(echo $OSD_JSON | jq -r '.lv_path')"
# Create the db LV, sized in extents
lvcreate -l $BLOCK_DB_SIZE_EXTENTS -n osd-db-$DB_LV_UUID $DB_VG_NAME
# Give ceph ownership of both the LV symlink itself (-h) and the device it resolves to
chown -h ceph:ceph $DB_LV_DEVICE
chown -R ceph:ceph $(realpath $DB_LV_DEVICE)
# Poll ceph health every 2 seconds; do not continue until the cluster reports HEALTH_OK
CEPH_STATUS=$(ceph health --format json | jq -r '.status')
while [ "$CEPH_STATUS" != "HEALTH_OK" ]; do
echo "Warning: Cluster is not in HEALTH_OK state"
sleep 2
CEPH_STATUS=$(ceph health --format json | jq -r '.status')
done
echo "Set noout"
ceph osd set noout
echo "Stop OSD.$OSD_ID"
systemctl stop ceph-osd@$OSD_ID
echo "Flush OSD Journal"
ceph-osd -i $OSD_ID --flush-journal
echo "Create new db"
# The db size is handed to ceph-bluestore-tool through the CEPH_ARGS environment variable
CEPH_ARGS="--bluestore-block-db-size $BLOCK_DB_SIZE_BYTES" ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID/ --dev-target $DB_LV_DEVICE
echo "Migrate old db to new db"
# Source is the OSD's block device, target is the block.db attached in the previous step
ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID/ --devs-source /var/lib/ceph/osd/ceph-$OSD_ID/block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID/block.db
echo "Update LV tags on block and db"
# add_lv_tags reads BLOCK_LV_DEVICE and DB_LV_DEVICE set above
add_lv_tags
echo "unmount OSD.$OSD_ID"
umount /var/lib/ceph/osd/ceph-$OSD_ID/
echo "Activate OSD.$OSD_ID"
ceph-volume lvm activate $OSD_ID $OSD_FSID
echo "Unset noout"
ceph osd unset noout
echo "Verify osd is back up before continuing"
# Poll the status of this OSD in "ceph osd tree" every 2 seconds until it is "up"
OSD_STATE=$(ceph osd tree --format json | jq --arg id "$OSD_ID" -r '.nodes[] | select(.id == ($id |tonumber)) | .status')
echo "OSD_STATE: $OSD_STATE"
while [ "$OSD_STATE" != "up" ]; do
echo "Warning: OSD.$OSD_ID is not UP yet. Waiting..."
sleep 2
OSD_STATE=$(ceph osd tree --format json | jq --arg id "$OSD_ID" -r '.nodes[] | select(.id == ($id |tonumber)) | .status')
echo "OSD_STATE: $OSD_STATE"
done
done