Skip to content

Commit

Permalink
[ceph] include bluefs db size in "osdfull" calculation (#983)
Browse files Browse the repository at this point in the history
Closes: #973

Signed-off-by: Ponnuvel Palaniyappan <[email protected]>
  • Loading branch information
pponnuvel authored Oct 6, 2024
1 parent 4c39474 commit e1db9b5
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
25 changes: 23 additions & 2 deletions hotsos/core/plugins/storage/ceph/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,25 @@ def ssds_using_bcache(self):

return sorted(ssd_osds_using_bcache)

@staticmethod
def _get_db_size_of_osd(osd_id):
    """
    Return the bluefs DB size of the given OSD.

    The size is looked up in the 'osd_metadata' section of the decoded
    ceph report, matching entries on their 'id' field.

    @param osd_id: id of the OSD to look up.
    @return: size in bytes as an int, or 0 if the report is unavailable,
             has no entry for this OSD, or the entry has no usable
             'bluefs_db_size' value.
    """
    report = CLIHelper().ceph_report_json_decoded()
    if not report:
        return 0

    # NOTE(review): assumes the decoded report is a dict - confirm against
    # CLIHelper. A partial report may lack 'osd_metadata' altogether, so
    # treat that the same as "no information" rather than raising.
    for osd in report.get('osd_metadata') or []:
        if osd.get('id') != osd_id:
            continue

        try:
            return int(osd['bluefs_db_size'])
        except KeyError:
            # older versions do not output bluefs_db_size
            return 0
        except (TypeError, ValueError):
            # value present but null/non-numeric; fall back to "unknown"
            # so the caller's usage calculation is not aborted.
            return 0

    return 0

@cached_property
def osd_raw_usage_higher_than_data(self):
_bad_osds = []
Expand All @@ -664,10 +683,12 @@ def osd_raw_usage_higher_than_data(self):
return _bad_osds

for osd in self.osd_df_tree['nodes']:
if osd['id'] >= 0:
osd_id = osd['id']
if osd_id >= 0:
raw_usage = osd['kb_used']
db_size_kb = self._get_db_size_of_osd(osd_id) / 1024.0
total_usage = osd['kb_used_data'] + osd['kb_used_omap'] + \
osd['kb_used_meta']
osd['kb_used_meta'] + db_size_kb
# There's always some additional space used by OSDs that's not
# by data/omap/meta for journaling, internal structures, etc.
# Thus we allow 5% discrepancy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ conclusions:
raises:
type: CephOSDWarning
message: >-
Found OSD(s) {bad_osds} with larger raw usage size than data+meta+omap
Found OSD(s) {bad_osds} with larger raw usage size than data+meta+omap+bluefs
combined. While a discrepancy is to be expected due to Ceph using space
not accounted by data+meta+omap columns, usage is greater than {limit}%
not accounted by data+meta+omap+bluefs columns, usage is greater than {limit}%
and likely indicates high discard ops sent to disk which is often
the case for workloads with frequent rewrites.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ data-root:
- sos_commands/systemd/systemctl_list-unit-files
raised-issues:
CephOSDWarning: >-
Found OSD(s) osd.2 with larger raw usage size than data+meta+omap
Found OSD(s) osd.2 with larger raw usage size than data+meta+omap+bluefs
combined. While a discrepancy is to be expected due to Ceph using space
not accounted by data+meta+omap columns, usage is greater than 5%
not accounted by data+meta+omap+bluefs columns, usage is greater than 5%
and likely indicates high discard ops sent to disk which is often
the case for workloads with frequent rewrites.
Expand Down

0 comments on commit e1db9b5

Please sign in to comment.