Skip to content

Commit

Permalink
warn on low replication
Browse files Browse the repository at this point in the history
  • Loading branch information
mensfeld committed Sep 6, 2023
1 parent 06747c7 commit acd2e9f
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
- [Improvement] Introduce config validation.
- [Improvement] Provide flash messages support.
- [Improvement] Use replication factor of two by default (if not overridden) for Web UI topics when there is more than one broker.
- [Improvement] Show a warning when replication factor of 1 is used for Web UI topics in production.
- [Fix] Return 402 status instead of 500 on Pro features that are not available in OSS.
- [Fix] Fix a case where errors would not be visible without Rails due to the `String#first` usage.
- [Fix] Fix a case where live-poll would be disabled but would still update data.
Expand Down
40 changes: 33 additions & 7 deletions lib/karafka/web/ui/models/status.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,30 @@ def partitions
)
end

# @return [Status::Step] do we have correct replication for given env
def replication
if partitions.success?
status = :success
# low replication is not an error but just a warning and a potential problem
# in case of a crash, this is why we do not fail but warn only
status = :warning if topics_details.values.any? { |det| det[:replication] < 2 }
# Allow for non-production setups to use replication 1 as it is not that relevant
status = :success unless Karafka.env.production?
details = topics_details
else
status = :halted
details = {}
end

Step.new(
status,
details
)
end

# @return [Status::Step] Is the initial consumers state present in Kafka
def initial_consumers_state
if partitions.success?
if replication.success?
@current_state ||= Models::ConsumersState.current
status = @current_state ? :success : :failure
else
Expand Down Expand Up @@ -234,20 +255,25 @@ def topics_errors

# @return [Hash] hash with topics with which we work details (even if don't exist)
def topics_details
base = { present: false, partitions: 0, replication: 1 }

topics = {
topics_consumers_states => { present: false, partitions: 0 },
topics_consumers_reports => { present: false, partitions: 0 },
topics_consumers_metrics => { present: false, partitions: 0 },
topics_errors => { present: false, partitions: 0 }
topics_consumers_states => base.dup,
topics_consumers_reports => base.dup,
topics_consumers_metrics => base.dup,
topics_errors => base.dup
}

@cluster_info.topics.each do |topic|
name = topic[:topic_name]

next unless topics.key?(name)

topics[name][:present] = true
topics[name][:partitions] = topic[:partition_count]
topics[name].merge!(
present: true,
partitions: topic[:partition_count],
replication: topic[:partitions].map { |part| part[:replica_count] }.max
)
end

topics
Expand Down
15 changes: 15 additions & 0 deletions lib/karafka/web/ui/views/status/show.erb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@
)
%>

<%==
partial(
"status/#{@status.replication.to_s}",
locals: {
title: 'Replication factors',
description: partial(
'status/warnings/replication',
locals: {
details: @status.replication.details
}
)
}
)
%>

<%==
partial(
"status/#{@status.initial_consumers_state.to_s}",
Expand Down
19 changes: 19 additions & 0 deletions lib/karafka/web/ui/views/status/warnings/_replication.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<p>
It is recommended to have a replication factor greater than <code>1</code> for all the Karafka Web UI topics in a production environment.
</p>

<p>
Current replication factors for Karafka Web UI topics:
</p>

<ul>
<% details.each do |name, details| %>
<li>
<code><%= name %></code>: <code><%= details[:replication] %></code>
</li>
<% end %>
</ul>

<p>
Please ensure all those topics have a replication factor of at least <code>2</code>.
</p>
15 changes: 15 additions & 0 deletions spec/lib/karafka/web/ui/controllers/status_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,20 @@
expect(body).to include(breadcrumbs)
end
end

context 'when replication factor is less than 2 in production' do
before do
allow(Karafka.env).to receive(:production?).and_return(true)
get 'status'
end

it do
expect(response).to be_ok
expect(body).to include(support_message)
expect(body).to include(breadcrumbs)
expect(body).to include('Please ensure all those topics have a replication')
expect(body).to include('Warning')
end
end
end
end
16 changes: 16 additions & 0 deletions spec/lib/karafka/web/ui/models/status_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,22 @@
expect(result.partial_namespace).to eq('successes')
end
end

context 'when state is present but replication factor is 1 in prod' do
before do
all_topics
produce(states_topic, state)
# This will force a warning because in prod replication is expected to be > 1
allow(Karafka.env).to receive(:production).and_return(true)
end

it 'expect all to be ok because replication is a warning' do
expect(result.success?).to eq(true)
expect(result.to_s).to eq('success')
expect(result.details).to eq(nil)
expect(result.partial_namespace).to eq('successes')
end
end
end

describe '#initial_consumers_metrics' do
Expand Down

0 comments on commit acd2e9f

Please sign in to comment.