From acd2e9fe2fe376956a64f1490f84a8bc32e94c2c Mon Sep 17 00:00:00 2001 From: Maciej Mensfeld Date: Wed, 6 Sep 2023 11:49:24 +0200 Subject: [PATCH 1/2] warn on low replication --- CHANGELOG.md | 1 + lib/karafka/web/ui/models/status.rb | 40 +++++++++++++++---- lib/karafka/web/ui/views/status/show.erb | 15 +++++++ .../ui/views/status/warnings/_replication.erb | 19 +++++++++ .../karafka/web/ui/controllers/status_spec.rb | 15 +++++++ spec/lib/karafka/web/ui/models/status_spec.rb | 16 ++++++++ 6 files changed, 99 insertions(+), 7 deletions(-) create mode 100644 lib/karafka/web/ui/views/status/warnings/_replication.erb diff --git a/CHANGELOG.md b/CHANGELOG.md index 88f2c673..08d7c3ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - [Improvement] Introduce config validation. - [Improvement] Provide flash messages support. - [Improvement] Use replication factor of two by default (if not overridden) for Web UI topics when there is more than one broker. +- [Improvement] Show a warning when replication factor of 1 is used for Web UI topics in production. - [Fix] Return 402 status instead of 500 on Pro features that are not available in OSS. - [Fix] Fix a case where errors would not be visible without Rails due to the `String#first` usage. - [Fix] Fix a case where live-poll would be disabled but would still update data. diff --git a/lib/karafka/web/ui/models/status.rb b/lib/karafka/web/ui/models/status.rb index 5578a163..d4c208d7 100644 --- a/lib/karafka/web/ui/models/status.rb +++ b/lib/karafka/web/ui/models/status.rb @@ -113,9 +113,30 @@ def partitions ) end + # @return [Status::Step] do we have correct replication for given env + def replication + if partitions.success? + status = :success + # low replication is not an error but just a warning and a potential problem + # in case of a crash, this is why we do not fail but warn only + status = :warning if topics_details.values.any? { |det| det[:replication] < 2 } + # Allow for non-production setups to use replication 1 as it is not that relevant + status = :success unless Karafka.env.production? + details = topics_details + else + status = :halted + details = {} + end + + Step.new( + status, + details + ) + end + # @return [Status::Step] Is the initial consumers state present in Kafka def initial_consumers_state - if partitions.success? + if replication.success? @current_state ||= Models::ConsumersState.current status = @current_state ? :success : :failure else @@ -234,11 +255,13 @@ def topics_errors # @return [Hash] hash with topics with which we work details (even if don't exist) def topics_details + base = { present: false, partitions: 0, replication: 1 } + topics = { - topics_consumers_states => { present: false, partitions: 0 }, - topics_consumers_reports => { present: false, partitions: 0 }, - topics_consumers_metrics => { present: false, partitions: 0 }, - topics_errors => { present: false, partitions: 0 } + topics_consumers_states => base.dup, + topics_consumers_reports => base.dup, + topics_consumers_metrics => base.dup, + topics_errors => base.dup } @cluster_info.topics.each do |topic| @@ -246,8 +269,11 @@ def topics_details next unless topics.key?(name) - topics[name][:present] = true - topics[name][:partitions] = topic[:partition_count] + topics[name].merge!( + present: true, + partitions: topic[:partition_count], + replication: topic[:partitions].map { |part| part[:replica_count] }.max + ) end topics diff --git a/lib/karafka/web/ui/views/status/show.erb b/lib/karafka/web/ui/views/status/show.erb index 0b4015e1..d9d47450 100644 --- a/lib/karafka/web/ui/views/status/show.erb +++ b/lib/karafka/web/ui/views/status/show.erb @@ -63,6 +63,21 @@ ) %> + <%== + partial( + "status/#{@status.replication.to_s}", + locals: { + title: 'Replication factors', + description: partial( + 'status/warnings/replication', + locals: { + details: @status.replication.details + } + ) + } + ) + %> + <%== partial( "status/#{@status.initial_consumers_state.to_s}", diff --git a/lib/karafka/web/ui/views/status/warnings/_replication.erb b/lib/karafka/web/ui/views/status/warnings/_replication.erb new file mode 100644 index 00000000..fa3d769f --- /dev/null +++ b/lib/karafka/web/ui/views/status/warnings/_replication.erb @@ -0,0 +1,19 @@ +

+ It is recommended to have a replication factor greater than 1 for all the Karafka Web UI topics in a production environment. +

+ +

+ Current replication factors for Karafka Web UI topics: +

+ + + +

+ Please ensure all those topics have a replication factor of at least 2. +

diff --git a/spec/lib/karafka/web/ui/controllers/status_spec.rb b/spec/lib/karafka/web/ui/controllers/status_spec.rb index fcff8cf2..e8936573 100644 --- a/spec/lib/karafka/web/ui/controllers/status_spec.rb +++ b/spec/lib/karafka/web/ui/controllers/status_spec.rb @@ -30,5 +30,20 @@ expect(body).to include(breadcrumbs) end end + + context 'when replication factor is less than 2 in production' do + before do + allow(Karafka.env).to receive(:production?).and_return(true) + get 'status' + end + + it do + expect(response).to be_ok + expect(body).to include(support_message) + expect(body).to include(breadcrumbs) + expect(body).to include('Please ensure all those topics have a replication') + expect(body).to include('Warning') + end + end end end diff --git a/spec/lib/karafka/web/ui/models/status_spec.rb b/spec/lib/karafka/web/ui/models/status_spec.rb index e464f99b..71735aaa 100644 --- a/spec/lib/karafka/web/ui/models/status_spec.rb +++ b/spec/lib/karafka/web/ui/models/status_spec.rb @@ -256,6 +256,22 @@ expect(result.partial_namespace).to eq('successes') end end + + context 'when state is present but replication factor is 1 in prod' do + before do + all_topics + produce(states_topic, state) + # This will force a warning because in prod replication is expected to be > 1 + allow(Karafka.env).to receive(:production).and_return(true) + end + + it 'expect all to be ok because replication is a warning' do + expect(result.success?).to eq(true) + expect(result.to_s).to eq('success') + expect(result.details).to eq(nil) + expect(result.partial_namespace).to eq('successes') + end + end end describe '#initial_consumers_metrics' do From 72165d5cdf584e5e32ca9b9e1c0340cdc18fe272 Mon Sep 17 00:00:00 2001 From: Maciej Mensfeld Date: Wed, 6 Sep 2023 11:50:05 +0200 Subject: [PATCH 2/2] more specs --- spec/lib/karafka/web/ui/controllers/status_spec.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/spec/lib/karafka/web/ui/controllers/status_spec.rb b/spec/lib/karafka/web/ui/controllers/status_spec.rb index e8936573..bbb8288d 100644 --- a/spec/lib/karafka/web/ui/controllers/status_spec.rb +++ b/spec/lib/karafka/web/ui/controllers/status_spec.rb @@ -45,5 +45,16 @@ expect(body).to include('Warning') end end + + context 'when replication factor is less than 2 in non-production' do + before { get 'status' } + + it do + expect(response).to be_ok + expect(body).to include(support_message) + expect(body).to include(breadcrumbs) + expect(body).not_to include('Please ensure all those topics have a replication') + end + end end end