From 13c8701f2f0afc6dbad3f340eab2be67b718d8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matou=C5=A1=20Bor=C3=A1k?= Date: Thu, 6 Jun 2024 08:35:10 +0200 Subject: [PATCH] Validate cleaner params --- .../cleanup/cleaning_steps/randomize_email.rb | 18 ++++++++++++++---- .../cleaning_steps/randomize_email_spec.rb | 13 +++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb b/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb index e29785e..9da456a 100644 --- a/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb +++ b/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb @@ -4,7 +4,9 @@ module DumpCleaner module Cleanup module CleaningSteps class RandomizeEmail < Base - def run(domains_to_keep_key: "domains_to_keep", words_key: "words") + def run(domains_to_keep_data_key: "domains_to_keep", words_data_key: "words") + validate_params(domains_to_keep_data_key:, words_data_key:) + mailbox, domain = current_value.split("@", 2) if !mailbox || !domain || mailbox.empty? || domain.empty? || !domain.include?(".") @@ -13,9 +15,9 @@ def run(domains_to_keep_key: "domains_to_keep", words_key: "words") return step_context end - new_mailbox = new_mailbox(mailbox, words: cleanup_data[words_key]) - new_domain = new_domain(domain, domains: cleanup_data[domains_to_keep_key], - words: cleanup_data[words_key]) + new_mailbox = new_mailbox(mailbox, words: cleanup_data[words_data_key]) + new_domain = new_domain(domain, domains: cleanup_data[domains_to_keep_data_key], + words: cleanup_data[words_data_key]) step_context.current_value = "#{new_mailbox}@#{new_domain}" step_context @@ -51,6 +53,14 @@ def random_word_instead_of(word) GenerateRandomString.new(StepContext.new_from(step_context, current_value: word)) .run(character_set: :lowercase).current_value end + + def validate_params(domains_to_keep_data_key:, words_data_key:) + unless cleanup_data.respond_to?(:key) && + cleanup_data.key?(domains_to_keep_data_key) && cleanup_data.key?(words_data_key) + raise_params_error("The cleanup_data does not contain the dictionary keys + \"#{domains_to_keep_data_key}\" and \"#{words_data_key})\"".gsub(/\s+/, " ")) + end + end end end end diff --git a/spec/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email_spec.rb b/spec/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email_spec.rb index ea4f0ab..08e3468 100644 --- a/spec/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email_spec.rb +++ b/spec/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email_spec.rb @@ -52,5 +52,18 @@ def cleaner(step_context) expect(cleaner(step_context(orig_value: "foo.bar@baz.cz")).run.current_value) .to eq("hvg.sgb@kgi.cz") end + + it "allows specifying custom dictionary data keys" do + step_context = step_context(orig_value: "someone.dustful@gmail.com", + cleanup_data: { "domains" => %w[gmail.com], + "dictionary" => { "7-7" => %w[willful foobars] } }) + expect(cleaner(step_context).run(domains_to_keep_data_key: "domains", words_data_key: "dictionary").current_value) + .to eq("willful.foobars@gmail.com") + end + + it "raises error if custom dictionary key not found in data" do + step_context = step_context(orig_value: "someone.dustful@gmail.com", cleanup_data: {}) + expect { cleaner(step_context).run }.to raise_error(ArgumentError, /does not contain the dictionary keys/) + end end end