diff --git a/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb b/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb
new file mode 100644
index 0000000..81011dd
--- /dev/null
+++ b/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module DumpCleaner
+  module Cleanup
+    module CleaningSteps
+      class FillUpWithString < Base
+        include BytesizeHelpers
+
+        def run(string: "anonymized #{type}", padding: " ", strict_bytesize_check: false)
+          if strict_bytesize_check && string.bytesize != orig_value.bytesize
+            raise "The bytesize of the string must be equal to the bytesize of the original value."
+          end
+
+          string = set_to_bytesize(string, bytesize: orig_value.bytesize, padding:)
+          AddRepetitionSuffix.new(StepContext.new_from(step_context, current_value: string)).run
+        end
+      end
+    end
+  end
+end
diff --git a/lib/dump_cleaner/cleanup/cleaning_steps/same_length_anonymized_string.rb b/lib/dump_cleaner/cleanup/cleaning_steps/same_length_anonymized_string.rb
deleted file mode 100644
index 4a8b4ce..0000000
--- a/lib/dump_cleaner/cleanup/cleaning_steps/same_length_anonymized_string.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-module DumpCleaner
-  module Cleanup
-    module CleaningSteps
-      class SameLengthAnonymizedString < Base
-        def run
-          value = ("anonymized #{type} " * 100).slice(0...current_value.bytesize)
-          AddRepetitionSuffix.new(StepContext.new_from(step_context, current_value: value)).run
-        end
-      end
-    end
-  end
-end
diff --git a/lib/dump_cleaner/cleanup/cleaning_steps/static_string.rb b/lib/dump_cleaner/cleanup/cleaning_steps/static_string.rb
deleted file mode 100644
index ecbecb5..0000000
--- a/lib/dump_cleaner/cleanup/cleaning_steps/static_string.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-module DumpCleaner
-  module Cleanup
-    module CleaningSteps
-      class StaticString < Base
-        def run(value:)
-          step_context.current_value = value
-          step_context
-        end
-      end
-    end
-  end
-end
diff --git a/spec/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix_spec.rb b/spec/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix_spec.rb
index b57616e..8b5da71 100644
--- a/spec/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix_spec.rb
+++ b/spec/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix_spec.rb
@@ -10,9 +10,9 @@ def cleaner(step_context)
   end
 
   describe "#run" do
-    it "returns the step_context" do
+    it "returns a step_context" do
       step_context = step_context(orig_value: "abc")
-      expect(cleaner(step_context).run).to eq(step_context)
+      expect(cleaner(step_context).run).to be_a(DumpCleaner::Cleanup::StepContext)
     end
 
     it "returns the current_value number as a string" do
diff --git a/spec/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string_spec.rb b/spec/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string_spec.rb
new file mode 100644
index 0000000..39323c7
--- /dev/null
+++ b/spec/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string_spec.rb
@@ -0,0 +1,59 @@
+require "spec_helper"
+
+RSpec.describe DumpCleaner::Cleanup::CleaningSteps::FillUpWithString do
+  def step_context(orig_value:, record: { "id_value" => "123" }, type: "some_type", cleanup_data: [], repetition: 0)
+    DumpCleaner::Cleanup::StepContext.new(orig_value:, record:, type:, cleanup_data:, repetition:)
+  end
+
+  def cleaner(step_context)
+    described_class.new(step_context)
+  end
+
+  describe "#run" do
+    it "returns a step_context" do
+      step_context = step_context(orig_value: "abc")
+      expect(cleaner(step_context).run).to be_a(DumpCleaner::Cleanup::StepContext)
+    end
+
+    context "with custom string provided" do
+      it "returns the string if bytesize equal" do
+        step_context = step_context(orig_value: "abcdef")
+        expect(cleaner(step_context).run(string: "uvwxyz").current_value).to eq("uvwxyz")
+      end
+
+      it "returns the string truncated if too long" do
+        step_context = step_context(orig_value: "abc")
+        expect(cleaner(step_context).run(string: "efghij").current_value).to eq("efg")
+      end
+
+      it "returns the string repeated if too short" do
+        step_context = step_context(orig_value: "abcdefg")
+        expect(cleaner(step_context).run(string: "xyz").current_value).to eq("xyz xyz")
+      end
+    end
+
+    context "with default string" do
+      it "returns the 'anonymized type' string if bytesize equal" do
+        step_context = step_context(orig_value: "some equally long st")
+        expect(cleaner(step_context).run.current_value).to eq("anonymized some_type")
+      end
+
+      it "returns the 'anonymized type' string truncated if too long" do
+        step_context = step_context(orig_value: "abc")
+        expect(cleaner(step_context).run.current_value).to eq("ano")
+      end
+
+      it "returns the 'anonymized type' string repeated if too short" do
+        step_context = step_context(orig_value: "some very long string and still even longer")
+        expect(cleaner(step_context).run.current_value).to eq("anonymized some_type anonymized some_type a")
+      end
+    end
+
+    it "raises error if byte sizes don't match and strict check requested" do
+      step_context = step_context(orig_value: "abcdefg")
+      expect do
+        cleaner(step_context).run(string: "xyz", strict_bytesize_check: true).current_value
+      end.to raise_error(/must be equal/)
+    end
+  end
+end