diff --git a/src/scrubber.test.ts b/src/scrubber.test.ts
index f0130f5..d10ba26 100644
--- a/src/scrubber.test.ts
+++ b/src/scrubber.test.ts
@@ -430,3 +430,53 @@ test('getScrubberSql', () => {
   expect(scrubber.getScrubberSql('pw')).toMatchInlineSnapshot(`"'notsecret'"`)
   expect(scrubber.getScrubberSql('name')).toMatchInlineSnapshot(`"'Jane Doe'"`)
 })
+
+test('saltedHashSubstringScrubber should scrub substring values', () => {
+  const data = {
+    Data: [
+      { id: '01' },
+      { id: 'ab02cd' },
+      { id: 'ab03cd', foo: '03' },
+      { id: 'ab04cd', foo: 'ab04cd' },
+      { id: '01\n02\n03' },
+    ],
+  }
+
+  const result = scrub(data, {
+    fields: {
+      'id,foo': {
+        scrubber: 'saltedHashSubstringScrubber',
+        params: {
+          regex: '\\d\\d',
+          initializationVector: 'initializationVector',
+        },
+      },
+    },
+  })
+
+  expect(result).toMatchInlineSnapshot(`
+{
+  "Data": [
+    {
+      "id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9",
+    },
+    {
+      "id": "ab5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381acd",
+    },
+    {
+      "foo": "bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622",
+      "id": "abbb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622cd",
+    },
+    {
+      "foo": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd",
+      "id": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd",
+    },
+    {
+      "id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9
+5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381a
+bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622",
+    },
+  ],
+}
+`)
+})
diff --git a/src/scrubbers.test.ts b/src/scrubbers.test.ts
index ab21d0c..8395e4c 100644
--- a/src/scrubbers.test.ts
+++ b/src/scrubbers.test.ts
@@ -1,3 +1,4 @@
+import { _stringMapEntries } from '@naturalcycles/js-lib'
 import { nanoid } from '@naturalcycles/nodejs-lib'
 import {
   bcryptStringScrubber,
@@ -7,6 +8,7 @@ import {
   charsFromRightScrubberSQL,
   isoDateStringScrubber,
   isoDateStringScrubberSQL,
+  saltedHashSubstringScrubber,
   preserveOriginalScrubber,
   preserveOriginalScrubberSQL,
   randomEmailInContentScrubber,
@@ -25,6 +27,8 @@ import {
   undefinedScrubberSQL,
   unixTimestampScrubber,
   unixTimestampScrubberSQL,
+  defaultScrubbers,
+  defaultScrubbersSQL,
 } from './scrubbers'
 
 const bryptStr1 = '$2a$12$HYNzBb8XYOZZeRwZDiVux.orKNqkSVAoXBDc9Gw7nSxr8rcZupbRK'
@@ -434,3 +438,76 @@ test('bcryptStringScrubberSQL', () => {
     bcryptStringScrubberSQL({ replacements: '$2a$10$:$2a$10$456,$2a$12$:$2a$12$123' }),
   ).toMatchSnapshot()
 })
+
+describe('saltedHashSubstringScrubber', () => {
+  const initializationVector = nanoid()
+
+  test('should scrub the matching substring with a hash', () => {
+    const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
+      regex: '00:00:00:00:00:00',
+      initializationVector,
+    })
+
+    expect(result).toMatch(/foo\|.{64}\|bar/)
+    expect(result).not.toContain('00:00:00:00:00:00')
+  })
+
+  test('should scrub the same value with the same hash', () => {
+    const result1 = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
+      regex: '00:00:00:00:00:00',
+      initializationVector,
+    })
+
+    const result2 = saltedHashSubstringScrubber('bee|00:00:00:00:00:00|boo', {
+      regex: '00:00:00:00:00:00',
+      initializationVector,
+    })
+
+    // 'foo|' and 'bee|' are 4 chars long, so the 64-char hex digest spans indices 4..68
+    expect(result1?.substring(4, 68)).toBe(result2?.substring(4, 68))
+  })
+
+  test('should scrub substring using regex', () => {
+    const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
+      regex:
+        '[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}',
+      initializationVector,
+    })
+
+    expect(result).toMatch(/foo\|.{64}\|bar/)
+    expect(result).not.toContain('00:00:00:00:00:00')
+  })
+
+  test('should scrub multiple occurrences', () => {
+    const result = saltedHashSubstringScrubber('foo|max|bar|max|boo', {
+      regex: 'max',
+      initializationVector,
+    })
+
+    expect(result).not.toContain('max')
+  })
+
+  test('should throw when the salt is missing', () => {
+    expect(() => saltedHashSubstringScrubber('foo|max|bar', { regex: 'max' } as any)).toThrow(
+      'Initialization vector is missing',
+    )
+  })
+
+  test('should throw when the regex or substring is missing', () => {
+    expect(() =>
+      saltedHashSubstringScrubber('foo|max|bar', {
+        initializationVector,
+      } as any),
+    ).toThrow('Substring or regex is missing')
+  })
+})
+
+const scrubberNames = _stringMapEntries(defaultScrubbers).map(([k]) => k)
+test.each(scrubberNames)('the %s should have its SQL scrubber counterpart', scrubberName => {
+  expect(defaultScrubbersSQL[scrubberName]).toBeDefined()
+})
+
+const sqlScrubberNames = _stringMapEntries(defaultScrubbersSQL).map(([k]) => k)
+test.each(sqlScrubberNames)('the %s should have its scrubber counterpart', scrubberName => {
+  expect(defaultScrubbers[scrubberName]).toBeDefined()
+})
diff --git a/src/scrubbers.ts b/src/scrubbers.ts
index 14c9d11..14aab9f 100644
--- a/src/scrubbers.ts
+++ b/src/scrubbers.ts
@@ -462,6 +462,75 @@ export const bcryptStringScrubberSQL: BcryptStringScrubberSQLFn = params => {
   END`
 }
 
+/**
+ * Scrubber that replaces each substring matching `params.regex` with a salted
+ * SHA-256 hex digest of that substring.
+ */
+export type SaltedHashSubstringScrubberFn = ScrubberFn<
+  string | undefined,
+  SaltedHashSubstringScrubberParams
+>
+
+export type SaltedHashSubstringScrubberSQLFn = ScrubberSQLFn
+
+export interface SaltedHashSubstringScrubberParams {
+  /** Salt appended to each matched substring before it is hashed. */
+  initializationVector: string
+  /** Regular expression source (no delimiters, no flags); it is applied globally. */
+  regex: string
+}
+
+/**
+ * Replaces every match of `params.regex` in `value` with the hex SHA-256 digest of the
+ * matched substring followed by `params.initializationVector`. Text outside the matches is
+ * kept, and equal substrings always produce equal digests.
+ *
+ * Falsy values (`undefined`, `''`) are returned unchanged.
+ *
+ * @throws if `params.initializationVector` or `params.regex` is missing
+ */
+export const saltedHashSubstringScrubber: SaltedHashSubstringScrubberFn = (value, params) => {
+  _assert(params?.initializationVector, 'Initialization vector is missing')
+  _assert(params?.regex, 'Substring or regex is missing')
+
+  if (!value) return value
+
+  const regex = new RegExp(params.regex, 'g')
+
+  return value.replace(regex, substring =>
+    crypto.createHash('sha256').update(substring).update(params.initializationVector).digest('hex'),
+  )
+}
+
+/**
+ * SQL counterpart of `saltedHashSubstringScrubber`: builds an expression that replaces the
+ * matches of `params.regex` with `SHA2(<match> || <initializationVector>, 256)`.
+ *
+ * NOTE: `REGEXP_SUBSTR` is called without an occurrence argument, so the digest is computed
+ * from a single match and reused for every replacement in the value. Values holding several
+ * DIFFERENT matches may therefore be scrubbed differently than by the JS scrubber
+ * (TODO confirm against the target SQL dialect).
+ *
+ * @throws if `params.initializationVector` or `params.regex` is missing
+ */
+export const saltedHashSubstringScrubberSQL: SaltedHashSubstringScrubberSQLFn = params => {
+  _assert(params?.initializationVector, 'Initialization vector is missing')
+  _assert(params?.regex, 'Substring or regex is missing')
+
+  // The params end up inside single-quoted SQL literals: double any embedded quote so that a
+  // `'` in the regex or in the salt cannot terminate the literal early.
+  // String() keeps the stringification that template interpolation applied before.
+  const quote = (s: string): string => String(s).replace(/'/g, "''")
+  const regex = quote(params.regex)
+  const initializationVector = quote(params.initializationVector)
+
+  const substringToReplace = `COALESCE(REGEXP_SUBSTR(${sqlValueToReplace}, '${regex}'), '')`
+  const hashedValue = `SHA2(${substringToReplace} || '${initializationVector}', 256)`
+  const replacedValue = `REGEXP_REPLACE(${sqlValueToReplace}, '${regex}', ${hashedValue})`
+
+  return replacedValue
+}
+
 function nthChar(str: string, character: string, n: number): number | undefined {
   let count = 0
   let i = 0
@@ -489,6 +558,7 @@ export const defaultScrubbers: ScrubbersMap = {
   saltedHashScrubber,
   saltedHashEmailScrubber,
   bcryptStringScrubber,
+  saltedHashSubstringScrubber,
 }
 
 export const defaultScrubbersSQL: ScrubbersSQLMap = {
@@ -504,4 +574,5 @@ export const defaultScrubbersSQL: ScrubbersSQLMap = {
   saltedHashScrubber: saltedHashScrubberSQL,
   saltedHashEmailScrubber: saltedHashEmailScrubberSQL,
   bcryptStringScrubber: bcryptStringScrubberSQL,
+  saltedHashSubstringScrubber: saltedHashSubstringScrubberSQL,
 }