From 60ccea8fe0be4bd09f506cc7ecdc6efea99090ab Mon Sep 17 00:00:00 2001 From: David Nagy Date: Thu, 4 Jul 2024 15:17:10 +0200 Subject: [PATCH 1/4] feat: scrub mac address from objects --- src/scrubber.test.ts | 29 ++++++++++++++++++ src/scrubbers.test.ts | 68 +++++++++++++++++++++++++++++++++++++++++++ src/scrubbers.ts | 48 +++++++++++++++++++++++++++++- 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/src/scrubber.test.ts b/src/scrubber.test.ts index f0130f5..3f15b72 100644 --- a/src/scrubber.test.ts +++ b/src/scrubber.test.ts @@ -430,3 +430,32 @@ test('getScrubberSql', () => { expect(scrubber.getScrubberSql('pw')).toMatchInlineSnapshot(`"'notsecret'"`) expect(scrubber.getScrubberSql('name')).toMatchInlineSnapshot(`"'Jane Doe'"`) }) + +test('macAndIdScrubber should scrub a list of objects', () => { + const data = { + HardwareDevices: [ + { id: '123|mac', mac: 'mac', foo: 'bar' }, + { id: '123|cheese', mac: 'cheese', foo: 'bar' }, + { id: 'tom', mac: 'tom', foo: 'bar' }, + { id: 'mac|123|mac', mac: 'mac', foo: 'bar' }, + ], + } + + const result = scrub(data, { + fields: { + HardwareDevices: { + scrubber: 'macAndIdScrubber', + params: { otherFieldsToScrub: ['id'] }, + }, + }, + }) + + expect(result).toEqual({ + HardwareDevices: [ + { id: '123|1', mac: '1', foo: 'bar' }, + { id: '123|2', mac: '2', foo: 'bar' }, + { id: '3', mac: '3', foo: 'bar' }, + { id: '4|123|4', mac: '4', foo: 'bar' }, + ], + }) +}) diff --git a/src/scrubbers.test.ts b/src/scrubbers.test.ts index ab21d0c..bda4210 100644 --- a/src/scrubbers.test.ts +++ b/src/scrubbers.test.ts @@ -7,6 +7,7 @@ import { charsFromRightScrubberSQL, isoDateStringScrubber, isoDateStringScrubberSQL, + macAndIdScrubber, preserveOriginalScrubber, preserveOriginalScrubberSQL, randomEmailInContentScrubber, @@ -434,3 +435,70 @@ test('bcryptStringScrubberSQL', () => { bcryptStringScrubberSQL({ replacements: '$2a$10$:$2a$10$456,$2a$12$:$2a$12$123' }), ).toMatchSnapshot() }) + +describe('macAndIdScrubber', () => { + test('should scrub the entire `mac` field with a counter', () => { + const data = [{ mac: '00:00:00:00:00:00' }, { mac: '00:00:00:00:00:01' }, { mac: 'foo' }] + + const result = macAndIdScrubber(data) + + expect(result).toEqual([{ mac: '1' }, { mac: '2' }, { mac: '3' }]) + }) + + test('should be possible to change the name of property with the MAC address', () => { + const data = [ + { foo: '00:00:00:00:00:00' }, + { foo: '00:00:00:00:00:01' }, + { mac: '00:00:00:00:00:01' }, + ] + + const result = macAndIdScrubber(data, { fieldNameOfMacAddress: 'foo' }) + + expect(result).toEqual([{ foo: '1' }, { foo: '2' }, { mac: '00:00:00:00:00:01' }]) + }) + + test('should scrub the mac address from other fields with the same value', () => { + const data = [ + { mac: '00:00:00:00:00:00', id: 'foo|00:00:00:00:00:00|bar' }, + { mac: '00:00:00:00:00:01' }, + ] + + const result = macAndIdScrubber(data, { otherFieldsToScrub: ['id'] }) + + expect(result).toEqual([{ mac: '1', id: 'foo|1|bar' }, { mac: '2' }]) + }) + + test('should not scrub the mac address from other fields when not instructed', () => { + const data = [{ mac: '00:00:00:00:00:00', id: 'foo|00:00:00:00:00:00|bar' }] + + const result = macAndIdScrubber(data) + + expect(result).toEqual([{ mac: '1', id: 'foo|00:00:00:00:00:00|bar' }]) + }) + + test('should not scrub when the field is missing', () => { + const data = [{ foo: '00:00:00:00:00:00' }, { foo: '00:00:00:00:00:01' }, { foo: 'foo' }] + + expect(macAndIdScrubber(data)).toEqual([ + { foo: '00:00:00:00:00:00' }, + { foo: '00:00:00:00:00:01' }, + { foo: 'foo' }, + ]) + + expect(macAndIdScrubber(data, { fieldNameOfMacAddress: 'mac' })).toEqual([ + { foo: '00:00:00:00:00:00' }, + { foo: '00:00:00:00:00:01' }, + { foo: 'foo' }, + ]) + + expect( + macAndIdScrubber(data, { fieldNameOfMacAddress: 'mac', otherFieldsToScrub: ['id'] }), + ).toEqual([{ foo: '00:00:00:00:00:00' }, { foo: '00:00:00:00:00:01' }, { foo: 'foo' }]) + }) + + test('should not scrub when the data is not a list', () => { + const data = { mac: '00:00:00:00:00:00' } + + expect(macAndIdScrubber(data as any)).toEqual({ mac: '00:00:00:00:00:00' }) + }) +}) diff --git a/src/scrubbers.ts b/src/scrubbers.ts index 14c9d11..83161a9 100644 --- a/src/scrubbers.ts +++ b/src/scrubbers.ts @@ -1,5 +1,5 @@ import * as crypto from 'node:crypto' -import { _assert } from '@naturalcycles/js-lib' +import { _assert, AnyObject } from '@naturalcycles/js-lib' import { nanoIdCustomAlphabet } from '@naturalcycles/nodejs-lib' import { ScrubberFn, ScrubbersMap, ScrubberSQLFn, ScrubbersSQLMap } from './scrubber.model' @@ -462,6 +462,51 @@ export const bcryptStringScrubberSQL: BcryptStringScrubberSQLFn = params => { END` } +export type MacAndIdScrubberFn = ScrubberFn + +export interface MacAndIdScrubberParams { + /** + * Defaults to `mac` + */ + fieldNameOfMacAddress?: string + + /** + * Defaults to `[]` + */ + otherFieldsToScrub?: string[] +} + +export const macAndIdScrubber: MacAndIdScrubberFn = (hardwareDevices, opts) => { + if (!hardwareDevices) return + if (!Array.isArray(hardwareDevices)) return hardwareDevices + + const fieldNameOfMacAddress = opts?.fieldNameOfMacAddress || 'mac' + const otherFieldsToScrub = opts?.otherFieldsToScrub || [] + + let counter = 0 + + return hardwareDevices.map(hardwareDevice => { + const mac = hardwareDevice[fieldNameOfMacAddress] + if (typeof mac !== 'string') return hardwareDevice + + counter += 1 + const replacement = String(counter) + + const newHardwareDevice: AnyObject = { + ...hardwareDevice, + [fieldNameOfMacAddress]: replacement, + } + + otherFieldsToScrub.forEach(key => { + const value = newHardwareDevice[key] + if (typeof value !== 'string') return + newHardwareDevice[key] = value.replaceAll(mac, replacement) + }) + + return newHardwareDevice + }) +} + function nthChar(str: string, character: string, n: number): number | undefined { let count = 0 let i = 0 @@ -489,6 +534,7 @@ export const defaultScrubbers: ScrubbersMap = { saltedHashScrubber, saltedHashEmailScrubber, bcryptStringScrubber, + macAndIdScrubber, } export const defaultScrubbersSQL: ScrubbersSQLMap = { From b385779ce5ade183fc9c1fcd5b464298aff5b0ad Mon Sep 17 00:00:00 2001 From: David Nagy Date: Fri, 5 Jul 2024 11:04:39 +0200 Subject: [PATCH 2/4] chore: define the SQL scrubber --- src/scrubbers.test.ts | 14 ++++++++++++++ src/scrubbers.ts | 1 + 2 files changed, 15 insertions(+) diff --git a/src/scrubbers.test.ts b/src/scrubbers.test.ts index bda4210..68e6abd 100644 --- a/src/scrubbers.test.ts +++ b/src/scrubbers.test.ts @@ -1,3 +1,4 @@ +import { _stringMapEntries } from '@naturalcycles/js-lib' import { nanoid } from '@naturalcycles/nodejs-lib' import { bcryptStringScrubber, @@ -26,6 +27,8 @@ import { undefinedScrubberSQL, unixTimestampScrubber, unixTimestampScrubberSQL, + defaultScrubbers, + defaultScrubbersSQL, } from './scrubbers' const bryptStr1 = '$2a$12$HYNzBb8XYOZZeRwZDiVux.orKNqkSVAoXBDc9Gw7nSxr8rcZupbRK' @@ -502,3 +505,14 @@ describe('macAndIdScrubber', () => { expect(macAndIdScrubber(data as any)).toEqual({ mac: '00:00:00:00:00:00' }) }) }) + +const scrubberNames = _stringMapEntries(defaultScrubbers).map(([k]) => k) +test.each(scrubberNames)('the %s should have its SQL scrubber counterpart', scrubberName => { + console.log(scrubberName, defaultScrubbersSQL[scrubberName]) + expect(defaultScrubbersSQL[scrubberName]).toBeDefined() +}) + +const sqlScrubberNames = _stringMapEntries(defaultScrubbersSQL).map(([k]) => k) +test.each(sqlScrubberNames)('the %s should have its scrubber counterpart', scrubberName => { + expect(defaultScrubbers[scrubberName]).toBeDefined() +}) diff --git a/src/scrubbers.ts b/src/scrubbers.ts index 83161a9..09f8492 100644 --- a/src/scrubbers.ts +++ b/src/scrubbers.ts @@ -550,4 +550,5 @@ export const defaultScrubbersSQL: ScrubbersSQLMap = { saltedHashScrubber: saltedHashScrubberSQL, saltedHashEmailScrubber: saltedHashEmailScrubberSQL, bcryptStringScrubber: bcryptStringScrubberSQL, + macAndIdScrubber: undefinedScrubberSQL, } From dc974ed5cb60b241a7f73fa2a23311432f0d75bf Mon Sep 17 00:00:00 2001 From: David Nagy Date: Thu, 18 Jul 2024 12:15:13 +0200 Subject: [PATCH 3/4] chore: make scrubber generic --- src/scrubber.test.ts | 55 +++++++++++++++++-------- src/scrubbers.test.ts | 93 ++++++++++++++++++++----------------------- src/scrubbers.ts | 63 +++++++++++++---------------- 3 files changed, 109 insertions(+), 102 deletions(-) diff --git a/src/scrubber.test.ts b/src/scrubber.test.ts index 3f15b72..d10ba26 100644 --- a/src/scrubber.test.ts +++ b/src/scrubber.test.ts @@ -431,31 +431,52 @@ test('getScrubberSql', () => { expect(scrubber.getScrubberSql('name')).toMatchInlineSnapshot(`"'Jane Doe'"`) }) -test('macAndIdScrubber should scrub a list of objects', () => { +test('saltedHashSubstringScrubber should scrub substring values', () => { const data = { - HardwareDevices: [ - { id: '123|mac', mac: 'mac', foo: 'bar' }, - { id: '123|cheese', mac: 'cheese', foo: 'bar' }, - { id: 'tom', mac: 'tom', foo: 'bar' }, - { id: 'mac|123|mac', mac: 'mac', foo: 'bar' }, + Data: [ + { id: '01' }, + { id: 'ab02cd' }, + { id: 'ab03cd', foo: '03' }, + { id: 'ab04cd', foo: 'ab04cd' }, + { id: '01\n02\n03' }, ], } const result = scrub(data, { fields: { - HardwareDevices: { - scrubber: 'macAndIdScrubber', - params: { otherFieldsToScrub: ['id'] }, + 'id,foo': { + scrubber: 'saltedHashSubstringScrubber', + params: { + regex: ['\\d\\d'], + initializationVector: 'initializationVector', + }, }, }, }) - expect(result).toEqual({ - HardwareDevices: [ - { id: '123|1', mac: '1', foo: 'bar' }, - { id: '123|2', mac: '2', foo: 'bar' }, - { id: '3', mac: '3', foo: 'bar' }, - { id: '4|123|4', mac: '4', foo: 'bar' }, - ], - }) + expect(result).toMatchInlineSnapshot(` +{ + "Data": [ + { + "id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9", + }, + { + "id": "ab5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381acd", + }, + { + "foo": "bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622", + "id": "abbb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622cd", + }, + { + "foo": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd", + "id": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd", + }, + { + "id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9 +5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381a +bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622", + }, + ], +} +`) }) diff --git a/src/scrubbers.test.ts b/src/scrubbers.test.ts index 68e6abd..8395e4c 100644 --- a/src/scrubbers.test.ts +++ b/src/scrubbers.test.ts @@ -8,7 +8,7 @@ import { charsFromRightScrubberSQL, isoDateStringScrubber, isoDateStringScrubberSQL, - macAndIdScrubber, + saltedHashSubstringScrubber, preserveOriginalScrubber, preserveOriginalScrubberSQL, randomEmailInContentScrubber, @@ -439,70 +439,65 @@ test('bcryptStringScrubberSQL', () => { ).toMatchSnapshot() }) -describe('macAndIdScrubber', () => { - test('should scrub the entire `mac` field with a counter', () => { - const data = [{ mac: '00:00:00:00:00:00' }, { mac: '00:00:00:00:00:01' }, { mac: 'foo' }] +describe('saltedHashSubstringScrubber', () => { + const initializationVector = nanoid() - const result = macAndIdScrubber(data) + test('should scrub the matching substring with a hash', () => { + const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', { + regex: '00:00:00:00:00:00', + initializationVector, + }) - expect(result).toEqual([{ mac: '1' }, { mac: '2' }, { mac: '3' }]) + expect(result).toMatch(/foo\|.{64}\|bar/) + expect(result).not.toContain('00:00:00:00:00:00') }) - test('should be possible to change the name of property with the MAC address', () => { - const data = [ - { foo: '00:00:00:00:00:00' }, - { foo: '00:00:00:00:00:01' }, - { mac: '00:00:00:00:00:01' }, - ] + test('should scrub the same value with the same hash', () => { + const result1 = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', { + regex: '00:00:00:00:00:00', + initializationVector, + }) - const result = macAndIdScrubber(data, { fieldNameOfMacAddress: 'foo' }) + const result2 = saltedHashSubstringScrubber('bee|00:00:00:00:00:00|boo', { + regex: '00:00:00:00:00:00', + initializationVector, + }) - expect(result).toEqual([{ foo: '1' }, { foo: '2' }, { mac: '00:00:00:00:00:01' }]) + expect(result1?.substring(4, 64)).toBe(result2?.substring(4, 64)) }) - test('should scrub the mac address from other fields with the same value', () => { - const data = [ - { mac: '00:00:00:00:00:00', id: 'foo|00:00:00:00:00:00|bar' }, - { mac: '00:00:00:00:00:01' }, - ] - - const result = macAndIdScrubber(data, { otherFieldsToScrub: ['id'] }) + test('should scrub substring using regex', () => { + const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', { + regex: + '[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}', + initializationVector, + }) - expect(result).toEqual([{ mac: '1', id: 'foo|1|bar' }, { mac: '2' }]) + expect(result).toMatch(/foo\|.{64}\|bar/) + expect(result).not.toContain('00:00:00:00:00:00') }) - test('should not scrub the mac address from other fields when not instructed', () => { - const data = [{ mac: '00:00:00:00:00:00', id: 'foo|00:00:00:00:00:00|bar' }] - - const result = macAndIdScrubber(data) + test('should scrub multiple occurrences', () => { + const result = saltedHashSubstringScrubber('foo|max|bar|max|boo', { + regex: 'max', + initializationVector, + }) - expect(result).toEqual([{ mac: '1', id: 'foo|00:00:00:00:00:00|bar' }]) + expect(result).not.toContain('max') }) - test('should not scrub when the field is missing', () => { - const data = [{ foo: '00:00:00:00:00:00' }, { foo: '00:00:00:00:00:01' }, { foo: 'foo' }] - - expect(macAndIdScrubber(data)).toEqual([ - { foo: '00:00:00:00:00:00' }, - { foo: '00:00:00:00:00:01' }, - { foo: 'foo' }, - ]) - - expect(macAndIdScrubber(data, { fieldNameOfMacAddress: 'mac' })).toEqual([ - { foo: '00:00:00:00:00:00' }, - { foo: '00:00:00:00:00:01' }, - { foo: 'foo' }, - ]) - - expect( - macAndIdScrubber(data, { fieldNameOfMacAddress: 'mac', otherFieldsToScrub: ['id'] }), - ).toEqual([{ foo: '00:00:00:00:00:00' }, { foo: '00:00:00:00:00:01' }, { foo: 'foo' }]) + test('should throw when the salt is missing', () => { + expect(() => saltedHashSubstringScrubber('foo|max|bar', { regex: 'max' } as any)).toThrow( + 'Initialization vector is missing', + ) }) - test('should not scrub when the data is not a list', () => { - const data = { mac: '00:00:00:00:00:00' } - - expect(macAndIdScrubber(data as any)).toEqual({ mac: '00:00:00:00:00:00' }) + test('should throw when the regex or substring is missing', () => { + expect(() => + saltedHashSubstringScrubber('foo|max|bar', { + initializationVector, + } as any), + ).toThrow('Substring or regex is missing') }) }) diff --git a/src/scrubbers.ts b/src/scrubbers.ts index 09f8492..51bd5d0 100644 --- a/src/scrubbers.ts +++ b/src/scrubbers.ts @@ -1,5 +1,5 @@ import * as crypto from 'node:crypto' -import { _assert, AnyObject } from '@naturalcycles/js-lib' +import { _assert } from '@naturalcycles/js-lib' import { nanoIdCustomAlphabet } from '@naturalcycles/nodejs-lib' import { ScrubberFn, ScrubbersMap, ScrubberSQLFn, ScrubbersSQLMap } from './scrubber.model' @@ -462,49 +462,40 @@ export const bcryptStringScrubberSQL: BcryptStringScrubberSQLFn = params => { END` } -export type MacAndIdScrubberFn = ScrubberFn +export type SaltedHashSubstringScrubberFn = ScrubberFn< + string | undefined, + SaltedHashSubstringScrubberParams +> -export interface MacAndIdScrubberParams { - /** - * Defaults to `mac` - */ - fieldNameOfMacAddress?: string +export type SaltedHashSubstringScrubberSQLFn = ScrubberSQLFn - /** - * Defaults to `[]` - */ - otherFieldsToScrub?: string[] +export interface SaltedHashSubstringScrubberParams { + initializationVector: string + regex: string } -export const macAndIdScrubber: MacAndIdScrubberFn = (hardwareDevices, opts) => { - if (!hardwareDevices) return - if (!Array.isArray(hardwareDevices)) return hardwareDevices - - const fieldNameOfMacAddress = opts?.fieldNameOfMacAddress || 'mac' - const otherFieldsToScrub = opts?.otherFieldsToScrub || [] +export const saltedHashSubstringScrubber: SaltedHashSubstringScrubberFn = (value, params) => { + _assert(params?.initializationVector, 'Initialization vector is missing') + _assert(params?.regex, 'Substring or regex is missing') - let counter = 0 + if (!value) return value - return hardwareDevices.map(hardwareDevice => { - const mac = hardwareDevice[fieldNameOfMacAddress] - if (typeof mac !== 'string') return hardwareDevice + const regex = new RegExp(params.regex, 'g') - counter += 1 - const replacement = String(counter) + return value.replace(regex, substring => + crypto.createHash('sha256').update(substring).update(params.initializationVector).digest('hex'), + ) +} - const newHardwareDevice: AnyObject = { - ...hardwareDevice, - [fieldNameOfMacAddress]: replacement, - } +export const saltedHashSubstringScrubberSQL: SaltedHashSubstringScrubberSQLFn = params => { + _assert(params?.initializationVector, 'Initialization vector is missing') + _assert(params?.regex, 'Substring or regex is missing') - otherFieldsToScrub.forEach(key => { - const value = newHardwareDevice[key] - if (typeof value !== 'string') return - newHardwareDevice[key] = value.replaceAll(mac, replacement) - }) + const substringToReplace = `REGEXP_SUBSTR(${sqlValueToReplace}, ${params.regex})` + const hashedValue = `SHA2(${substringToReplace} || '${params.initializationVector}', 256)` + const replacedValue = `REGEXP_REPLACE(${sqlValueToReplace}, ${params.regex}, ${hashedValue})` - return newHardwareDevice - }) + return replacedValue } function nthChar(str: string, character: string, n: number): number | undefined { @@ -534,7 +525,7 @@ export const defaultScrubbers: ScrubbersMap = { saltedHashScrubber, saltedHashEmailScrubber, bcryptStringScrubber, - macAndIdScrubber, + saltedHashSubstringScrubber, } export const defaultScrubbersSQL: ScrubbersSQLMap = { @@ -550,5 +541,5 @@ export const defaultScrubbersSQL: ScrubbersSQLMap = { saltedHashScrubber: saltedHashScrubberSQL, saltedHashEmailScrubber: saltedHashEmailScrubberSQL, bcryptStringScrubber: bcryptStringScrubberSQL, - macAndIdScrubber: undefinedScrubberSQL, + saltedHashSubstringScrubber: saltedHashSubstringScrubberSQL, } From 11957c3eae25be3a73e82fb1f7df7775ccb758c5 Mon Sep 17 00:00:00 2001 From: David Nagy Date: Thu, 18 Jul 2024 14:09:34 +0200 Subject: [PATCH 4/4] chore: fix some sql scrubber thingies --- src/scrubbers.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scrubbers.ts b/src/scrubbers.ts index 51bd5d0..14aab9f 100644 --- a/src/scrubbers.ts +++ b/src/scrubbers.ts @@ -491,9 +491,9 @@ export const saltedHashSubstringScrubberSQL: SaltedHashSubstringScrubberSQLFn = _assert(params?.initializationVector, 'Initialization vector is missing') _assert(params?.regex, 'Substring or regex is missing') - const substringToReplace = `REGEXP_SUBSTR(${sqlValueToReplace}, ${params.regex})` + const substringToReplace = `COALESCE(REGEXP_SUBSTR(${sqlValueToReplace}, '${params.regex}'), '')` const hashedValue = `SHA2(${substringToReplace} || '${params.initializationVector}', 256)` - const replacedValue = `REGEXP_REPLACE(${sqlValueToReplace}, ${params.regex}, ${hashedValue})` + const replacedValue = `REGEXP_REPLACE(${sqlValueToReplace}, '${params.regex}', ${hashedValue})` return replacedValue }