Skip to content

Commit

Permalink
fix: allow users context traits and underscore divide numbers configu…
Browse files Browse the repository at this point in the history
…ration (#3752)
  • Loading branch information
achettyiitr authored Sep 24, 2024
1 parent b2c1a18 commit 386d2ab
Show file tree
Hide file tree
Showing 25 changed files with 1,697 additions and 161 deletions.
34 changes: 24 additions & 10 deletions src/warehouse/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,18 @@ function processWarehouseMessage(message, options) {
const skipReservedKeywordsEscaping =
options.integrationOptions.skipReservedKeywordsEscaping || false;

// underscoreDivideNumbers when set to false, if a column has a format like "_v_3_", it will be formatted to "_v3_"
// underscoreDivideNumbers when set to true, if a column has a format like "_v_3_", we keep it like that
// For older destinations, it will come as true and for new destinations this config will not be present which means we will treat it as false.
options.underscoreDivideNumbers = options.destConfig?.underscoreDivideNumbers || false;

// allowUsersContextTraits when set to true, if context.traits.* is present, it will be added as context_traits_* and *,
// e.g., for context.traits.name, context_traits_name and name will be added to the user's table.
// allowUsersContextTraits when set to false, if context.traits.* is present, it will be added only as context_traits_*
// e.g., for context.traits.name, only context_traits_name will be added to the user's table.
// For older destinations, it will come as true, and for new destinations this config will not be present, which means we will treat it as false.
const allowUsersContextTraits = options.destConfig?.allowUsersContextTraits || false;

addJsonKeysToOptions(options);

if (isBlank(message.messageId)) {
Expand Down Expand Up @@ -898,16 +910,18 @@ function processWarehouseMessage(message, options) {
`${eventType + '_userProperties_'}`,
2,
);
setDataFromInputAndComputeColumnTypes(
utils,
eventType,
commonProps,
message.context ? message.context.traits : {},
commonColumnTypes,
options,
`${eventType + '_context_traits_'}`,
3,
);
if (allowUsersContextTraits) {
setDataFromInputAndComputeColumnTypes(
utils,
eventType,
commonProps,
message.context ? message.context.traits : {},
commonColumnTypes,
options,
`${eventType + '_context_traits_'}`,
3,
);
}
setDataFromInputAndComputeColumnTypes(
utils,
eventType,
Expand Down
37 changes: 37 additions & 0 deletions src/warehouse/snakecase/snakecase.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
const { toString } = require('lodash');
const { unicodeWords, unicodeWordsWithNumbers } = require('./unicodeWords');

/**
 * Pattern that flags strings needing the unicode-aware word splitter: a
 * lowercase letter followed by an uppercase one, two uppercase letters
 * followed by a lowercase one, a digit adjacent to a letter, or any character
 * other than ASCII letters, digits and the space character.
 */
const reHasUnicodeWord =
  /[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/;

/**
 * Checks whether `string` contains any of the patterns above.
 *
 * @param {string} string The string to inspect.
 * @returns {boolean} `true` when the pattern matches, otherwise `false`.
 */
const hasUnicodeWord = (string) => reHasUnicodeWord.test(string);

/**
 * Matches maximal runs of characters that are not in the ASCII ranges
 * \x00-\x2f, \x3a-\x40, \x5b-\x60 and \x7b-\x7f, i.e. runs of ASCII digits,
 * ASCII letters and any character above \x7f.
 */
// eslint-disable-next-line no-control-regex
const reAsciiWord = /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g;

/**
 * Splits `string` into the runs matched by `reAsciiWord`.
 *
 * @param {string} string The string to split.
 * @returns {string[]|null} The matched words, or `null` when nothing matches.
 */
const asciiWords = (string) => string.match(reAsciiWord);

/**
 * Splits `string` into an array of words.
 *
 * Uses `unicodeWords` when `hasUnicodeWord` reports that the string needs it,
 * and `asciiWords` otherwise.
 *
 * @param {string} string The string to split.
 * @returns {string[]} The words found; an empty array when there are none.
 */
function words(string) {
  const splitter = hasUnicodeWord(string) ? unicodeWords : asciiWords;
  // Both splitters return `null` when nothing matches; normalise to [].
  return splitter(string) || [];
}

/**
 * Splits `string` into an array of words using the number-aware unicode
 * splitter.
 *
 * Uses `unicodeWordsWithNumbers` when `hasUnicodeWord` reports that the string
 * needs it, and `asciiWords` otherwise.
 *
 * @param {string} string The string to split.
 * @returns {string[]} The words found; an empty array when there are none.
 */
function wordsWithNumbers(string) {
  if (hasUnicodeWord(string)) {
    return unicodeWordsWithNumbers(string) || [];
  }
  return asciiWords(string) || [];
}

/**
 * Converts `string` to snake case.
 *
 * The value is stringified with lodash `toString`, straight and curly
 * apostrophes are removed, the result is split with `words`, and the
 * lower-cased words are joined with underscores.
 *
 * @param {*} string The value to convert.
 * @returns {string} The snake-cased string ('' when no words are found).
 */
const snakeCase = (string) => {
  const withoutApostrophes = toString(string).replace(/['\u2019]/g, '');
  return words(withoutApostrophes)
    .map((word) => word.toLowerCase())
    .join('_');
};
/**
 * Converts `string` to snake case using the number-aware word splitter.
 *
 * The value is stringified with lodash `toString`, straight and curly
 * apostrophes are removed, the result is split with `wordsWithNumbers`, and
 * the lower-cased words are joined with underscores.
 *
 * @param {*} string The value to convert.
 * @returns {string} The snake-cased string ('' when no words are found).
 */
const snakeCaseWithNumbers = (string) => {
  const sanitized = toString(string).replace(/['\u2019]/g, '');
  const loweredWords = wordsWithNumbers(sanitized).map((word) => word.toLowerCase());
  return loweredWords.join('_');
};

module.exports = { words, wordsWithNumbers, snakeCase, snakeCaseWithNumbers };
94 changes: 94 additions & 0 deletions src/warehouse/snakecase/unicodeWords.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
 * Used to compose unicode character classes.
 *
 * Every constant in this section is a regex *source fragment* meant to be
 * placed inside a `[...]` character class, which is why the escapes are
 * double-backslashed: they must reach the RegExp constructor as text.
 */
// UTF-16 surrogate code units (high and low halves).
const rsAstralRange = '\\ud800-\\udfff';
const rsComboMarksRange = '\\u0300-\\u036f';
const reComboHalfMarksRange = '\\ufe20-\\ufe2f';
const rsComboSymbolsRange = '\\u20d0-\\u20ff';
const rsComboMarksExtendedRange = '\\u1ab0-\\u1aff';
const rsComboMarksSupplementRange = '\\u1dc0-\\u1dff';
// Concatenation of the five combining ranges declared above.
const rsComboRange =
rsComboMarksRange +
reComboHalfMarksRange +
rsComboSymbolsRange +
rsComboMarksExtendedRange +
rsComboMarksSupplementRange;
const rsDingbatRange = '\\u2700-\\u27bf';
// ASCII a-z plus \xdf-\xff, skipping \xf7 (listed in rsMathOpRange instead).
const rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff';
const rsMathOpRange = '\\xac\\xb1\\xd7\\xf7';
// Everything from \x00 to \xbf except ASCII digits and ASCII letters.
const rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf';
const rsPunctuationRange = '\\u2000-\\u206f';
const rsSpaceRange =
' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000';
// ASCII A-Z plus \xc0-\xde, skipping \xd7 (listed in rsMathOpRange instead).
const rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde';
const rsVarRange = '\\ufe0e\\ufe0f';
// Union of the math-operator, non-character, punctuation and space ranges.
const rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange;

/**
 * Used to compose unicode capture groups.
 *
 * Each constant is a regex source fragment that matches a single "unit"
 * (one character class, or one fixed surrogate sequence).
 */
// Straight apostrophe or U+2019; the escape is single-backslashed, so the
// string holds the actual U+2019 character rather than an escape sequence.
const rsApos = "['\u2019]";
const rsBreak = `[${rsBreakRange}]`;
const rsCombo = `[${rsComboRange}]`;
const rsDigit = '\\d';
const rsDingbat = `[${rsDingbatRange}]`;
const rsLower = `[${rsLowerRange}]`;
// Negated class: any character that is not a surrogate, break character,
// digit, dingbat, lowercase letter or uppercase letter.
const rsMisc = `[^${rsAstralRange}${rsBreakRange + rsDigit + rsDingbatRange + rsLowerRange + rsUpperRange}]`;
// Surrogate pairs \ud83c\udffb-\ud83c\udfff (code points U+1F3FB-U+1F3FF).
const rsFitz = '\\ud83c[\\udffb-\\udfff]';
const rsModifier = `(?:${rsCombo}|${rsFitz})`;
const rsNonAstral = `[^${rsAstralRange}]`;
// Exactly two surrogate pairs in \ud83c\udde6-\ud83c\uddff (U+1F1E6-U+1F1FF).
const rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}';
// Any high surrogate followed by any low surrogate.
const rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]';
const rsUpper = `[${rsUpperRange}]`;
const rsZWJ = '\\u200d';

/**
 * Used to compose unicode regexes.
 *
 * These fragments combine the single-unit groups into the larger pieces that
 * the word-matching regexes are assembled from.
 */
const rsMiscLower = `(?:${rsLower}|${rsMisc})`;
const rsMiscUpper = `(?:${rsUpper}|${rsMisc})`;
// Optional contraction suffix: an apostrophe followed by d, ll, m, re, s, t or ve.
const rsOptContrLower = `(?:${rsApos}(?:d|ll|m|re|s|t|ve))?`;
// Uppercase counterpart of the contraction suffix above.
const rsOptContrUpper = `(?:${rsApos}(?:D|LL|M|RE|S|T|VE))?`;
const reOptMod = `${rsModifier}?`;
const rsOptVar = `[${rsVarRange}]?`;
// Zero or more repetitions of: \u200d, then a non-surrogate character, a
// regional pair or a surrogate pair, then an optional variation selector and
// an optional modifier.
const rsOptJoin = `(?:${rsZWJ}(?:${[rsNonAstral, rsRegional, rsSurrPair].join('|')})${rsOptVar + reOptMod})*`;
// Ordinals such as 1st, 2nd, 3rd, 4th (optionally preceded by more digits),
// which must be followed by a word boundary, an uppercase letter or "_".
const rsOrdLower = '\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])';
// Uppercase ordinals (1ST, 2ND, ...), which must be followed by a word
// boundary, a lowercase letter or "_".
const rsOrdUpper = '\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])';
const rsSeq = rsOptVar + reOptMod + rsOptJoin;
// A dingbat, regional pair or surrogate pair followed by rsSeq.
const rsEmoji = `(?:${[rsDingbat, rsRegional, rsSurrPair].join('|')})${rsSeq}`;

/**
 * Alternatives that make up `reUnicodeWords`, in priority order: the regex
 * engine tries them left to right at each position.
 */
const unicodeWordPatterns = [
  // Regular words, lowercase letters followed by optional contractions
  `${rsUpper}?${rsLower}+${rsOptContrLower}(?=${rsBreak}|${rsUpper}|$)`,
  // Miscellaneous uppercase characters with optional contractions
  `${rsMiscUpper}+${rsOptContrUpper}(?=${rsBreak}|${rsUpper}${rsMiscLower}|$)`,
  // Miscellaneous lowercase sequences with optional contractions
  `${rsUpper}?${rsMiscLower}+${rsOptContrLower}`,
  // All uppercase words with optional contractions (e.g., "THIS")
  `${rsUpper}+${rsOptContrUpper}`,
  // Ordinals for uppercase (e.g., "1ST", "2ND")
  rsOrdUpper,
  // Ordinals for lowercase (e.g., "1st", "2nd")
  rsOrdLower,
  // Pure digits (e.g., "123")
  `${rsDigit}+`,
  // Emojis (e.g., 😀, ❤️)
  rsEmoji,
];

/** Global regex that matches one word per match; digits form their own words. */
const reUnicodeWords = new RegExp(unicodeWordPatterns.join('|'), 'g');

/**
 * Alternatives that make up `reUnicodeWordsWithNumbers`, in priority order.
 * The four letter/digit combinations are listed first so that they take
 * precedence over the plain-word alternatives that follow.
 */
const unicodeWordWithNumbersPatterns = [
  // Lowercase letters followed by digits (e.g., "abc123")
  `${rsUpper}?${rsLower}+${rsDigit}+`,
  // Uppercase letters followed by digits (e.g., "ABC123")
  `${rsUpper}+${rsDigit}+`,
  // Digits followed by lowercase letters (e.g., "123abc")
  `${rsDigit}+${rsUpper}?${rsLower}+`,
  // Digits followed by uppercase letters (e.g., "123ABC")
  `${rsDigit}+${rsUpper}+`,
  // Regular words, lowercase letters followed by optional contractions
  `${rsUpper}?${rsLower}+${rsOptContrLower}(?=${rsBreak}|${rsUpper}|$)`,
  // Miscellaneous uppercase characters with optional contractions
  `${rsMiscUpper}+${rsOptContrUpper}(?=${rsBreak}|${rsUpper}${rsMiscLower}|$)`,
  // Miscellaneous lowercase sequences with optional contractions
  `${rsUpper}?${rsMiscLower}+${rsOptContrLower}`,
  // All uppercase words with optional contractions (e.g., "THIS")
  `${rsUpper}+${rsOptContrUpper}`,
  // Ordinals for uppercase (e.g., "1ST", "2ND")
  rsOrdUpper,
  // Ordinals for lowercase (e.g., "1st", "2nd")
  rsOrdLower,
  // Pure digits (e.g., "123")
  `${rsDigit}+`,
  // Emojis (e.g., 😀, ❤️)
  rsEmoji,
];

/** Global regex that matches one word per match, keeping adjacent letters and digits together. */
const reUnicodeWordsWithNumbers = new RegExp(unicodeWordWithNumbersPatterns.join('|'), 'g');

/**
 * Splits `string` into the words matched by `reUnicodeWords`.
 *
 * @param {string} string The string to split.
 * @returns {string[]|null} The matched words, or `null` when nothing matches.
 */
const unicodeWords = (string) => string.match(reUnicodeWords);

/**
 * Splits `string` into the words matched by `reUnicodeWordsWithNumbers`.
 *
 * @param {string} string The string to split.
 * @returns {string[]|null} The matched words, or `null` when nothing matches.
 */
const unicodeWordsWithNumbers = (string) => string.match(reUnicodeWordsWithNumbers);

module.exports = { unicodeWords, unicodeWordsWithNumbers };
11 changes: 1 addition & 10 deletions src/warehouse/util.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
const _ = require('lodash');
const get = require('get-value');

const v0 = require('./v0/util');
const v1 = require('./v1/util');
const { PlatformError, InstrumentationError } = require('@rudderstack/integrations-lib');
const { isBlank } = require('./config/helpers');
Expand Down Expand Up @@ -112,14 +110,7 @@ function validTimestamp(input) {
}

function getVersionedUtils(schemaVersion) {
switch (schemaVersion) {
case 'v0':
return v0;
case 'v1':
return v1;
default:
return v1;
}
return v1;
}

function isRudderSourcesEvent(event) {
Expand Down
87 changes: 0 additions & 87 deletions src/warehouse/v0/util.js

This file was deleted.

16 changes: 9 additions & 7 deletions src/warehouse/v1/util.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
const _ = require('lodash');

const reservedANSIKeywordsMap = require('../config/ReservedKeywords.json');
const { isDataLakeProvider } = require('../config/helpers');
const { TransformationError } = require('@rudderstack/integrations-lib');
const { snakeCase, snakeCaseWithNumbers } = require('../snakecase/snakecase');

function safeTableName(options, name = '') {
const { provider } = options;
Expand Down Expand Up @@ -82,7 +81,7 @@ function safeColumnName(options, name = '') {
path to $1,00,000 to path_to_1_00_000
return an empty string if it couldn't find a char if its ascii value doesnt belong to numbers or english alphabets
*/
function transformName(provider, name = '') {
function transformName(options, provider, name = '') {
const extractedValues = [];
let extractedValue = '';
for (let i = 0; i < name.length; i += 1) {
Expand All @@ -104,14 +103,17 @@ function transformName(provider, name = '') {
if (extractedValue !== '') {
extractedValues.push(extractedValue);
}
const underscoreDivideNumbers = options?.underscoreDivideNumbers || false;
const snakeCaseFn = underscoreDivideNumbers ? snakeCase : snakeCaseWithNumbers;

let key = extractedValues.join('_');
if (name.startsWith('_')) {
// do not remove leading underscores to allow escaping rudder keywords with underscore
// _timestamp -> _timestamp
// __timestamp -> __timestamp
key = name.match(/^_*/)[0] + _.snakeCase(key.replace(/^_*/, ''));
key = name.match(/^_*/)[0] + snakeCaseFn(key.replace(/^_*/, ''));
} else {
key = _.snakeCase(key);
key = snakeCaseFn(key);
}

if (key !== '' && key.charCodeAt(0) >= 48 && key.charCodeAt(0) <= 57) {
Expand Down Expand Up @@ -150,15 +152,15 @@ function toBlendoCase(name = '') {

function transformTableName(options, name = '') {
const useBlendoCasing = options.integrationOptions?.useBlendoCasing || false;
return useBlendoCasing ? toBlendoCase(name) : transformName('', name);
return useBlendoCasing ? toBlendoCase(name) : transformName(options, '', name);
}

function transformColumnName(options, name = '') {
const { provider } = options;
const useBlendoCasing = options.integrationOptions?.useBlendoCasing || false;
return useBlendoCasing
? transformNameToBlendoCase(provider, name)
: transformName(provider, name);
: transformName(options, provider, name);
}

module.exports = {
Expand Down
Loading

0 comments on commit 386d2ab

Please sign in to comment.