Skip to content

Commit

Permalink
Add support for computing digest/hash on TDS columns (#2326)
Browse files Browse the repository at this point in the history
- Add support for hash functions in TDS row functions
- Fixed joinStrings for concatenating columns in a TDS row functions
- Add support for computing a hash digest across columns in a TDS
  • Loading branch information
aormerod-gs authored Oct 3, 2023
1 parent 9db8aec commit 8205951
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,8 @@ function meta::protocols::pure::vX_X_X::transformation::fromPureGraph::toPureGra
->concatenate([
pair(meta::pure::mapping::from_TabularDataSet_1__Mapping_1__Runtime_1__TabularDataSet_1_.name->toOne(), meta::pure::mapping::from_TabularDataSet_1__Mapping_1__Runtime_1__TabularDataSet_1_),
pair(meta::pure::mapping::from_TabularDataSet_1__Mapping_1__Runtime_1__ExecutionContext_1__TabularDataSet_1_.name->toOne(), meta::pure::mapping::from_TabularDataSet_1__Mapping_1__Runtime_1__ExecutionContext_1__TabularDataSet_1_),
pair(meta::pure::runtime::currentUserId__String_1_.name->toOne(), meta::pure::runtime::currentUserId__String_1_)
pair(meta::pure::runtime::currentUserId__String_1_.name->toOne(), meta::pure::runtime::currentUserId__String_1_),
pair(meta::pure::functions::hash::hash_String_1__HashType_1__String_1_.name->toOne(), meta::pure::functions::hash::hash_String_1__HashType_1__String_1_)
])
->removeDuplicatesBy(p | $p.second)
->newMultiValueMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,10 @@ public List<Function<Handlers, List<FunctionExpressionBuilderRegistrationInfo>>>
handlers.m(handlers.m(handlers.h("meta::pure::tds::extensions::columnValueDifference_TabularDataSet_1__TabularDataSet_1__String_$1_MANY$__String_$1_MANY$__String_$1_MANY$__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> ps.size() == 5)),
handlers.m(handlers.h("meta::pure::tds::extensions::columnValueDifference_TabularDataSet_1__TabularDataSet_1__String_$1_MANY$__String_$1_MANY$__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> true)))
),
new FunctionExpressionBuilderRegistrationInfo(null,
handlers.m(handlers.m(handlers.h("meta::pure::tds::extensions::extendWithDigestOnColumns_TabularDataSet_1__String_1__HashType_1__String_$1_MANY$__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> ps.size() == 4)),
handlers.m(handlers.h("meta::pure::tds::extensions::extendWithDigestOnColumns_TabularDataSet_1__String_1__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> ps.size() == 2)))
),
new FunctionExpressionBuilderRegistrationInfo(null,
handlers.m(handlers.m(handlers.h("meta::pure::tds::extensions::rowValueDifference_TabularDataSet_1__TabularDataSet_1__String_$1_MANY$__String_$1_MANY$__String_$1_MANY$__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> ps.size() == 5)),
handlers.m(handlers.h("meta::pure::tds::extensions::rowValueDifference_TabularDataSet_1__TabularDataSet_1__String_$1_MANY$__String_$1_MANY$__TabularDataSet_1_", false, ps -> handlers.res("meta::pure::tds::TabularDataSet", "one"), ps -> ps.size() == 4)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,20 @@ function meta::relational::extension::relationalExtension() : meta::pure::extens
)
)
)->concatenate(
[
meta::pure::tds::extensions::extendWithDigestOnColumns_TabularDataSet_1__String_1__HashType_1__String_$1_MANY$__TabularDataSet_1_,
meta::pure::tds::extensions::extendWithDigestOnColumns_TabularDataSet_1__String_1__TabularDataSet_1_
]->map(f|
pair($f->cast(@Function<Any>), {|
let tdsSchema = resolveSchemaImpl($fe.parametersValues->at(0), $openVars, $extensions);
let digestCol = $fe.parametersValues->last()->toOne()->reactivate($openVars)->cast(@String)->toOne();

$tdsSchema.extend(^TDSColumn(name = $digestCol, offset= 0, type = String));
}
)
)
)
->concatenate(
pair(tableToTDS_Table_1__TableTDS_1_->cast(@Function<Any>), {|
createSchemaState($fe->reactivate()->cast(@TabularDataSet).columns);
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function meta::pure::tds::viewToTDS(view:meta::relational::metamodel::relation::
}

function
{doc.doc = 'Project the specified calculated columns from the provided TDS. This is similar to extend, but rather than adding the columns it replaces all of the existing ones '}
{doc.doc = 'Project the specified columns from the provided TableTDS'}
meta::pure::tds::project(tds:meta::relational::mapping::TableTDS[1], columnFunctions:ColumnSpecification<TDSRow>[*]):TabularDataSet[1]
{
$tds->project($columnFunctions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2228,20 +2228,25 @@ function meta::relational::functions::pureToSqlQuery::processNoOp(f:FunctionExpr

function meta::relational::functions::pureToSqlQuery::processHash(f:FunctionExpression[1], currentPropertyMapping:PropertyMapping[*], operation:SelectWithCursor[1], vars:Map<VariableExpression, ValueSpecification>[1], state:State[1], joinType:JoinType[1], nodeId:String[1], aggFromMap:List<ColumnGroup>[1], context:DebugContext[1], extensions:Extension[*]):RelationalOperationElement[1]
{

let type = $f.parametersValues->at(1)->reactivate()->cast(@meta::pure::functions::hash::HashType)->toOne();
let name = meta::relational::functions::pureToSqlQuery::hashTypeToHashDynaFuncName($type);

let oldFunc = $f.func;
let functionExpression = ^$f(func = ^$oldFunc(functionName = $name->toOne()), parametersValues = $f.parametersValues->at(0));
$functionExpression->processDynaFunction($currentPropertyMapping, $operation, $vars, $state, $joinType, $nodeId, $aggFromMap, $context, $extensions);
}

function <<access.private>> meta::relational::functions::pureToSqlQuery::hashTypeToHashDynaFuncName(hashType : meta::pure::functions::hash::HashType[1]): String[1]
{
let name = newMap([
pair(meta::pure::functions::hash::HashType.MD5, 'md5'),
pair(meta::pure::functions::hash::HashType.SHA1, 'sha1'),
pair(meta::pure::functions::hash::HashType.SHA256, 'sha256')
])->get($type);
])->get($hashType);

assert($name->isNotEmpty(), | 'hash type ' + $type.name + ' is not yet supported');
assert($name->isNotEmpty(), | 'hash type ' + $hashType.name + ' is not yet supported');

let oldFunc = $f.func;
let functionExpression = ^$f(func = ^$oldFunc(functionName = $name->toOne()), parametersValues = $f.parametersValues->at(0));
$functionExpression->processDynaFunction($currentPropertyMapping, $operation, $vars, $state, $joinType, $nodeId, $aggFromMap, $context, $extensions);
$name->toOne();
}

function <<access.private>> meta::relational::functions::pureToSqlQuery::canProcessAt(f:FunctionExpression[1]):Boolean[1]
Expand Down Expand Up @@ -5032,8 +5037,29 @@ function meta::relational::functions::pureToSqlQuery::processTdsLambda(mapFn:Val
$f.parametersValues->at(0)->processTdsLambda($a, $returnColumnName, $vars, $state, $currentPropertyMapping, $context)->toOne(),
^Literal(value = 'YYYY-MM-DD HH24:MI:SS')
]);
})
];
}),
^PureFunctionTDSToRelationalFunctionPair(first = meta::pure::functions::hash::hash_String_1__HashType_1__String_1_, second = {|
let type = $f.parametersValues->at(1)->reactivate()->cast(@meta::pure::functions::hash::HashType)->toOne();
let name = meta::relational::functions::pureToSqlQuery::hashTypeToHashDynaFuncName($type);

let value = $f.parametersValues->at(0)->processTdsLambda($a, $returnColumnName, $vars, $state, $currentPropertyMapping, $context);

newDynaFunction($name, $value);
})
]->concatenate(
[
meta::pure::functions::string::joinStrings_String_MANY__String_1_,
meta::pure::functions::string::joinStrings_String_MANY__String_1__String_1_,
meta::pure::functions::string::joinStrings_String_MANY__String_1__String_1__String_1__String_1_
]->map(func|
^PureFunctionTDSToRelationalFunctionPair(first = $func, second = {|
^DynaFunction(
name = 'concat',
parameters =$f.parametersValues->map(p|$p->processTdsLambda($a, $returnColumnName, $vars, $state, $currentPropertyMapping, $context))
)
})
)
);

let override = $overrides->filter(o | $o.first == $f.func)->first();
if ($override->isNotEmpty(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import meta::relational::mapping::*;
import meta::pure::functions::hash::*;
import meta::pure::tds::extensions::*;
import meta::relational::metamodel::join::*;

Expand Down Expand Up @@ -202,4 +204,47 @@ function <<access.private>> meta::pure::tds::extensions::joinWithOptionalColumns
| $q1->join($q2, $joinType, {x,y|true}),
| $q1->join($q2, $joinType, $cols->toOneMany())
);
}
}

function <<functionType.NormalizeRequiredFunction>> meta::pure::tds::extensions::extendWithDigestOnColumns(input : TabularDataSet[1], digestColumnName : String[1]) : TabularDataSet[1]
{
$input->extendWithDigestOnColumns($digestColumnName, HashType.MD5, $input.columns.name->toOneMany());
}

function <<functionType.NormalizeRequiredFunction>> meta::pure::tds::extensions::extendWithDigestOnColumns(input : TabularDataSet[1], digestValueColumnName : String[1], digestHashType : HashType[1], digestColumns : String[1..*]) : TabularDataSet[1]
{
$input->extend(
col({row:TDSRow[1]|
$input.columns
->filter(c|$c.name->in($digestColumns))
->map(col|toStringForColAccessor($col)->eval($row))
->joinStrings('|')
->hash($digestHashType)
},$digestValueColumnName)
)
}

function <<access.private>>
meta::pure::tds::extensions::toStringForColAccessor(col: TDSColumn[1]) : Function<{TDSRow[1]->String[1]}>[1]
{
[
// Pending release of https://github.com/finos/legend-pure/pull/736
// if(!$col.type->toOne()->instanceOf(Enumeration),
// | [],
// | {r:TDSRow[1]|$r.getEnum($col.name)->toString()}
// )
]
->concatenate(
[
pair(Integer, {r:TDSRow[1]|$r.getInteger($col.name)->toString()}),
pair(Float, {r:TDSRow[1]|$r.getFloat($col.name)->toString()}),
pair(String, {r:TDSRow[1]|$r.getString($col.name)->toString()}),
pair(Boolean, {r:TDSRow[1]|$r.getBoolean($col.name)->toString()}),
pair(Date, {r:TDSRow[1]|$r.getDate($col.name)->toString()}),
pair(DateTime, {r:TDSRow[1]|$r.getDateTime($col.name)->toString()}),
pair(StrictDate, {r:TDSRow[1]|$r.getStrictDate($col.name)->toString()})
]->filter(p|$p.first == $col.type).second
)
->toOne('Unsupported column type: ' + $col.type->toOne()->toString() + '(' + $col.name + ')')
}

Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import meta::relational::mapping::*;
import meta::pure::router::preeval::*;
import meta::pure::functions::hash::*;
import meta::pure::tds::extensions::*;
import meta::relational::tests::csv::*;
import meta::protocols::tds::preeval::router::prerouting::*;
Expand Down Expand Up @@ -487,4 +490,41 @@ function <<test.Test>> meta::pure::tds::tests::extensions::testFirstNotNull():Bo
{
assertEquals(1, [TDSNull, 1, 2]->meta::pure::tds::extensions::firstNotNull());
assertEquals([], [TDSNull, TDSNull]->meta::pure::tds::extensions::firstNotNull());
}

function <<test.Test>>
meta::pure::tds::tests::extensions::testExtendDigest_InMemory() : Boolean[1]
{
let scores = [1,2,3,4,5];
let data = range($scores->size())->map(i|'student_' +toString($i))->zip($scores);

let tds = $data->project([col(p|$p.first, 'name'), col(p|$p.second, 'score')]);

let result = $tds->extendWithDigestOnColumns('_digest');

println($result->toCSV());

assertEquals(['0da8968758f3a315890e2f9ac3eb2fb3', 'eab9489b8f6517e84642ed09d819fee6', '387c444764879ef47844199bc693effc', '72d92b984f7ba90fe9b1ac4962c74012', 'e9f5f78a0597a8804196a8f9ced00000'], $result->columnValues('_digest'));
}

function <<test.Test, test.AlloyOnly>>
meta::pure::tds::tests::extensions::testExtendDigest_Relational() : Boolean[1]
{
let mapping = meta::relational::tests::simpleRelationalMapping;
let runtime = meta::relational::tests::testRuntime();

let query = {|
Trade.all()
->project([
col(x | $x.id, 'Trade ID'),
col(x | $x.quantity, 'Quantity')
])
->extendWithDigestOnColumns('_digest')
->sort('Trade ID')
->limit(2);
};

let result = execute($query, $mapping, $runtime, meta::relational::extension::relationalExtensions());

assertEquals(['b7bbee4d9b6a2736c25b00dded9344c7', '9e103ea06a6999b4c5a86cf25d68b083'], $result.values->columnValues('_digest'));
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ function <<test.Test>> meta::relational::tds::schema::tests::resolveSchemaTest()
['tradeDate'], ['quantityA'])
->sort(asc('tradeDate'))
});

assertSchemaRoundTripEquality({|
Trade.all()
->groupBy([x|$x.date->adjust(0, DurationUnit.DAYS)],
[ agg(x | $x.quantity, y | $y->sum()), agg(x | $x.quantity, y | $y->sum())],
['tradeDate', 'quantityA', 'quantityB']
)
->extendWithDigestOnColumns('_digest')
->sort(asc('tradeDate'))
});
}

function meta::relational::tds::schema::tests::assertSchemaRoundTripEquality(query : FunctionDefinition<{->TabularDataSet[1]}>[1]) : Boolean[1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,15 +599,48 @@ function <<test.Test>> meta::relational::tests::functions::sqlstring::testToSQLS
assertEquals('select cast("root".quantity as decimal) as "decimal", cast("root".quantity as double precision) as "float" from tradeTable as "root"', $result);
}

function <<test.Test>> meta::relational::tests::functions::sqlstring::testHashFunctions():Boolean[1]
function <<test.Test>> meta::relational::tests::functions::sqlstring::testToSQLStringForTDSStringJoin():Boolean[1]
{
let result = toSQLString(
|Person.all()
->project([
col(p|$p.firstName, 'firstName'),
col(p|$p.lastName, 'lastName')
])
->extend([
col(row:TDSRow[1]|joinStrings([$row.getString('firstName'), ' ', $row.getString('lastName')]), 'name1'),
col(row:TDSRow[1]|joinStrings([$row.getString('firstName'), ' ', $row.getString('lastName')], '|'), 'name2'),
col(row:TDSRow[1]|joinStrings([$row.getString('firstName'), ' ', $row.getString('lastName')], '[', ',', ']'), 'name3'),
col(row:TDSRow[1]|joinStrings(['myValue', $row.getString('firstName'), ' ', $row.getString('lastName')], '[', ',', ']'), 'name4')
]), simpleRelationalMapping, DatabaseType.H2, meta::relational::extension::relationalExtensions());

assertEquals('select "root".FIRSTNAME as "firstName", "root".LASTNAME as "lastName", concat("root".FIRSTNAME, \' \', "root".LASTNAME, \'\', \'\', \'\') as "name1", concat("root".FIRSTNAME, \' \', "root".LASTNAME, \'|\') as "name2", '
+'concat("root".FIRSTNAME, \' \', "root".LASTNAME, \'[\', \',\', \']\') as "name3", concat(\'myValue\', "root".FIRSTNAME, \' \', "root".LASTNAME, \'[\', \',\', \']\') as "name4" from personTable as "root"', $result);
}

function <<test.Test, test.AlloyOnly>> meta::relational::tests::functions::sqlstring::testHashFunctions():Boolean[1]
{
let result = toSQLString(
|Person.all()
->project([
col(p|$p.firstName, 'firstName'),
col(p|$p.lastName, 'lastName'),
col(p|$p.firstName->hash(HashType.MD5), 'md5'),
col(p|$p.firstName->hash(HashType.SHA1), 'sha1'),
col(p|$p.firstName->hash(HashType.SHA256), 'sha256')
]), simpleRelationalMapping, DatabaseType.H2, meta::relational::extension::relationalExtensions());
])
->project([
col({row:TDSRow[1]|$row.getString('firstName')->hash(HashType.MD5)}, 'tds_md5'),
col({row:TDSRow[1]|$row.getString('firstName')->hash(HashType.SHA1)}, 'tds_sha1'),
col({row:TDSRow[1]|$row.getString('firstName')->hash(HashType.SHA256)}, 'tds_sha256'),
col({row:TDSRow[1]|hash(($row.getString('firstName') + $row.getString('lastName')), HashType.MD5)}, 'tds_concat_md5'),
col({row:TDSRow[1]|hash(joinStrings([$row.getString('firstName'), $row.getString('lastName')]), HashType.MD5)}, 'tds_joinstrings_md5'),
col({row:TDSRow[1]|[$row.getString('firstName'), $row.getString('lastName')]->joinStrings('|')->hash(HashType.MD5)}, 'tds_digest')
])
, simpleRelationalMapping, DatabaseType.H2, meta::relational::extension::relationalExtensions());

assertEquals('select rawtohex(hash(\'MD5\', "root".FIRSTNAME)) as "tds_md5", rawtohex(hash(\'SHA-1\', "root".FIRSTNAME)) as "tds_sha1", rawtohex(hash(\'SHA-256\', "root".FIRSTNAME)) as "tds_sha256", '
+ 'rawtohex(hash(\'MD5\', concat("root".FIRSTNAME, "root".LASTNAME))) as "tds_concat_md5", rawtohex(hash(\'MD5\', concat("root".FIRSTNAME, "root".LASTNAME, \'\', \'\', \'\'))) as "tds_joinstrings_md5", '
+ 'rawtohex(hash(\'MD5\', concat("root".FIRSTNAME, "root".LASTNAME, \'|\'))) as "tds_digest" from personTable as "root"', $result);

assertNotEmpty($result);
}

0 comments on commit 8205951

Please sign in to comment.