Skip to content

Commit

Permalink
Test Data Generation - only show necessary columns of tables used in …
Browse files Browse the repository at this point in the history
…query (#2202)

* Test Data Gen - just show necessary columns of tables used in query

* resolve comments
  • Loading branch information
YannanGao-gs authored Sep 15, 2023
1 parent 27b56db commit 11873be
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
<groupId>org.finos.legend.pure</groupId>
<artifactId>legend-pure-m3-core</artifactId>
</dependency>
<dependency>
<groupId>org.finos.legend.pure</groupId>
<artifactId>legend-pure-m2-store-relational-pure</artifactId>
</dependency>
<dependency>
<groupId>org.finos.legend.pure</groupId>
<artifactId>legend-pure-m2-dsl-mapping-pure</artifactId>
Expand Down Expand Up @@ -51,10 +47,6 @@
<groupId>org.finos.legend.engine</groupId>
<artifactId>legend-engine-language-pure-modelManager</artifactId>
</dependency>
<dependency>
<groupId>org.finos.legend.engine</groupId>
<artifactId>legend-engine-pure-platform-dsl-mapping-java</artifactId>
</dependency>
<!-- ENGINE -->

<!-- ANNOTATIONS -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public Response generateTestData(TestDataGenerationInput input, @ApiParam(hidden
PureModel pureModel = modelManager.loadModel(input.model, input.clientVersion == null ? PureClientVersions.production : input.clientVersion, profiles, null);
try
{
TestDataGenerationResult result = new TestDataGenerationResult(TestDataGenerationService.generateEmbeddedData(input.query, pureModel.getRuntime(input.runtime),pureModel.getMapping(input.mapping), pureModel));
TestDataGenerationResult result = new TestDataGenerationResult(TestDataGenerationService.generateEmbeddedData(input.query, pureModel.getMapping(input.mapping), pureModel));
return ManageConstantResult.manageResult(profiles, result, objectMapper);
}
catch (Exception e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,35 @@

package org.finos.legend.engine.testData.generation.service;

import org.eclipse.collections.api.RichIterable;
import org.finos.legend.engine.language.pure.compiler.toPureGraph.HelperValueSpecificationBuilder;
import org.finos.legend.engine.language.pure.compiler.toPureGraph.PureModel;
import org.finos.legend.engine.protocol.pure.v1.model.data.EmbeddedData;
import org.finos.legend.engine.protocol.pure.v1.model.packageableElement.store.relational.data.RelationalCSVData;
import org.finos.legend.engine.protocol.pure.v1.model.packageableElement.store.relational.data.RelationalCSVTable;
import org.finos.legend.engine.protocol.pure.v1.model.valueSpecification.raw.Lambda;
import org.finos.legend.pure.generated.Root_meta_pure_runtime_Runtime;
import org.finos.legend.pure.generated.Root_meta_relational_metamodel_data_RelationalCSVData;
import org.finos.legend.pure.m3.coreinstance.meta.pure.mapping.Mapping;
import org.finos.legend.pure.generated.core_relational_relational_testDataGeneration_testDataGeneration;
import org.finos.legend.pure.m3.coreinstance.meta.pure.metamodel.function.LambdaFunction;
import org.finos.legend.pure.m3.coreinstance.meta.relational.metamodel.Column;
import org.finos.legend.pure.m3.coreinstance.meta.relational.metamodel.relation.Table;

import java.util.Collections;
import java.util.List;

public class TestDataGenerationService
{
/**
 * Builds relational CSV embedded data describing the tables and columns a query needs.
 *
 * <p>NOTE(review): this listing is a diff view rendered without +/- markers, so two versions
 * of the method are interleaved below. Presumably the four-argument signature and the
 * {@code getTableFromQuery} call are the pre-change lines, and the three-argument signature
 * and the {@code getRelationalCSVDataFromQuery} call are the post-change lines; confirm
 * against the repository before editing.
 *
 * @param query     protocol lambda, converted with {@code buildPureLambda} before being passed to the Pure function
 * @param mapping   mapping passed through to the Pure function
 * @param pureModel compiled model; supplies the execution support for the Pure call
 * @return a singleton list holding one {@link RelationalCSVData}; the variant that calls
 *         {@code getTableFromQuery} returns {@code null} when no tables are found
 */
public static List<EmbeddedData> generateEmbeddedData(Lambda query, Root_meta_pure_runtime_Runtime runtime, Mapping mapping, PureModel pureModel)
public static List<EmbeddedData> generateEmbeddedData(Lambda query, Mapping mapping, PureModel pureModel)
{
// Variant using the runtime: resolves whole tables from the query and lists every Column of each table.
RichIterable<? extends Table> tables = core_relational_relational_testDataGeneration_testDataGeneration.Root_meta_relational_testDataGeneration_getTableFromQuery_FunctionDefinition_1__Mapping_1__Runtime_1__Table_MANY_(
buildPureLambda(query, pureModel), mapping, runtime, pureModel.getExecutionSupport());
if (tables.isEmpty())
{
return null;
}
List<RelationalCSVTable> relationalCSVTables = tables.collect(table ->
// Variant without the runtime: the Pure function returns the CSV data (schema, table, values) already assembled.
Root_meta_relational_metamodel_data_RelationalCSVData relationalCSVData = core_relational_relational_testDataGeneration_testDataGeneration.Root_meta_relational_testDataGeneration_getRelationalCSVDataFromQuery_FunctionDefinition_1__Mapping_1__RelationalCSVData_1_(
buildPureLambda(query, pureModel), mapping, pureModel.getExecutionSupport());
RelationalCSVData data = new RelationalCSVData();
List<RelationalCSVTable> relationalCSVTables = relationalCSVData._tables().collect(table ->
{
// Copy each Pure-side table entry into its protocol counterpart.
RelationalCSVTable relationalCSVTable = new RelationalCSVTable();
relationalCSVTable.schema = table._schema()._name();
relationalCSVTable.table = table._name();
relationalCSVTable.values = table._columns().select(c -> c instanceof Column).collect(c -> c.getName()).makeString(",");
relationalCSVTable.schema = table._schema();
relationalCSVTable.table = table._table();
relationalCSVTable.values = table._values();
return relationalCSVTable;
}).toList();
RelationalCSVData data = new RelationalCSVData();
data.tables = relationalCSVTables;
return Collections.singletonList(data);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ private void testGenerateEmbeddedData(String testGenerationInputPath, String exp
String testGenerationInput = getResourceAsString(testGenerationInputPath);
TestDataGenerationInput input = objectMapper.readValue(testGenerationInput, TestDataGenerationInput.class);
PureModel pureModel = modelManager.loadModel(input.model, input.clientVersion == null ? PureClientVersions.production : input.clientVersion, null, null);
List<EmbeddedData> testData = TestDataGenerationService.generateEmbeddedData(input.query, pureModel.getRuntime(input.runtime),pureModel.getMapping(input.mapping), pureModel);
List<EmbeddedData> testData = TestDataGenerationService.generateEmbeddedData(input.query, pureModel.getMapping(input.mapping), pureModel);
Assert.assertEquals(objectMapper.writeValueAsString(testData), expectedResult);
}

Expand All @@ -67,6 +67,6 @@ public void testRelationalCSVTableGeneration() throws Exception
{
testGenerateEmbeddedData(
"models/relationalModelTestDataGenerationInput.json",
"[{\"tables\":[{\"schema\":\"default\",\"table\":\"FirmTable\",\"values\":\"id,Legal_name\"},{\"schema\":\"default\",\"table\":\"PersonTable\",\"values\":\"id,firm_id,firstName,lastName\"}]}]");
"[{\"tables\":[{\"schema\":\"default\",\"table\":\"FirmTable\",\"values\":\"id\"},{\"schema\":\"default\",\"table\":\"PersonTable\",\"values\":\"id,firm_id,firstName\"}]}]");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,15 @@ function meta::relational::milestoning::getSnapshotDateColumns(tables:Table[*]):
$tables->map(table|$table->getSnapshotDateColumn());
}

// Returns the date columns that drive milestoning for the given tables:
//   - ProcessingMilestoning       -> its 'in' and 'out' columns
//   - BusinessMilestoning         -> its 'from' and 'thru' columns
//   - BusinessSnapshotMilestoning -> its 'snapshotDate' column
// Tables without milestoning contribute nothing.
//
// Each milestoning entry is dispatched on its own. Matching the whole
// '$table.milestoning' collection against '[*]' patterns fails as soon as a table
// carries more than one kind of milestoning (e.g. processing and business together),
// because the mixed collection conforms to none of the individual patterns.
function meta::relational::milestoning::getMilestoningDateColumns(tables:Table[*]):Column[*]
{
   $tables->map(table|$table.milestoning->map(m|$m->match([
      p : ProcessingMilestoning[1] | $p.in->concatenate($p.out),
      b : BusinessMilestoning[1] | $b.from->concatenate($b.thru),
      bs: BusinessSnapshotMilestoning[1] | $bs.snapshotDate
   ])));
}

function <<access.private>> meta::relational::milestoning::getSnapshotDateColumn(table:Table[1]):Column[0..1]
{
$table.milestoning->filter(m|$m->instanceOf(BusinessSnapshotMilestoning))->cast(@BusinessSnapshotMilestoning).snapshotDate->first();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -748,12 +748,26 @@ function meta::relational::testDataGeneration::getMilestoningFilter(table: Table
)
}

function meta::relational::testDataGeneration::getTableFromQuery(query:FunctionDefinition<{->TabularDataSet[1]}>[1], mapping:Mapping[1], runtime: Runtime[1]):Table[*]
{
let sql = meta::relational::functions::sqlstring::toSQL($query, $mapping, $runtime, meta::relational::extension::relationalExtensions()).sqlQueries;
$sql->map(q | $q->meta::relational::validation::functions::getTables())
->removeDuplicates()
->removeDuplicatesBy(t | $t.schema.name + '.' + $t.name);
// Builds a RelationalCSVData listing, per table, only the columns the query needs.
//   query   - the function whose first expression is scanned for property usage
//   mapping - the mapping used to resolve scanned properties to relational columns
// Returns one RelationalCSVData whose tables are ordered by schema + table name; each
// table's 'values' is a comma-separated list of column names.
function meta::relational::testDataGeneration::getRelationalCSVDataFromQuery(query:FunctionDefinition<{->TabularDataSet[1]}>[1], mapping:Mapping[1]):meta::relational::metamodel::data::RelationalCSVData[1]
{
// Scan the properties used by the query and arrange them as a property tree.
// Only the first expression of the query (at(0)) is inspected.
let propertyTree = $query.expressionSequence->at(0)->evaluateAndDeactivate()->meta::pure::lineage::scanProperties::scanProperties(^List<meta::pure::lineage::scanProperties::PropertyPathNode>(), [], [])
.result->meta::pure::lineage::scanProperties::propertyTree::buildPropertyTree();
// Resolve the property tree to columns through the mapping.
let columns = $propertyTree->meta::pure::lineage::scanColumns::scanColumns($mapping).column->removeDuplicates();

// Group the columns by owning table; columns whose owner is missing or is not a Table are dropped.
let finalTableToColumnsMap = $columns->filter(c | $c.owner->isNotEmpty() && $c.owner->toOne()->instanceOf(Table))->fold({column, tableToColumnsMap |
let table = $column.owner->toOne()->cast(@Table);
let existingColumnList = $tableToColumnsMap->get($table);
// Start a new list for a table seen for the first time, otherwise append to its existing list.
let list = if($existingColumnList->isEmpty(),| ^List<Column>(values = [$column]) ,| ^List<Column>(values = $existingColumnList.values->add($column)))->cast(@List<Column>)->toOne();
$tableToColumnsMap->put($table, $list);
}, ^Map<Table,List<Column>>());

// Per table, the column order is: primary keys, then scanned columns, then milestoning
// date columns (each group sorted by name), with duplicates removed afterwards.
let relationalCSVTables = $finalTableToColumnsMap->keyValues()->map(tableColumnsPair | let table = $tableColumnsPair.first;
let primaryKeys = $table.primaryKey->sortBy(pk | $pk.name);
let milestoningColumns = $table->meta::relational::milestoning::getMilestoningDateColumns()->sortBy(c | $c.name);
let columns = $primaryKeys->concatenate($tableColumnsPair.second.values->sortBy(c | $c->cast(@Column).name))->concatenate($milestoningColumns)->removeDuplicates();
^meta::relational::metamodel::data::RelationalCSVTable(schema = $table.schema.name, table = $table.name, values = $columns->map(c|$c->cast(@Column).name)->joinStrings(','));
)->sortBy(t | $t.schema + $t.table);
^meta::relational::metamodel::data::RelationalCSVData(tables=$relationalCSVTables);
}

/*** Plan Generation ***/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2932,7 +2932,7 @@ function <<meta::pure::profiles::test.Test>> {serverVersion.start='V1_5_0'} meta
let mapping = meta::relational::testDataGeneration::tests::model::VeiwOnViewonViewMapping;
let db = meta::relational::testDataGeneration::tests::model::db;
let runtime = meta::relational::testDataGeneration::tests::model::setUp();

let tableRowIdentifiers = [
meta::relational::testDataGeneration::createTableRowIdentifiers(meta::relational::testDataGeneration::tests::model::db, 'default', 'StudentTable', [
meta::relational::testDataGeneration::createRowIdentifier(['id', 'name', 'school_id'], ['1', 'SURAJ', 'sc1']),
Expand Down Expand Up @@ -3003,7 +3003,7 @@ function <<meta::pure::profiles::test.Test>> {serverVersion.start='V1_5_0'} meta
function <<meta::pure::profiles::test.Test>> {serverVersion.start='V1_5_0'} meta::relational::testDataGeneration::tests::alloy::testAlloyTestDatGenWithQuotedColumnsForViews():Boolean[1]
{
// Purposefully asserting on plan string to assert we add quotes in join columns

let query = {|meta::pure::lineage::scanRelations::test::Party.all()->project([r | $r.identifier.identifier],['id'])->distinct()};
let mapping = meta::pure::lineage::scanRelations::test::MappingWithJoinToSchemaInAnotherView;
let db = meta::pure::lineage::scanRelations::test::DB2;
Expand All @@ -3014,7 +3014,7 @@ function <<meta::pure::profiles::test.Test>> {serverVersion.start='V1_5_0'} meta
meta::relational::testDataGeneration::createRowIdentifier(['entityID'], ['2'])
])
];

let plan = meta::relational::testDataGeneration::executionPlan::planTestDataGeneration($query, $mapping, $runtime, ^ExecutionContext(), $tableRowIdentifiers, false, meta::relational::extension::relationalExtensions());
assertEquals(
'MultiResultSequence\n' +
Expand Down Expand Up @@ -3075,3 +3075,54 @@ function <<meta::pure::profiles::test.Test>> {serverVersion.start='V1_5_0'} meta
' )\n' +
')\n', $plan->meta::pure::executionPlan::toString::planToString(meta::relational::extension::relationalExtensions()));
}


// ----------------------------------------------------- TEST QUERY SCHEMA GENERATION -----------------------------------------------------
// A projection over a single property should produce exactly one table entry,
// rendered here as schema / table / comma-separated column names.
function <<meta::pure::profiles::test.Test>> meta::relational::testDataGeneration::tests::testGenerateNecessaryTableColumnsForSingleTable():Boolean[1]
{
   let personNameQuery = {|meta::relational::tests::model::inheritance::Person.all()->project([f|$f.name], ['col'])};
   let inheritanceMapping = meta::relational::tests::mapping::inheritance::relational::inheritanceMappingDB;

   let csvTables = meta::relational::testDataGeneration::getRelationalCSVDataFromQuery($personNameQuery, $inheritanceMapping).tables;
   assertEquals(1, $csvTables->size());

   // Render every table as three lines so the whole result can be compared as one string.
   let rendered = $csvTables->map(tbl | $tbl.schema + '\n' + $tbl.table + '\n' + $tbl.values)->joinStrings('\n-------\n');
   let expected = 'default\n'+
                  'Person\n'+
                  'ID,name';
   assertEquals($expected, $rendered);
}

// A projection that navigates from Trade to Product and on to a synonym should
// produce three table entries, each limited to the columns listed below.
function <<meta::pure::profiles::test.Test>> meta::relational::testDataGeneration::tests::testGenerateNecessaryTableColumnsForMultiTables():Boolean[1]
{
   let tradeQuery = {|Trade.all()->project([t|$t.product.name, t|$t.product->toOne().synonymByType(ProductSynonymType.CUSIP).name],['prodName', 'synName'])};
   let simpleMapping = meta::relational::tests::simpleRelationalMapping;

   let csvTables = meta::relational::testDataGeneration::getRelationalCSVDataFromQuery($tradeQuery, $simpleMapping).tables;
   assertEquals(3, $csvTables->size());

   // Sort by schema + table before rendering so the comparison does not depend on result order.
   let rendered = $csvTables->sortBy(tbl | $tbl.schema + $tbl.table)
                            ->map(tbl | $tbl.schema + '\n' + $tbl.table + '\n' + $tbl.values)
                            ->joinStrings('\n-------\n');
   let expected = 'default\n'+
                  'tradeTable\n'+
                  'ID,prodId\n'+
                  '-------\n'+
                  'productSchema\n'+
                  'productTable\n'+
                  'ID,NAME\n'+
                  '-------\n'+
                  'productSchema\n'+
                  'synonymTable\n' +
                  'ID,NAME,PRODID,TYPE';
   assertEquals($expected, $rendered);
}


// A query against the milestoning model should produce two table entries whose
// column lists end with the from_z / thru_z columns.
function <<meta::pure::profiles::test.Test>> meta::relational::testDataGeneration::tests::testGenerateNecessaryTableColumnsForMilestoningTable():Boolean[1]
{
   let productQuery = {|meta::relational::tests::milestoning::Product.all(%2015-10-16)->project([p|$p.name, p|$p.classificationTypeStr],['name','classificationType'])};
   let milestoningMapping = meta::relational::tests::milestoning::milestoningmap;

   let csvTables = meta::relational::testDataGeneration::getRelationalCSVDataFromQuery($productQuery, $milestoningMapping).tables;
   assertEquals(2, $csvTables->size());

   // Sort by schema + table before rendering so the comparison does not depend on result order.
   let rendered = $csvTables->sortBy(tbl | $tbl.schema + $tbl.table)
                            ->map(tbl | $tbl.schema + '\n' + $tbl.table + '\n' + $tbl.values)
                            ->joinStrings('\n-------\n');
   let expected = 'default\n'+
                  'ProductClassificationTable\n'+
                  'type,from_z,thru_z\n'+
                  '-------\n'+
                  'default\n'+
                  'ProductTable\n'+
                  'id,name,type,from_z,thru_z';
   assertEquals($expected, $rendered);
}

0 comments on commit 11873be

Please sign in to comment.