From 7ee26ab69dc152f108151980c1ddbbba24f94069 Mon Sep 17 00:00:00 2001 From: Jack Del Vecchio Date: Mon, 25 Mar 2024 16:27:20 +0000 Subject: [PATCH] HPCC-31496 Allow field translation that only removes fields - and does not add blank values --- common/thorhelper/thorcommon.cpp | 3 +++ common/thorhelper/thorread.cpp | 3 +++ ecl/hthor/hthorkey.cpp | 12 ++++++++++++ helm/hpcc/values.schema.json | 4 ++-- .../componentfiles/configschema/xsd/eclagent.xsd | 5 +++-- initfiles/componentfiles/configschema/xsd/roxie.xsd | 5 +++-- initfiles/componentfiles/configschema/xsd/thor.xsd | 5 +++-- .../componentfiles/configxml/eclagent_config.xsd.in | 3 ++- initfiles/componentfiles/configxml/roxie.xsd.in | 3 ++- initfiles/componentfiles/configxml/thor.xsd.in | 3 ++- initfiles/etc/DIR_NAME/environment.xml.in | 2 +- roxie/ccd/ccdfile.cpp | 2 ++ roxie/ccd/ccdmain.cpp | 4 ++-- roxie/ccd/ccdstate.cpp | 2 +- rtl/eclrtl/rtldynfield.cpp | 13 ++++++++++++- rtl/eclrtl/rtldynfield.hpp | 4 +++- testing/regress/ecl/layouttrans.ecl | 2 +- testing/regress/ecl/sqtrans.ecl | 2 ++ testing/regress/ecl/translatedisk.ecl | 2 +- testing/regress/environment.xml.in | 2 +- thorlcr/slave/slavmain.cpp | 2 ++ 21 files changed, 63 insertions(+), 20 deletions(-) diff --git a/common/thorhelper/thorcommon.cpp b/common/thorhelper/thorcommon.cpp index 6c9668cfcb8..38f29328615 100644 --- a/common/thorhelper/thorcommon.cpp +++ b/common/thorhelper/thorcommon.cpp @@ -2148,6 +2148,9 @@ static bool getTranslators(Owned &translator, OwnedcanTranslate()) throw MakeStringException(0, "Untranslatable record layout mismatch detected for file %s", tracing); + if (mode == RecordTranslationMode::PayloadRemoveOnly && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", tracing); + if (translator->needsTranslate()) { if (keyedTranslator && (sourceFormat != expectedFormat)) diff --git a/common/thorhelper/thorread.cpp b/common/thorhelper/thorread.cpp index d552b78d154..2771f68f720 100644 --- a/common/thorhelper/thorread.cpp +++ b/common/thorhelper/thorread.cpp @@ -176,6 +176,9 @@ void DiskReadMapping::ensureTranslators() const if (!translator->canTranslate()) throw MakeStringException(0, "Untranslatable record layout mismatch detected for file %s", filename); + if (mode == RecordTranslationMode::PayloadRemoveOnly && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", filename); + if (translator->needsTranslate()) { if (sourceMeta != expectedMeta) diff --git a/ecl/hthor/hthorkey.cpp b/ecl/hthor/hthorkey.cpp index 7fb6c7952ef..1c9094a3a63 100644 --- a/ecl/hthor/hthorkey.cpp +++ b/ecl/hthor/hthorkey.cpp @@ -700,6 +700,8 @@ const IDynamicTransform * CHThorIndexReadActivityBase::getLayoutTranslator(IDist Owned payloadTranslator = createRecordTranslator(projectedFormat->queryRecordAccessor(true), actualFormat->queryRecordAccessor(true)); if (!payloadTranslator->canTranslate()) throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", f->queryLogicalName()); + if (getLayoutTranslationMode() == RecordTranslationMode::PayloadRemoveOnly && payloadTranslator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", f->queryLogicalName()); if (payloadTranslator->needsTranslate()) return payloadTranslator.getClear(); return nullptr; @@ -715,6 +717,8 @@ void CHThorIndexReadActivityBase::verifyIndex(IKeyIndex * idx) { if (!layoutTrans->canTranslate()) throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", df->queryLogicalName()); + if (getLayoutTranslationMode() == RecordTranslationMode::PayloadRemoveOnly && layoutTrans->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", df->queryLogicalName()); } else { @@ -2462,6 +2466,8 @@ class CHThorFlatFetchActivity : public CHThorFetchActivityBase, public IFlatFetc translator->describe(); if (translator->canTranslate()) { + if (getLayoutTranslationMode()==RecordTranslationMode::PayloadRemoveOnly && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", f->queryLogicalName()); if (getLayoutTranslationMode()==RecordTranslationMode::None) throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled", f->queryLogicalName()); VStringBuffer msg("Record layout translation required for %s", f->queryLogicalName()); @@ -4102,6 +4108,8 @@ class CHThorKeyedJoinActivity : public CHThorThreadedActivityBase, implements I throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", f->queryLogicalName()); if (payloadTranslator->keyedTranslated()) throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s - keyed fields do not match", f->queryLogicalName()); + if (getLayoutTranslationMode()==RecordTranslationMode::PayloadRemoveOnly && payloadTranslator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", f->queryLogicalName()); if (getLayoutTranslationMode()==RecordTranslationMode::None) throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled", f->queryLogicalName()); VStringBuffer msg("Record layout translation required for %s", f->queryLogicalName()); @@ -4119,6 +4127,8 @@ class CHThorKeyedJoinActivity : public CHThorThreadedActivityBase, implements I { if (!trans->canTranslate()) throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", f->queryLogicalName()); + if (getLayoutTranslationMode() == RecordTranslationMode::PayloadRemoveOnly && trans->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", f->queryLogicalName()); } else { @@ -4149,6 +4159,8 @@ class CHThorKeyedJoinActivity : public CHThorThreadedActivityBase, implements I translator.setown(createRecordTranslator(helper.queryProjectedDiskRecordSize()->queryRecordAccessor(true), actualDiskMeta->queryRecordAccessor(true))); if (translator->canTranslate()) { + if (getLayoutTranslationMode()==RecordTranslationMode::PayloadRemoveOnly && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", f->queryLogicalName()); if (getLayoutTranslationMode()==RecordTranslationMode::None) throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled", f->queryLogicalName()); VStringBuffer msg("Record layout translation required for %s", f->queryLogicalName()); diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index b6dfad61e5b..26a62e37ea5 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -1825,8 +1825,8 @@ "description": "Should the index node memory allocation flush the cache and retry if memory allocation fails" }, "fieldTranslationEnabled": { - "default": "payload", - "enum": ["false", "true", "payload"], + "default": "payloadRemoveOnly", + "enum": ["false", "true", "payload", "payloadRemoveOnly"], "description": "Enables translation (where possible) of mismatched file layouts on-the-fly. Specify 'payload' to attempt to translate payload fields only" }, "highTimeout": { diff --git a/initfiles/componentfiles/configschema/xsd/eclagent.xsd b/initfiles/componentfiles/configschema/xsd/eclagent.xsd index 9278367bc88..a1ca70f7f1c 100644 --- a/initfiles/componentfiles/configschema/xsd/eclagent.xsd +++ b/initfiles/componentfiles/configschema/xsd/eclagent.xsd @@ -57,13 +57,14 @@ hpcc:presetValue="false" hpcc:tooltip="eclAgent analyzes workunit post-execution"/> - + + diff --git a/initfiles/componentfiles/configschema/xsd/roxie.xsd b/initfiles/componentfiles/configschema/xsd/roxie.xsd index 14da617e884..65eef7ddea2 100644 --- a/initfiles/componentfiles/configschema/xsd/roxie.xsd +++ b/initfiles/componentfiles/configschema/xsd/roxie.xsd @@ -110,13 +110,14 @@ hpcc:displayName="flushJHtreeCacheOnOOM" hpcc:presetValue="true" hpcc:tooltip="Should the index node memory allocation flush the cache and retry if memory allocation fails"/> - + + diff --git a/initfiles/componentfiles/configschema/xsd/thor.xsd b/initfiles/componentfiles/configschema/xsd/thor.xsd index ae0c8579ec4..c9183448b86 100644 --- a/initfiles/componentfiles/configschema/xsd/thor.xsd +++ b/initfiles/componentfiles/configschema/xsd/thor.xsd @@ -224,13 +224,14 @@ - + + diff --git a/initfiles/componentfiles/configxml/eclagent_config.xsd.in b/initfiles/componentfiles/configxml/eclagent_config.xsd.in index d4c6e0b1153..447c347192f 100644 --- a/initfiles/componentfiles/configxml/eclagent_config.xsd.in +++ b/initfiles/componentfiles/configxml/eclagent_config.xsd.in @@ -190,7 +190,7 @@ - + Enables translation (where possible) of mismatched file layouts on-the-fly. Specify 'payload' to attempt to translate payload fields only @@ -201,6 +201,7 @@ + diff --git a/initfiles/componentfiles/configxml/roxie.xsd.in b/initfiles/componentfiles/configxml/roxie.xsd.in index 3175574f6d7..dcb5a533a7c 100644 --- a/initfiles/componentfiles/configxml/roxie.xsd.in +++ b/initfiles/componentfiles/configxml/roxie.xsd.in @@ -638,7 +638,7 @@ - + Enables translation (where possible) of mismatched file layouts on-the-fly. Specify 'payload' to attempt to translate payload fields only @@ -649,6 +649,7 @@ + diff --git a/initfiles/componentfiles/configxml/thor.xsd.in b/initfiles/componentfiles/configxml/thor.xsd.in index b3556aa3324..696e0985b5f 100644 --- a/initfiles/componentfiles/configxml/thor.xsd.in +++ b/initfiles/componentfiles/configxml/thor.xsd.in @@ -677,7 +677,7 @@ - + Enables translation (where possible) of mismatched file layouts on-the-fly. Specify 'payload' to attempt to translate payload fields only @@ -688,6 +688,7 @@ + diff --git a/initfiles/etc/DIR_NAME/environment.xml.in b/initfiles/etc/DIR_NAME/environment.xml.in index 14a6ada1e06..afbe84bc65d 100644 --- a/initfiles/etc/DIR_NAME/environment.xml.in +++ b/initfiles/etc/DIR_NAME/environment.xml.in @@ -68,7 +68,7 @@ enableKeyDiff="true" enableSysLog="true" fastLaneQueue="true" - fieldTranslationEnabled="payload" + fieldTranslationEnabled="payloadRemoveOnly" flushJHtreeCacheOnOOM="true" forceStdLog="false" heapRetainMemory="false" diff --git a/roxie/ccd/ccdfile.cpp b/roxie/ccd/ccdfile.cpp index e1e7cb3f1c5..a08ee799ceb 100644 --- a/roxie/ccd/ccdfile.cpp +++ b/roxie/ccd/ccdfile.cpp @@ -3013,6 +3013,8 @@ class CResolvedFile : implements IResolvedFileCreator, implements ISafeSDSSubscr } if (!translator || !translator->canTranslate()) throw MakeStringException(ROXIE_MISMATCH, "Untranslatable record layout mismatch detected for file %s", subname); + else if (mode == RecordTranslationMode::PayloadRemoveOnly && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", subname); else if (translator->needsTranslate()) { if (fileMode==FileFormatMode::index && translator->keyedTranslated()) diff --git a/roxie/ccd/ccdmain.cpp b/roxie/ccd/ccdmain.cpp index 5490b81b7fe..e71cc7a0c72 100644 --- a/roxie/ccd/ccdmain.cpp +++ b/roxie/ccd/ccdmain.cpp @@ -103,7 +103,7 @@ unsigned defaultTraceLimit = 10; unsigned watchActivityId = 0; unsigned testAgentFailure = 0; RelaxedAtomic restarts; -RecordTranslationMode fieldTranslationEnabled = RecordTranslationMode::Payload; +RecordTranslationMode fieldTranslationEnabled = RecordTranslationMode::PayloadRemoveOnly; bool mergeAgentStatistics = true; PTreeReaderOptions defaultXmlReadFlags = ptr_ignoreWhiteSpace; bool runOnce = false; @@ -1181,7 +1181,7 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml) agentQueryReleaseDelaySeconds = topology->getPropInt("@agentQueryReleaseDelaySeconds", topology->getPropInt("@slaveQueryReleaseDelaySeconds", 60)); // legacy name coresPerQuery = topology->getPropInt("@coresPerQuery", 0); - fieldTranslationEnabled = RecordTranslationMode::Payload; + fieldTranslationEnabled = RecordTranslationMode::PayloadRemoveOnly; const char *val = topology->queryProp("@fieldTranslationEnabled"); if (val) fieldTranslationEnabled = getTranslationMode(val, false); diff --git a/roxie/ccd/ccdstate.cpp b/roxie/ccd/ccdstate.cpp index c09c603e6ed..1b239971d9c 100644 --- a/roxie/ccd/ccdstate.cpp +++ b/roxie/ccd/ccdstate.cpp @@ -2463,7 +2463,7 @@ class CRoxiePackageSetManager : implements IRoxieQueryPackageManagerSet, impleme if (val) fieldTranslationEnabled = getTranslationMode(val, false); else - fieldTranslationEnabled = RecordTranslationMode::Payload; + fieldTranslationEnabled = RecordTranslationMode::PayloadRemoveOnly; val = getTranslationModeText(fieldTranslationEnabled); topology->setProp("@fieldTranslationEnabled", val); } diff --git a/rtl/eclrtl/rtldynfield.cpp b/rtl/eclrtl/rtldynfield.cpp index 14f60d4ca22..2d8ccd3aa14 100644 --- a/rtl/eclrtl/rtldynfield.cpp +++ b/rtl/eclrtl/rtldynfield.cpp @@ -36,7 +36,9 @@ extern ECLRTL_API RecordTranslationMode getTranslationMode(const char *val, bool isLocal) { - if (isEmptyString(val) || strToBool(val) || strieq(val, "payload")) + if (isEmptyString(val) || strToBool(val) || strieq(val, "payloadRemoveOnly")) + return RecordTranslationMode::PayloadRemoveOnly; + else if (strieq(val, "payload")) return RecordTranslationMode::Payload; else if (strieq(val, "alwaysDisk") || strieq(val, "disk")) { @@ -60,6 +62,7 @@ extern ECLRTL_API const char *getTranslationModeText(RecordTranslationMode val) case RecordTranslationMode::AlwaysDisk: return "alwaysDisk"; case RecordTranslationMode::AlwaysECL: return "alwaysECL"; case RecordTranslationMode::Payload: return "payload"; + case RecordTranslationMode::PayloadRemoveOnly: return "payloadRemoveOnly"; case RecordTranslationMode::None: return "off"; } throwUnexpected(); @@ -1085,6 +1088,10 @@ class GeneralRecordTranslator : public CInterfaceOf { return (matchFlags & match_keychange) != 0; } + virtual bool hasNewFields() const override + { + return (matchFlags & match_none) != 0; + } private: void doDescribe(unsigned indent) const { @@ -1948,6 +1955,10 @@ class CloneVirtualRecordTranslator : public CInterfaceOf { return false; } + virtual bool hasNewFields() const override + { + return false; + } private: void doDescribe(unsigned indent) const { diff --git a/rtl/eclrtl/rtldynfield.hpp b/rtl/eclrtl/rtldynfield.hpp index 2e7ae6f1df6..d9ddae13068 100644 --- a/rtl/eclrtl/rtldynfield.hpp +++ b/rtl/eclrtl/rtldynfield.hpp @@ -110,7 +110,8 @@ enum class RecordTranslationMode : byte Payload = 2, // Translate all fields in datasets, and only payload fields in indexes AlwaysDisk = 3, // Always translate - even if wouldn't normally (e.g. csv/xml source read as binary), or crcs happen to match AlwaysECL = 4, // Ignore the published format - can make sense to force no translation e.g. when field names have changed - Unspecified = 5 + PayloadRemoveOnly = 5, // Allow fields to be removed from the incoming dataset, but not allow fields to be missing + Unspecified = 6 }; // AlwaysDisk and AlwaysECL are for testing purposes only, and can only be set per file (not globally) extern ECLRTL_API RecordTranslationMode getTranslationMode(const char *modeStr, bool isLocal); @@ -139,6 +140,7 @@ interface IDynamicTransform : public IInterface virtual bool needsTranslate() const = 0; virtual bool keyedTranslated() const = 0; virtual bool needsNonVirtualTranslate() const = 0; + virtual bool hasNewFields() const = 0; }; interface IKeyTranslator : public IInterface diff --git a/testing/regress/ecl/layouttrans.ecl b/testing/regress/ecl/layouttrans.ecl index fb4535c9e8d..b60f3a68304 100644 --- a/testing/regress/ecl/layouttrans.ecl +++ b/testing/regress/ecl/layouttrans.ecl @@ -26,7 +26,7 @@ version := #IFDEFINED(root.version, 1); //--- end of version configuration --- -#option ('layoutTranslation', true); +#option ('layoutTranslation', 'payload'); #onwarning (4515, ignore); #onwarning (4522, ignore); #onwarning (4523, ignore); diff --git a/testing/regress/ecl/sqtrans.ecl b/testing/regress/ecl/sqtrans.ecl index b342b7e817e..a712e067766 100644 --- a/testing/regress/ecl/sqtrans.ecl +++ b/testing/regress/ecl/sqtrans.ecl @@ -22,6 +22,8 @@ multiPart := #IFDEFINED(root.multiPart, false); //--- end of version configuration --- +#option('layoutTranslation', 'payload'); + //check that compound disk read activities workcorrectly when the input file is translated //include keyed filters to ensure they are also translated correctly diff --git a/testing/regress/ecl/translatedisk.ecl b/testing/regress/ecl/translatedisk.ecl index 619e627b9ad..5cb92428242 100644 --- a/testing/regress/ecl/translatedisk.ecl +++ b/testing/regress/ecl/translatedisk.ecl @@ -27,7 +27,7 @@ optRemoteRead := #IFDEFINED(root.optRemoteRead, true); #onwarning(2036, ignore); #onwarning(4522, ignore); -#option ('layoutTranslation', true); +#option ('layoutTranslation', 'payload'); #option('forceRemoteRead', optRemoteRead); import $.Setup; diff --git a/testing/regress/environment.xml.in b/testing/regress/environment.xml.in index 46eb65517d2..709423ee436 100644 --- a/testing/regress/environment.xml.in +++ b/testing/regress/environment.xml.in @@ -65,7 +65,7 @@ enableKeyDiff="true" enableSysLog="true" fastLaneQueue="true" - fieldTranslationEnabled="payload" + fieldTranslationEnabled="payloadRemoveOnly" flushJHtreeCacheOnOOM="true" forceStdLog="false" heapRetainMemory="false" diff --git a/thorlcr/slave/slavmain.cpp b/thorlcr/slave/slavmain.cpp index 07ef3c0c14d..d066be220c5 100644 --- a/thorlcr/slave/slavmain.cpp +++ b/thorlcr/slave/slavmain.cpp @@ -353,6 +353,8 @@ class CKJService : public CSimpleInterfaceOf, implements IThreaded, translator.setown(createRecordTranslator(projectedFormat->queryRecordAccessor(true), publishedFormat->queryRecordAccessor(true))); if (!translator->canTranslate()) throw MakeStringException(0, "Untranslatable record layout mismatch detected for: %s", tracing); + if (RecordTranslationMode::PayloadRemoveOnly == translationMode && translator->hasNewFields()) + throw MakeStringException(0, "Translatable file layout mismatch reading file %s but translation disabled when expected fields are missing from source.", tracing); } DBGLOG("Record layout translator created for %s", tracing); translator->describe();