From c4296cb8b56da98695b70df1fbf6fe18b41bb858 Mon Sep 17 00:00:00 2001 From: "Dan S. Camper" Date: Fri, 5 Jan 2024 08:51:31 -0600 Subject: [PATCH] Initial commit --- ecl/hql/hqlattr.cpp | 3 +- ecl/hql/hqlexpr.cpp | 4 +- ecl/hql/hqlexpr.hpp | 2 +- ecl/hql/hqlgram.hpp | 2 + ecl/hql/hqlgram.y | 71 +++++++++++++++++++++++++++++++++- ecl/hql/hqlgram2.cpp | 23 +++++++++++ ecl/hql/hqlir.cpp | 2 +- ecl/hql/hqltrans.cpp | 1 + ecl/regress/filetypeplugin.ecl | 54 ++++++++++++++++++++++++++ 9 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 ecl/regress/filetypeplugin.ecl diff --git a/ecl/hql/hqlattr.cpp b/ecl/hql/hqlattr.cpp index af0e32c7e6a..0fc5eede29d 100644 --- a/ecl/hql/hqlattr.cpp +++ b/ecl/hql/hqlattr.cpp @@ -507,6 +507,7 @@ unsigned getOperatorMetaFlags(node_operator op) case no_thor: case no_flat: case no_pipe: + case no_filetype: case no_joined: case no_any: case no_xml: @@ -622,7 +623,7 @@ unsigned getOperatorMetaFlags(node_operator op) case no_unused6: case no_unused13: case no_unused14: case no_unused15: - case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38: + case no_unused35: case no_unused36: case no_unused37: case no_unused38: case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49: case no_unused50: case no_unused52: case no_unused80: diff --git a/ecl/hql/hqlexpr.cpp b/ecl/hql/hqlexpr.cpp index 57529bce0ae..e35f7983119 100644 --- a/ecl/hql/hqlexpr.cpp +++ b/ecl/hql/hqlexpr.cpp @@ -1604,6 +1604,7 @@ const char *getOpString(node_operator op) case no_csv: return "CSV"; case no_xml: return "XML"; case no_json: return "JSON"; + case no_filetype: return "TYPE"; case no_when: return "WHEN"; case no_priority: return "PRIORITY"; @@ -2020,7 +2021,7 @@ const char *getOpString(node_operator op) case no_unused6: case no_unused13: case no_unused14: case no_unused15: - case no_unused34: case 
no_unused35: case no_unused36: case no_unused37: case no_unused38: + case no_unused35: case no_unused36: case no_unused37: case no_unused38: case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49: case no_unused50: case no_unused52: case no_unused80: @@ -5095,6 +5096,7 @@ unsigned CHqlRealExpression::getCachedEclCRC() case no_csv: case no_xml: case no_json: + case no_filetype: case no_null: if (thisType && (thisType->getTypeCode() == type_null)) thisType = nullptr; diff --git a/ecl/hql/hqlexpr.hpp b/ecl/hql/hqlexpr.hpp index 66071721e4f..1dbc2601dea 100644 --- a/ecl/hql/hqlexpr.hpp +++ b/ecl/hql/hqlexpr.hpp @@ -358,7 +358,7 @@ enum node_operator : unsigned short { no_unlikely, no_inline, no_nwaydistribute, - no_unused34, + no_filetype, // File format/type information (input or output) no_unused35, no_unused36, no_unused37, diff --git a/ecl/hql/hqlgram.hpp b/ecl/hql/hqlgram.hpp index 58d3c735ac5..835c71fe321 100644 --- a/ecl/hql/hqlgram.hpp +++ b/ecl/hql/hqlgram.hpp @@ -766,6 +766,8 @@ class HqlGram : implements IErrorReceiver, public CInterface void checkValidPipeRecord(const attribute & errpos, IHqlExpression * record, IHqlExpression * attrs, IHqlExpression * expr); void checkValidLookupFlag(IHqlExpression * dataset, IHqlExpression * filename, attribute & atr); + void setPluggableModeExpr(attribute & targetAttr, attribute & mode, HqlExprArray * options); + void createAppendDictionaries(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind); void createAppendFiles(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind); IHqlExpression * createAppendFiles(attribute & filesAttr, IHqlExpression * _attrs); diff --git a/ecl/hql/hqlgram.y b/ecl/hql/hqlgram.y index 9e3e190beb7..d5f99eeebcd 100644 --- a/ecl/hql/hqlgram.y +++ b/ecl/hql/hqlgram.y @@ -10494,7 +10494,7 @@ mode : FLAT { 
$$.setExpr(createValue(no_flat, makeNullType())); } | CSV { $$.setExpr(createValue(no_csv, makeNullType())); } | CSV '(' csvOptions ')' - { + { HqlExprArray args; $3.unwindCommaList(args); $$.setExpr(createValue(no_csv, makeNullType(), args)); @@ -10542,6 +10542,75 @@ mode $$.setExpr(createValue(no_json, makeNullType(), args)); } | pipe + | TYPE '(' FLAT ')' + { + $$.setExpr(createValue(no_flat, makeNullType())); + } + | TYPE '(' CSV ')' + { + $$.setExpr(createValue(no_csv, makeNullType())); + } + | TYPE '(' CSV ':' csvOptions ')' + { + HqlExprArray args; + $5.unwindCommaList(args); + $$.setExpr(createValue(no_csv, makeNullType(), args)); + } + | TYPE '(' THOR ')' + { + $$.setExpr(createValue(no_thor, makeNullType())); + } + | TYPE '(' XML_TOKEN ')' + { + $$.setExpr(createValue(no_xml, makeNullType())); + } + | TYPE '(' XML_TOKEN ':' xmlOptions ')' + { + HqlExprArray args; + $5.unwindCommaList(args); + + //Create expression in a form that is backward compatible + IHqlExpression * name = queryAttribute(rowAtom, args); + if (name) + { + args.add(*LINK(name->queryChild(0)), 0); + args.zap(*name); + } + else + args.add(*createConstant("xml"), 0); + $$.setExpr(createValue(no_xml, makeNullType(), args)); + } + | TYPE '(' JSON_TOKEN ')' + { + $$.setExpr(createValue(no_json, makeNullType())); + } + | TYPE '(' JSON_TOKEN ':' xmlOptions ')' + { + HqlExprArray args; + $5.unwindCommaList(args); + + //Create expression in a form that is backward compatible + IHqlExpression * name = queryAttribute(rowAtom, args); + if (name) + { + args.add(*LINK(name->queryChild(0)), 0); + args.zap(*name); + } + else + args.add(*createConstant("json"), 0); + $$.setExpr(createValue(no_json, makeNullType(), args)); + } + | TYPE '(' pipe ')' { $$.inherit($3); /* without an action the default $$ = $1 would yield the TYPE token, not the pipe expression */ } + | TYPE '(' UNKNOWN_ID ')' + { + parser->setPluggableModeExpr($$, $3, nullptr); + } + | TYPE '(' UNKNOWN_ID ':' hintList ')' + { + HqlExprArray options; + $5.unwindCommaList(options); + parser->setPluggableModeExpr($$, $3, &options); + } ; dsOption diff --git
a/ecl/hql/hqlgram2.cpp b/ecl/hql/hqlgram2.cpp index 999af21edaf..e1ecc1795dc 100644 --- a/ecl/hql/hqlgram2.cpp +++ b/ecl/hql/hqlgram2.cpp @@ -8992,6 +8992,29 @@ bool HqlGram::convertAllToAttribute(attribute &atr) return true; } +void HqlGram::setPluggableModeExpr(attribute & targetAttr, attribute & mode, HqlExprArray * options) +{ + // Placeholder for activating a pluggable file format named by 'mode' (the UNKNOWN_ID + // given inside TYPE()); the grammar should have ensured that none of the legacy + // (built-in) file formats reach this point. 'options' is null when no option list + // was supplied. The resulting mode expression is stored in 'targetAttr'. + IAtom * fileFormat = lower(mode.getId()); + const char * fileFormatStr = fileFormat->queryStr(); /* borrowed from the atom -- no StringBuffer copy needed just to log it */ + + // TODO: Look for a plugin filetype of name fileFormatStr + DBGLOG("HqlGram::setPluggableModeExpr processing file type %s", fileFormatStr); + + // Following is a placeholder to make the parser happy -- note the hardwiring + // of CSV format.... + if (options) + { + targetAttr.setExpr(createValue(no_csv, makeNullType(), *options)); + } + else + { + targetAttr.setExpr(createValue(no_csv, makeNullType())); + } +} void HqlGram::checkValidRecordMode(IHqlExpression * dataset, attribute & atr, attribute & modeattr) { diff --git a/ecl/hql/hqlir.cpp b/ecl/hql/hqlir.cpp index 11dfd32d759..6d78b5de9ec 100644 --- a/ecl/hql/hqlir.cpp +++ b/ecl/hql/hqlir.cpp @@ -291,7 +291,6 @@ const char * getOperatorIRText(node_operator op) EXPAND_CASE(no,unlikely); EXPAND_CASE(no,inline); EXPAND_CASE(no,nwaydistribute); - EXPAND_CASE(no,unused34); EXPAND_CASE(no,unused35); EXPAND_CASE(no,unused36); EXPAND_CASE(no,unused37); @@ -662,6 +661,7 @@ const char * getOperatorIRText(node_operator op) EXPAND_CASE(no,getenv); EXPAND_CASE(no,json); EXPAND_CASE(no,matched_injoin); + EXPAND_CASE(no,filetype); } return ""; diff --git a/ecl/hql/hqltrans.cpp b/ecl/hql/hqltrans.cpp index 622b6f668c8..285ad4e1812 100644 --- a/ecl/hql/hqltrans.cpp +++ b/ecl/hql/hqltrans.cpp @@ -2687,6 +2687,7 @@ bool
onlyTransformOnce(IHqlExpression * expr) case no_csv: case no_xml: case no_json: + case no_filetype: case no_list: return (expr->numChildren() == 0); case no_select: diff --git a/ecl/regress/filetypeplugin.ecl b/ecl/regress/filetypeplugin.ecl new file mode 100644 index 00000000000..0b401effc11 --- /dev/null +++ b/ecl/regress/filetypeplugin.ecl @@ -0,0 +1,54 @@ +/*############################################################################## + + HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+############################################################################## */ + +NamesLayout := RECORD + STRING20 surname; + STRING10 forename; + INTEGER2 age := 25; +END; + +//----------------------------------------------------------- +// The DATASET definitions below reference the legacy/built-in +// file types and their options through the new TYPE(...) syntax +//----------------------------------------------------------- +namesTableFlat_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(FLAT), __COMPRESSED__, __GROUPED__, OPT); +OUTPUT(namesTableFlat_1, {namesTableFlat_1}, '~filetypetest::namestableflat_1', OVERWRITE); + +namesTableThor_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(THOR), OPT); +OUTPUT(namesTableThor_1, {namesTableThor_1}, '~filetypetest::namestablethor_1', OVERWRITE); + +namesTableCSV_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(CSV), OPT); +// OUTPUT(namesTableCSV_1, {namesTableCSV_1}, '~filetypetest::namestablecsv_1', TYPE(CSV), OVERWRITE); + +namesTableCSV_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(CSV : HEADING(1), SEPARATOR([',', '==>']), QUOTE(['\'', '"', '$$']), TERMINATOR(['\r\n', '\r', '\n']), NOTRIM, UTF8, MAXLENGTH(10000)), OPT); +// OUTPUT(namesTableCSV_2, {namesTableCSV_2}, '~filetypetest::namestablecsv_2', TYPE(CSV : SEPARATOR('\t')), OVERWRITE); + +namesTableXML_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(XML), OPT); +// OUTPUT(namesTableXML_1, {namesTableXML_1}, '~filetypetest::namestablexml_1', TYPE(XML), OVERWRITE); + +namesTableXML_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(XML : '/', NOROOT), OPT); +// OUTPUT(namesTableXML_2, {namesTableXML_2}, '~filetypetest::namestablexml_2', TYPE(XML : HEADING('', '')), OVERWRITE); + +namesTableJSON_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(JSON), OPT); +// OUTPUT(namesTableJSON_1, {namesTableJSON_1}, '~filetypetest::namestablejson_1', TYPE(JSON), OVERWRITE); + +namesTableJSON_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(JSON : '/', NOROOT), OPT); +// OUTPUT(namesTableJSON_2, {namesTableJSON_2}, 
'~filetypetest::namestablejson_2', TYPE(JSON : TRIM), OVERWRITE); + +//----------------------------------------------------------- +// TODO: add tests of pluggable file types below +//-----------------------------------------------------------