From c4296cb8b56da98695b70df1fbf6fe18b41bb858 Mon Sep 17 00:00:00 2001
From: "Dan S. Camper" <dan.camper@lexisnexisrisk.com>
Date: Fri, 5 Jan 2024 08:51:31 -0600
Subject: [PATCH] Initial commit

---
 ecl/hql/hqlattr.cpp            |  3 +-
 ecl/hql/hqlexpr.cpp            |  4 +-
 ecl/hql/hqlexpr.hpp            |  2 +-
 ecl/hql/hqlgram.hpp            |  2 +
 ecl/hql/hqlgram.y              | 71 +++++++++++++++++++++++++++++++++-
 ecl/hql/hqlgram2.cpp           | 23 +++++++++++
 ecl/hql/hqlir.cpp              |  2 +-
 ecl/hql/hqltrans.cpp           |  1 +
 ecl/regress/filetypeplugin.ecl | 54 ++++++++++++++++++++++++++
 9 files changed, 157 insertions(+), 5 deletions(-)
 create mode 100644 ecl/regress/filetypeplugin.ecl

diff --git a/ecl/hql/hqlattr.cpp b/ecl/hql/hqlattr.cpp
index af0e32c7e6a..0fc5eede29d 100644
--- a/ecl/hql/hqlattr.cpp
+++ b/ecl/hql/hqlattr.cpp
@@ -507,6 +507,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_thor:
     case no_flat:
     case no_pipe:
+    case no_filetype:
     case no_joined:
     case no_any:
     case no_xml:
@@ -622,7 +623,7 @@ unsigned getOperatorMetaFlags(node_operator op)
 
     case no_unused6:
     case no_unused13: case no_unused14: case no_unused15:
-    case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
+    case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
     case no_unused80:
diff --git a/ecl/hql/hqlexpr.cpp b/ecl/hql/hqlexpr.cpp
index 57529bce0ae..e35f7983119 100644
--- a/ecl/hql/hqlexpr.cpp
+++ b/ecl/hql/hqlexpr.cpp
@@ -1604,6 +1604,7 @@ const char *getOpString(node_operator op)
     case no_csv: return "CSV";
     case no_xml: return "XML";
     case no_json: return "JSON";
+    case no_filetype: return "TYPE";
 
     case no_when: return "WHEN";
     case no_priority: return "PRIORITY";
@@ -2020,7 +2021,7 @@ const char *getOpString(node_operator op)
 
     case no_unused6:
     case no_unused13: case no_unused14: case no_unused15:
-    case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
+    case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
     case no_unused80:
@@ -5095,6 +5096,7 @@ unsigned CHqlRealExpression::getCachedEclCRC()
     case no_csv:
     case no_xml:
     case no_json:
+    case no_filetype:
     case no_null:
         if (thisType && (thisType->getTypeCode() == type_null))
             thisType = nullptr;
diff --git a/ecl/hql/hqlexpr.hpp b/ecl/hql/hqlexpr.hpp
index 66071721e4f..1dbc2601dea 100644
--- a/ecl/hql/hqlexpr.hpp
+++ b/ecl/hql/hqlexpr.hpp
@@ -358,7 +358,7 @@ enum node_operator : unsigned short {
         no_unlikely,
         no_inline,
         no_nwaydistribute,
-    no_unused34,
+        no_filetype,                    // File format/type information (input or output)
     no_unused35,
     no_unused36,
     no_unused37,
diff --git a/ecl/hql/hqlgram.hpp b/ecl/hql/hqlgram.hpp
index 58d3c735ac5..835c71fe321 100644
--- a/ecl/hql/hqlgram.hpp
+++ b/ecl/hql/hqlgram.hpp
@@ -766,6 +766,8 @@ class HqlGram : implements IErrorReceiver, public CInterface
     void checkValidPipeRecord(const attribute & errpos, IHqlExpression * record, IHqlExpression * attrs, IHqlExpression * expr);
     void checkValidLookupFlag(IHqlExpression * dataset, IHqlExpression * filename, attribute & atr);
 
+    void setPluggableModeExpr(attribute & targetAttr, attribute & mode, HqlExprArray * options);
+
     void createAppendDictionaries(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind);
     void createAppendFiles(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind);
     IHqlExpression * createAppendFiles(attribute & filesAttr, IHqlExpression * _attrs);
diff --git a/ecl/hql/hqlgram.y b/ecl/hql/hqlgram.y
index 9e3e190beb7..d5f99eeebcd 100644
--- a/ecl/hql/hqlgram.y
+++ b/ecl/hql/hqlgram.y
@@ -10494,7 +10494,7 @@ mode
     : FLAT              {   $$.setExpr(createValue(no_flat, makeNullType()));   }
     | CSV               {   $$.setExpr(createValue(no_csv, makeNullType()));    }
     | CSV '(' csvOptions ')'
-                        {   
+                        {
                             HqlExprArray args;
                             $3.unwindCommaList(args);
                             $$.setExpr(createValue(no_csv, makeNullType(), args));
@@ -10542,6 +10542,75 @@ mode
                             $$.setExpr(createValue(no_json, makeNullType(), args));
                         }
     | pipe
+    //New-style TYPE(<format>[ : <options>]) syntax.
+    //The built-in format names create no_flat/no_csv/no_thor/no_xml/no_json mode
+    //nodes directly; PIPE reuses the existing pipe rule; any other identifier is
+    //passed to HqlGram::setPluggableModeExpr().
+    | TYPE '(' FLAT ')'         {   $$.setExpr(createValue(no_flat, makeNullType()));   }
+    | TYPE '(' CSV ')'          {   $$.setExpr(createValue(no_csv, makeNullType()));    }
+    | TYPE '(' CSV ':' csvOptions ')'
+                        {
+                            //The csvOptions list becomes the operands of the no_csv node
+                            HqlExprArray args;
+                            $5.unwindCommaList(args);
+                            $$.setExpr(createValue(no_csv, makeNullType(), args));
+                        }
+    | TYPE '(' THOR ')'         {   $$.setExpr(createValue(no_thor, makeNullType()));   }
+    | TYPE '(' XML_TOKEN ')'    {   $$.setExpr(createValue(no_xml, makeNullType()));    }
+    | TYPE '(' XML_TOKEN ':' xmlOptions ')'
+                        {
+                            HqlExprArray args;
+                            $5.unwindCommaList(args);
+
+                            //Create expression in a form that is backward compatible:
+                            //child 0 of the rowAtom attribute (when present) is inserted at position 0
+                            //and the attribute itself is removed; otherwise the constant "xml" is inserted.
+                            IHqlExpression * name = queryAttribute(rowAtom, args);
+                            if (name)
+                            {
+                                args.add(*LINK(name->queryChild(0)), 0);
+                                args.zap(*name);
+                            }
+                            else
+                                args.add(*createConstant("xml"), 0);
+                            $$.setExpr(createValue(no_xml, makeNullType(), args));
+                        }
+    | TYPE '(' JSON_TOKEN ')'   {   $$.setExpr(createValue(no_json, makeNullType()));   }
+    | TYPE '(' JSON_TOKEN ':' xmlOptions ')'
+                        {
+                            HqlExprArray args;
+                            $5.unwindCommaList(args);
+
+                            //Create expression in a form that is backward compatible:
+                            //same handling of the rowAtom attribute as for XML, defaulting to "json".
+                            IHqlExpression * name = queryAttribute(rowAtom, args);
+                            if (name)
+                            {
+                                args.add(*LINK(name->queryChild(0)), 0);
+                                args.zap(*name);
+                            }
+                            else
+                                args.add(*createConstant("json"), 0);
+                            $$.setExpr(createValue(no_json, makeNullType(), args));
+                        }
+    | TYPE '(' pipe ')'
+                        {
+                            //An explicit action is required: bison's default ($$ = $1)
+                            //would return the TYPE token instead of the pipe expression.
+                            $$.setExpr($3.getExpr());
+                        }
+    | TYPE '(' UNKNOWN_ID ')'
+                        {
+                            //Not one of the built-in format names: treat it as a pluggable file type
+                            parser->setPluggableModeExpr($$, $3, nullptr);
+                        }
+    | TYPE '(' UNKNOWN_ID ':' hintList ')'
+                        {
+                            //The hintList entries are forwarded as the options of the pluggable file type
+                            HqlExprArray options;
+                            $5.unwindCommaList(options);
+                            parser->setPluggableModeExpr($$, $3, &options);
+                        }
     ;
 
 dsOption
diff --git a/ecl/hql/hqlgram2.cpp b/ecl/hql/hqlgram2.cpp
index 999af21edaf..e1ecc1795dc 100644
--- a/ecl/hql/hqlgram2.cpp
+++ b/ecl/hql/hqlgram2.cpp
@@ -8992,6 +8992,29 @@ bool HqlGram::convertAllToAttribute(attribute &atr)
     return true;
 }
 
+/**
+ * Build the expression for TYPE(<name>) when <name> is not a built-in format.
+ * Placeholder: no plugin lookup is performed yet, so a CSV mode node is always produced.
+ *
+ * @param targetAttr  receives the resulting mode expression
+ * @param mode        identifier token naming the requested file type
+ * @param options     optional operands for the mode node; may be nullptr
+ */
+void HqlGram::setPluggableModeExpr(attribute & targetAttr, attribute & mode, HqlExprArray * options)
+{
+    // The grammar routes the legacy (built-in) formats elsewhere, even when they
+    // are written as TYPE(<format>), so only plugin names are expected here.
+    IAtom * const formatAtom = lower(mode.getId());
+
+    // TODO: Look for a plugin filetype of name formatAtom->queryStr()
+    DBGLOG("HqlGram::setPluggableModeExpr processing file type %s", formatAtom->queryStr());
+
+    // Stand-in result to make the parser happy -- note the hardwiring of CSV format.
+    IHqlExpression * const modeExpr = options
+        ? createValue(no_csv, makeNullType(), *options)
+        : createValue(no_csv, makeNullType());
+    targetAttr.setExpr(modeExpr);
+}
 
 void HqlGram::checkValidRecordMode(IHqlExpression * dataset, attribute & atr, attribute & modeattr)
 {
diff --git a/ecl/hql/hqlir.cpp b/ecl/hql/hqlir.cpp
index 11dfd32d759..6d78b5de9ec 100644
--- a/ecl/hql/hqlir.cpp
+++ b/ecl/hql/hqlir.cpp
@@ -291,7 +291,6 @@ const char * getOperatorIRText(node_operator op)
     EXPAND_CASE(no,unlikely);
     EXPAND_CASE(no,inline);
     EXPAND_CASE(no,nwaydistribute);
-    EXPAND_CASE(no,unused34);
     EXPAND_CASE(no,unused35);
     EXPAND_CASE(no,unused36);
     EXPAND_CASE(no,unused37);
@@ -662,6 +661,7 @@ const char * getOperatorIRText(node_operator op)
     EXPAND_CASE(no,getenv);
     EXPAND_CASE(no,json);
     EXPAND_CASE(no,matched_injoin);
+    EXPAND_CASE(no,filetype);
     }
 
     return "<unknown>";
diff --git a/ecl/hql/hqltrans.cpp b/ecl/hql/hqltrans.cpp
index 622b6f668c8..285ad4e1812 100644
--- a/ecl/hql/hqltrans.cpp
+++ b/ecl/hql/hqltrans.cpp
@@ -2687,6 +2687,7 @@ bool onlyTransformOnce(IHqlExpression * expr)
     case no_csv:
     case no_xml:
     case no_json:
+    case no_filetype:
     case no_list:
         return (expr->numChildren() == 0);
     case no_select:
diff --git a/ecl/regress/filetypeplugin.ecl b/ecl/regress/filetypeplugin.ecl
new file mode 100644
index 00000000000..0b401effc11
--- /dev/null
+++ b/ecl/regress/filetypeplugin.ecl
@@ -0,0 +1,54 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+NamesLayout := RECORD
+    STRING20        surname;
+    STRING10        forename;
+    INTEGER2        age := 25;
+END;
+
+//-----------------------------------------------------------
+// All of the following reference legacy/built-in file types
+// and their options using the new TYPE(...) syntax
+//-----------------------------------------------------------
+namesTableFlat_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(FLAT), __COMPRESSED__, __GROUPED__, OPT);
+OUTPUT(namesTableFlat_1, {namesTableFlat_1}, '~filetypetest::namestableflat_1', OVERWRITE);
+
+namesTableThor_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(THOR), OPT);
+OUTPUT(namesTableThor_1, {namesTableThor_1}, '~filetypetest::namestablethor_1', OVERWRITE);
+
+namesTableCSV_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(CSV), OPT);
+// OUTPUT(namesTableCSV_1, {namesTableCSV_1}, '~filetypetest::namestablecsv_1', TYPE(CSV), OVERWRITE);
+
+namesTableCSV_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(CSV : HEADING(1), SEPARATOR([',', '==>']), QUOTE(['\'', '"', '$$']), TERMINATOR(['\r\n', '\r', '\n']), NOTRIM, UTF8, MAXLENGTH(10000)), OPT);
+// OUTPUT(namesTableCSV_2, {namesTableCSV_2}, '~filetypetest::namestablecsv_2', TYPE(CSV : SEPARATOR('\t')), OVERWRITE);
+
+namesTableXML_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(XML), OPT);
+// OUTPUT(namesTableXML_1, {namesTableXML_1}, '~filetypetest::namestablexml_1', TYPE(XML), OVERWRITE);
+
+namesTableXML_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(XML : '/', NOROOT), OPT);
+// OUTPUT(namesTableXML_2, {namesTableXML_2}, '~filetypetest::namestablexml_2', TYPE(XML : HEADING('<foo>', '</foo>')), OVERWRITE);
+
+namesTableJSON_1 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(JSON), OPT);
+// OUTPUT(namesTableJSON_1, {namesTableJSON_1}, '~filetypetest::namestablejson_1', TYPE(JSON), OVERWRITE);
+
+namesTableJSON_2 := DATASET(DYNAMIC('x'), NamesLayout, TYPE(JSON : '/', NOROOT), OPT);
+// OUTPUT(namesTableJSON_2, {namesTableJSON_2}, '~filetypetest::namestablejson_2', TYPE(JSON : TRIM), OVERWRITE);
+
+//-----------------------------------------------------------
+// TODO: add tests for pluggable file types here (none yet)
+//-----------------------------------------------------------