From bf13299bfe2f9c0c1ebf7474b3c10e270ff179bf Mon Sep 17 00:00:00 2001 From: Jack Del Vecchio Date: Fri, 12 Apr 2024 16:03:27 +0000 Subject: [PATCH] HPCC-28288 Add support for regex and timestamp types. --- plugins/mongodb/README.md | 10 +++-- plugins/mongodb/examples/mongodb-test.ecl | 24 +++++++++++ plugins/mongodb/mongodb.ecllib | 4 ++ plugins/mongodb/mongodbembed.cpp | 50 +++++++++-------------- 4 files changed, 55 insertions(+), 33 deletions(-) diff --git a/plugins/mongodb/README.md b/plugins/mongodb/README.md index 6c9747080d9..c9f2317c0e4 100755 --- a/plugins/mongodb/README.md +++ b/plugins/mongodb/README.md @@ -125,10 +125,14 @@ Not every ECL or MongoDB datatype translates seemlessly to the other side. | MongoDB datatypes | ECL equivalent | | ----------------- | -------------- | | b_date | STRING, INTEGER | -| b_regex | Unsupported | -| b_timestamp | Unsupported | +| b_regex | {String pattern, String options} | +| b_timestamp | {Unsigned t, Unsigned i} | -The MongoDB date datatype can be converted to an integer in MongoDB or it will automatically be converted to a STRING by the plugin. Typically Dates before 1970 get returned by MongoDB as INTEGERS. Also, Unsigned Integers are unsupported in MongoDB. This means that in order to insert UINTEGERs into the database the plugin converts them to b_int64 which is a 64 bit signed integer. +The MongoDB date datatype can be converted to an integer in MongoDB or it will automatically be converted to a STRING by the plugin. Typically Dates before 1970 get returned by MongoDB as INTEGERS. + +Due to regex and timestamp types being returned by MongoDB as objects, ECL records that map to these types are defined in the mongodb.ecllib file for your use. For information about the regex and timestamp types: [Manual](https://www.mongodb.com/docs/manual/reference/mongodb-extended-json/#bson-data-types-and-associated-representations) + +Unsigned Integers are unsupported in MongoDB. 
This means that in order to insert UINTEGERs into the database the plugin converts them to b_int64 which is a 64 bit signed integer. ### Inserting Documents diff --git a/plugins/mongodb/examples/mongodb-test.ecl b/plugins/mongodb/examples/mongodb-test.ecl index 08af1447304..902dbe4cf34 100644 --- a/plugins/mongodb/examples/mongodb-test.ecl +++ b/plugins/mongodb/examples/mongodb-test.ecl @@ -57,6 +57,18 @@ layoutDates := {STRING bucket_start_date, STRING bucket_end_date}; layoutEmployee := {INTEGER1 id, STRING25 first, STRING25 last, REAL salary}; layoutperson := {String username, String address, String email}; +layoutRegex := RECORD + STRING name; + INTEGER uniqueID; + mongodb.regexType regex; +END; + +layoutTimestamp := RECORD + STRING name; + INTEGER uniqueID; + mongodb.timestampType timestamp; +END; + // Example/Test functions // Returns the unique _id and name every document in the listingsAndReviews collection @@ -161,6 +173,16 @@ dataset(layoutEmployee) findInfo(BOOLEAN mybool) := EMBED(mongodb : user(user), ); ENDEMBED; +// Gets all the documents from the regexTest collection for testing the conversion of MongoDB regex data to ECL +dataset(layoutRegex) getRegex() := EMBED(mongodb : user(user), password(pwd), server(server), database('mydb'), collection('regexTest')) + find({}); +ENDEMBED; + +// Gets all the documents from the timestampTest collection for testing the conversion of MongoDB timestamp data to ECL +dataset(layoutTimestamp) getTimestamp() := EMBED(mongodb : user(user), password(pwd), server(server), database('mydb'), collection('timestampTest')) + find({}); +ENDEMBED; + // $or is not allowed in the M0 tier of MongoDB atlas INTEGER ppl := 8; // Matches all the documents that match either expression. Then it groups them by the number of beds they have and counts the number of documents in each group. 
@@ -193,6 +215,8 @@ SEQUENTIAL OUTPUT(insertMany(employeeDS), NAMED('InsertMany')); createIndex(1); OUTPUT(findInfo(mybool), NAMED('RemoveOnQuery')); + OUTPUT(getRegex(), NAMED('TestRegexSupport')); + OUTPUT(getTimestamp(), NAMED('TestTimestampSupport')); OUTPUT(findCountOR(nights,ppl), NAMED('OrCountAggregate')); OUTPUT('Done', Named('Status')); ); diff --git a/plugins/mongodb/mongodb.ecllib b/plugins/mongodb/mongodb.ecllib index 576dabe89a2..39ea5e08450 100644 --- a/plugins/mongodb/mongodb.ecllib +++ b/plugins/mongodb/mongodb.ecllib @@ -26,3 +26,7 @@ EXPORT boolean supportsScript := true; EXPORT updateResultRecord := {INTEGER matched_count, INTEGER modified_count}; EXPORT insertManyResultRecord := {INTEGER inserted_count}; EXPORT deleteResultRecord := {INTEGER deleted_count}; + +// For information about the regex and timestamp types: https://www.mongodb.com/docs/manual/reference/mongodb-extended-json/#bson-data-types-and-associated-representations +EXPORT regexType := {STRING pattern, STRING options}; +EXPORT timestampType := {UNSIGNED t, UNSIGNED i}; diff --git a/plugins/mongodb/mongodbembed.cpp b/plugins/mongodb/mongodbembed.cpp index 9b7c3a548c6..6333734a8f1 100755 --- a/plugins/mongodb/mongodbembed.cpp +++ b/plugins/mongodb/mongodbembed.cpp @@ -149,17 +149,16 @@ namespace mongodbembed std::string key = std::string(start, end - start); // Get datatype result += std::string(row, lastBrkt - row); // Add everything before we went into nested document // Some data types are unsupported as they are not straightforward to deserialize - if (key == "$regularExpression") + // Regex and timestamp both get deserialized to their child objects + if (key == "$regularExpression" || key == "$timestamp") { - UNSUPPORTED("Regular Expressions"); // TO DO handle unsupported types by not throwing an exception. 
- } - else if (key == "$timestamp") - { - while (*end && *end != '}') - end++; // Skip over timestamp - row = ++end; + // remove type identifier and create a nested object for the regex or timestamp values + while (*end && *end != '{') + end++; start = end; - result += "\"\""; + while (*end && *end != '}') + end++; + result += std::string(start, ++end - start); } // Both of these get deserialized to strings and are surround by quotation marks else if (key == "$date" || key == "$oid") @@ -192,14 +191,6 @@ namespace mongodbembed while (*end && *end != '}') end++; // Get out of both nested documents end++; - - while (*end && *end != '}') - end++; - end++; - - depth--; - row = end; // Set row to just after the nested document - start = end; // move start to the next place for parsing } else { @@ -208,13 +199,6 @@ namespace mongodbembed end++; result += std::string(start, ++end - start); // Only add the data inside the quotation marks to result string - - while (*end && *end != '}') - end++; // Only have to get out of one nested document - end++; - depth--; - row = end; // Set row to just after the nested document - start = end; // move start to the next place for parsing } } else if (key == "$numberDouble" || key == "$numberDecimal" || key == "$numberLong") @@ -229,17 +213,23 @@ namespace mongodbembed end++; result += std::string(start, end++ - start); // Only add the data inside the quotation marks to result string - while (*end && *end != '}') - end++; // Only have to get out of one nested document - end++; - depth--; - row = end; - start = end; } else { failx("EJSON datatype error: '%s' is not supported in the current version.", key.c_str()); } + + // Get out of nested object. + while (*end && *end != '}') + end++; + + if (*end) + end++; + else + failx("Read past the end of stream while converting EJSON types to ECL."); + depth--; + row = end; // Set row to just after the nested document + start = end; // move start to the next place for parsing } /**