Skip to content

Commit

Permalink
HPCC-28288 Add support for regex and timestamp types.
Browse files Browse the repository at this point in the history
  • Loading branch information
jackdelv committed Apr 12, 2024
1 parent 0a8b74d commit 973cda3
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 33 deletions.
4 changes: 2 additions & 2 deletions plugins/mongodb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ Not every ECL or MongoDB datatype translates seamlessly to the other side.
| MongoDB datatypes | ECL equivalent |
| ----------------- | -------------- |
| b_date | STRING, INTEGER |
| b_regex | Unsupported |
| b_timestamp | Unsupported |
| b_regex | {String, String} |
| b_timestamp | {Unsigned, Unsigned} |

The MongoDB date datatype can be converted to an integer in MongoDB; otherwise the plugin automatically converts it to a STRING. Typically, dates before 1970 are returned by MongoDB as INTEGERs. Also, unsigned integers are unsupported in MongoDB. This means that in order to insert UINTEGERs into the database, the plugin converts them to b_int64, which is a 64-bit signed integer.

Expand Down
12 changes: 12 additions & 0 deletions plugins/mongodb/examples/mongodb-test.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ layoutDates := {STRING bucket_start_date, STRING bucket_end_date};
// Row layout for employee records; findInfo returns a dataset of this layout.
layoutEmployee := {INTEGER1 id, STRING25 first, STRING25 last, REAL salary};
// Row layout with three variable-length string fields: username, address and email.
layoutperson := {String username, String address, String email};

// Row layout returned by getRegexandTimestamp: a name, a unique id, and one
// regex and one timestamp value held in the nested record types exported by
// the mongodb plugin library (mongodb.regexType and mongodb.timestampType).
layoutRegexTimestamp := {STRING name, INTEGER uniqueID, mongodb.regexType regex, mongodb.timestampType timestamp};

// Example/Test functions

// Returns the unique _id and name of every document in the listingsAndReviews collection
Expand Down Expand Up @@ -161,6 +168,10 @@ dataset(layoutEmployee) findInfo(BOOLEAN mybool) := EMBED(mongodb : user(user),
);
ENDEMBED;

// Runs find with an empty filter against the 'regexTest' collection of the
// 'mydb' database and returns the result as a dataset of layoutRegexTimestamp.
// The connection uses the user, pwd and server values defined elsewhere in this file.
dataset(layoutRegexTimestamp) getRegexandTimestamp() := EMBED(mongodb : user(user), password(pwd), server(server), database('mydb'), collection('regexTest'))
find({});
ENDEMBED;

// $or is not allowed in the M0 tier of MongoDB atlas
// Passed as the second argument to findCountOR in the SEQUENTIAL action below.
INTEGER ppl := 8;
// Matches all the documents that match either expression. Then it groups them by the number of beds they have and counts the number of documents in each group.
Expand Down Expand Up @@ -193,6 +204,7 @@ SEQUENTIAL
OUTPUT(insertMany(employeeDS), NAMED('InsertMany'));
createIndex(1);
OUTPUT(findInfo(mybool), NAMED('RemoveOnQuery'));
OUTPUT(getRegexandTimestamp(), NAMED('TestRegexSupport'));
OUTPUT(findCountOR(nights,ppl), NAMED('OrCountAggregate'));
OUTPUT('Done', Named('Status'));
);
3 changes: 3 additions & 0 deletions plugins/mongodb/mongodb.ecllib
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ EXPORT boolean supportsScript := true;
// Result layouts exported for ECL callers. Each holds INTEGER count fields;
// presumably these are filled in by the plugin for update, insertMany and
// delete operations respectively — confirm against mongodbembed.cpp.
EXPORT updateResultRecord := {INTEGER matched_count, INTEGER modified_count};
EXPORT insertManyResultRecord := {INTEGER inserted_count};
EXPORT deleteResultRecord := {INTEGER deleted_count};

// Record type for a MongoDB b_regex value: two STRING fields, pattern and options.
EXPORT regexType := {STRING pattern, STRING options};
// Record type for a MongoDB b_timestamp value: two UNSIGNED fields, t and i.
// NOTE(review): presumably t is the seconds component and i the increment, as in
// MongoDB's $timestamp representation — confirm.
EXPORT timestampType := {UNSIGNED t, UNSIGNED i};
49 changes: 18 additions & 31 deletions plugins/mongodb/mongodbembed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,17 @@ namespace mongodbembed
std::string key = std::string(start, end - start); // Get datatype
result += std::string(row, lastBrkt - row); // Add everything before we went into nested document
// Some data types are unsupported as they are not straightforward to deserialize
if (key == "$regularExpression")
// Regex and timestamp both get deserialized to their child objects
if (key == "$regularExpression" || key == "$timestamp")
{
UNSUPPORTED("Regular Expressions"); // TO DO handle unsupported types by not throwing an exception.
}
else if (key == "$timestamp")
{
while (*end && *end != '}')
end++; // Skip over timestamp
row = ++end;
// remove type identifier and create a nested object for the regex or timestamp values
while (*end && *end != '{')
end++;
start = end;
result += "\"\"";
}
while (*end && *end != '}')
end++;
result += std::string(start, ++end - start);
}
// Both of these get deserialized to strings and are surrounded by quotation marks
else if (key == "$date" || key == "$oid")
{
Expand Down Expand Up @@ -192,14 +191,6 @@ namespace mongodbembed
while (*end && *end != '}')
end++; // Get out of both nested documents
end++;

while (*end && *end != '}')
end++;
end++;

depth--;
row = end; // Set row to just after the nested document
start = end; // move start to the next place for parsing
}
else
{
Expand All @@ -208,13 +199,6 @@ namespace mongodbembed
end++;

result += std::string(start, ++end - start); // Only add the data inside the quotation marks to result string

while (*end && *end != '}')
end++; // Only have to get out of one nested document
end++;
depth--;
row = end; // Set row to just after the nested document
start = end; // move start to the next place for parsing
}
}
else if (key == "$numberDouble" || key == "$numberDecimal" || key == "$numberLong")
Expand All @@ -229,17 +213,20 @@ namespace mongodbembed
end++;

result += std::string(start, end++ - start); // Only add the data inside the quotation marks to result string
while (*end && *end != '}')
end++; // Only have to get out of one nested document
end++;
depth--;
row = end;
start = end;
}
else
{
failx("EJSON datatype error: '%s' is not supported in the current version.", key.c_str());
}

// Get out of nested object.
while (*end && *end != '}')
end++;

end++;
depth--;
row = end; // Set row to just after the nested document
start = end; // move start to the next place for parsing
}

/**
Expand Down

0 comments on commit 973cda3

Please sign in to comment.