diff --git a/CHANGES.rst b/CHANGES.rst index 25ecde6f..50da0e85 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,15 +7,21 @@ Unreleased - Switched JSON encoder to use the `orjson`_ library, to improve JSON marshalling performance. Thanks, @widmogrod. + orjson is fast and in some spots even more correct when compared against Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson - will serialize to ``bytes`` instead of ``str``. Please also note it - will not deserialize to dataclasses, UUIDs, decimals, etc., or support - ``object_hook``. Within ``crate-python``, it is applied with an encoder - function for additional type support about Python's ``Decimal`` type and - freezegun's ``FakeDatetime`` type. + will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB, + ``crate-python`` uses a custom encoder to add support for additional data + types. + + - Python's ``Decimal`` type will be serialized to ``str``. + - Python's ``dt.datetime`` and ``dt.date`` types will be serialized to + ``int`` (``LONG``) after converting to milliseconds since epoch, to + optimally accommodate CrateDB's `TIMESTAMP`_ representation. + - NumPy's data types will be handled by ``orjson`` without any ado. .. _orjson: https://github.com/ijl/orjson +.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp 2024/11/23 1.0.1 ================ diff --git a/src/crate/client/http.py b/src/crate/client/http.py index 8d19b9c4..a1251d34 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -20,6 +20,8 @@ # software solely pursuant to the terms of the relevant commercial agreement. 
+import calendar
+import datetime as dt
 import heapq
 import io
 import logging
@@ -84,19 +86,42 @@ def super_len(o):
     return None
 
 
-def cratedb_json_encoder(obj: t.Any) -> str:
+epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
+epoch_naive = dt.datetime(1970, 1, 1)
+
+
+def json_encoder(obj: t.Any) -> t.Union[int, str]:
     """
     Encoder function for orjson, with additional type support.
 
-    - Python's `Decimal` type.
-    - freezegun's `FakeDatetime` type.
+    - Python's `Decimal` type will be serialized to `str`.
+    - Python's `dt.datetime` and `dt.date` types will be
+      serialized to `int` after converting to milliseconds
+      since epoch.
+    - Python's `dt.time` type will be serialized to `str`,
+      using its ISO 8601 representation.
 
     https://github.com/ijl/orjson#default
+    https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp
     """
     if isinstance(obj, Decimal):
         return str(obj)
-    elif hasattr(obj, "isoformat"):
-        return obj.isoformat()
+    # `dt.datetime` is a subclass of `dt.date`, so it must be tested first.
+    if isinstance(obj, dt.datetime):
+        # Naive values are measured against the naive epoch, i.e. read as UTC.
+        if obj.tzinfo is not None:
+            delta = obj - epoch_aware
+        else:
+            delta = obj - epoch_naive
+        return int(
+            delta.microseconds / 1000.0
+            + (delta.seconds + delta.days * 24 * 3600) * 1000.0
+        )
+    if isinstance(obj, dt.date):
+        return calendar.timegm(obj.timetuple()) * 1000
+    # `OPT_PASSTHROUGH_DATETIME` hands `dt.time` to this function as well.
+    if isinstance(obj, dt.time):
+        return obj.isoformat()
     raise TypeError
 
 
@@ -108,8 +133,12 @@ def json_dumps(obj: t.Any) -> bytes:
     """
     return orjson.dumps(
         obj,
-        default=cratedb_json_encoder,
-        option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
+        default=json_encoder,
+        option=(
+            orjson.OPT_PASSTHROUGH_DATETIME
+            | orjson.OPT_NON_STR_KEYS
+            | orjson.OPT_SERIALIZE_NUMPY
+        ),
     )
 
 
diff --git a/tests/client/test_http.py b/tests/client/test_http.py
index 554fbe5f..c4c0609e 100644
--- a/tests/client/test_http.py
+++ b/tests/client/test_http.py
@@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
         # convert string to dict
         # because the order of the keys isn't deterministic
         data = json.loads(request.call_args[1]["data"])
-        self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
+        self.assertEqual(data["args"], [1425108700000])
client.close() @patch(REQUEST, autospec=True) @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request): day = dt.date(2016, 4, 21) client.sql("insert into users (dt) values (?)", (day,)) data = json.loads(request.call_args[1]["data"]) - self.assertEqual(data["args"], ["2016-04-21"]) + self.assertEqual(data["args"], [1461196800000]) client.close() def test_socket_options_contain_keepalive(self): @@ -725,9 +725,9 @@ class TestCrateJsonEncoder(TestCase): def test_naive_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123") result = json_dumps(data) - self.assertEqual(result, b'"2023-06-26T09:24:00.123000"') + self.assertEqual(result, b"1687771440123") def test_aware_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00") result = json_dumps(data) - self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"') + self.assertEqual(result, b"1687764240123")