Skip to content

Commit

Permalink
Marshalling: Restore CrateDB standard encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jan 17, 2025
1 parent 0684b99 commit 6b6e7c0
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 16 deletions.
16 changes: 11 additions & 5 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,21 @@ Unreleased

- Switched JSON encoder to use the `orjson`_ library, to improve JSON
marshalling performance. Thanks, @widmogrod.

orjson is fast and in some spots even more correct when compared against
Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson
will serialize to ``bytes`` instead of ``str``. Please also note it
will not deserialize to dataclasses, UUIDs, decimals, etc., or support
``object_hook``. Within ``crate-python``, it is applied with an encoder
function for additional type support about Python's ``Decimal`` type and
freezegun's ``FakeDatetime`` type.
will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB,
``crate-python`` uses a custom encoder to add support for additional data
types.

- Python's ``Decimal`` type will be serialized to ``str``.
- Python's ``dt.datetime`` and ``dt.date`` types will be serialized to
``int`` (``LONG``) after converting to milliseconds since epoch, to
optimally accommodate CrateDB's `TIMESTAMP`_ representation.
- NumPy's data types will be serialized natively by ``orjson``, without
involving the custom encoder.

.. _orjson: https://github.com/ijl/orjson
.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp

2024/11/23 1.0.1
================
Expand Down
36 changes: 29 additions & 7 deletions src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
# software solely pursuant to the terms of the relevant commercial agreement.


import calendar
import datetime as dt
import heapq
import io
import logging
Expand Down Expand Up @@ -84,19 +86,35 @@ def super_len(o):
return None


def cratedb_json_encoder(obj: t.Any) -> str:
epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
epoch_naive = dt.datetime(1970, 1, 1)


def json_encoder(obj: t.Any) -> t.Union[int, str]:
"""
Encoder function for orjson, with additional type support.
- Python's `Decimal` type.
- freezegun's `FakeDatetime` type.
- Python's `Decimal` type will be serialized to `str`.
- Python's `dt.datetime` and `dt.date` types will be
serialized to `int` after converting to milliseconds
since epoch.
https://github.com/ijl/orjson#default
https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp
"""
if isinstance(obj, Decimal):
return str(obj)
elif hasattr(obj, "isoformat"):
return obj.isoformat()
if isinstance(obj, dt.datetime):
if obj.tzinfo is not None:
delta = obj - epoch_aware
else:
delta = obj - epoch_naive
return int(
delta.microseconds / 1000.0
+ (delta.seconds + delta.days * 24 * 3600) * 1000.0
)
if isinstance(obj, dt.date):
return calendar.timegm(obj.timetuple()) * 1000
raise TypeError

Check warning on line 118 in src/crate/client/http.py

View check run for this annotation

Codecov / codecov/patch

src/crate/client/http.py#L118

Added line #L118 was not covered by tests


Expand All @@ -108,8 +126,12 @@ def json_dumps(obj: t.Any) -> bytes:
"""
return orjson.dumps(
obj,
default=cratedb_json_encoder,
option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
default=json_encoder,
option=(
orjson.OPT_PASSTHROUGH_DATETIME
| orjson.OPT_NON_STR_KEYS
| orjson.OPT_SERIALIZE_NUMPY
),
)


Expand Down
8 changes: 4 additions & 4 deletions tests/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
# convert string to dict
# because the order of the keys isn't deterministic
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
self.assertEqual(data["args"], [1425108700000])
client.close()

@patch(REQUEST, autospec=True)
Expand All @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
day = dt.date(2016, 4, 21)
client.sql("insert into users (dt) values (?)", (day,))
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], ["2016-04-21"])
self.assertEqual(data["args"], [1461196800000])
client.close()

def test_socket_options_contain_keepalive(self):
Expand Down Expand Up @@ -725,9 +725,9 @@ class TestCrateJsonEncoder(TestCase):
def test_naive_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')
self.assertEqual(result, b"1687771440123")

def test_aware_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')
self.assertEqual(result, b"1687764240123")

0 comments on commit 6b6e7c0

Please sign in to comment.