From 979474df8fa3d2b3fb848217f9494d3c686ddbbe Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 25 Sep 2019 13:36:32 -0400 Subject: [PATCH 1/4] Fix handling of PNG/JPG data --- cs_storage/__init__.py | 21 +++++++---- cs_storage/tests/test_cs_storage.py | 54 +++++++++++++++++++++++++++++ environment.yml | 2 ++ 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/cs_storage/__init__.py b/cs_storage/__init__.py index 6684515..a203c66 100644 --- a/cs_storage/__init__.py +++ b/cs_storage/__init__.py @@ -1,3 +1,4 @@ +import base64 import io import json import os @@ -43,17 +44,25 @@ def deserialize(self, data): return data.decode() +class Base64Serializer(Serializer): + def deserialize(self, data): + return base64.b64encode(data).decode("utf-8") + + def from_string(self, data): + return base64.b64decode(data.encode("utf-8")) + + def get_serializer(media_type): return { "bokeh": JSONSerializer("json"), "table": TextSerializer("html"), "CSV": TextSerializer("csv"), - "PNG": Serializer("png"), - "JPEG": Serializer("jpeg"), - "MP3": Serializer("mp3"), - "MP4": Serializer("mp4"), - "HDF5": Serializer("h5"), - "PDF": Serializer("pdf"), + "PNG": Base64Serializer("png"), + "JPEG": Base64Serializer("jpeg"), + "MP3": Base64Serializer("mp3"), + "MP4": Base64Serializer("mp4"), + "HDF5": Base64Serializer("h5"), + "PDF": Base64Serializer("pdf"), "Markdown": TextSerializer("md"), "Text": TextSerializer("txt"), }[media_type] diff --git a/cs_storage/tests/test_cs_storage.py b/cs_storage/tests/test_cs_storage.py index 51500f3..aeaa08e 100644 --- a/cs_storage/tests/test_cs_storage.py +++ b/cs_storage/tests/test_cs_storage.py @@ -1,3 +1,4 @@ +import base64 import io import json import uuid @@ -10,6 +11,44 @@ import cs_storage +@pytest.fixture +def png(): + import matplotlib.pyplot as plt + import numpy as np + x = np.linspace(0, 2, 100) + plt.figure() + plt.plot(x, x, label='linear') + plt.plot(x, x**2, label='quadratic') + plt.plot(x, x**3, label='cubic') + plt.xlabel('x label') + 
plt.ylabel('y label') + plt.title("Simple Plot") + plt.legend() + initial_buff = io.BytesIO() + plt.savefig(initial_buff, format="png") + initial_buff.seek(0) + return initial_buff.read() + + +@pytest.fixture +def jpg(): + import matplotlib.pyplot as plt + import numpy as np + x = np.linspace(0, 2, 100) + plt.figure() + plt.plot(x, x, label='linear') + plt.plot(x, x**2, label='quadratic') + plt.plot(x, x**3, label='cubic') + plt.xlabel('x label') + plt.ylabel('y label') + plt.title("Simple Plot") + plt.legend() + initial_buff = io.BytesIO() + plt.savefig(initial_buff, format="jpg") + initial_buff.seek(0) + return initial_buff.read() + + def test_JSONSerializer(): ser = cs_storage.JSONSerializer("json") @@ -46,6 +85,21 @@ def test_serializer(): assert act == b"hello world" +def test_base64serializer(png, jpg): + """Test round trip serialization/deserialization of PNG and JPG""" + ser = cs_storage.Base64Serializer("PNG") + asbytes = ser.serialize(png) + asstr = ser.deserialize(asbytes) + assert png == ser.from_string(asstr) + assert json.dumps({"pic": asstr}) + + ser = cs_storage.Base64Serializer("JPG") + asbytes = ser.serialize(jpg) + asstr = ser.deserialize(asbytes) + assert jpg == ser.from_string(asstr) + assert json.dumps({"pic": asstr}) + + def test_get_serializer(): types = ["bokeh", "table", "CSV", "PNG", "JPEG", "MP3", "MP4", "HDF5"] for t in types: diff --git a/environment.yml b/environment.yml index 4abf0a1..0d95114 100644 --- a/environment.yml +++ b/environment.yml @@ -5,3 +5,5 @@ dependencies: - "marshmallow>=3.0.0" - pytest - gcsfs + - matplotlib + - numpy From 499f58a3c313ed5bc69d41e746038b36df6fcc00 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 25 Sep 2019 14:15:45 -0400 Subject: [PATCH 2/4] Add json_serializable keyword --- cs_storage/__init__.py | 17 ++++++++++------- cs_storage/tests/test_cs_storage.py | 16 +++++++++++----- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/cs_storage/__init__.py b/cs_storage/__init__.py index 
a203c66..44ee0c1 100644 --- a/cs_storage/__init__.py +++ b/cs_storage/__init__.py @@ -24,7 +24,7 @@ def __init__(self, ext): def serialize(self, data): return data - def deserialize(self, data): + def deserialize(self, data, json_serializable=True): return data @@ -32,7 +32,7 @@ class JSONSerializer(Serializer): def serialize(self, data): return json.dumps(data).encode() - def deserialize(self, data): + def deserialize(self, data, json_serializable=True): return json.loads(data.decode()) @@ -40,13 +40,16 @@ class TextSerializer(Serializer): def serialize(self, data): return data.encode() - def deserialize(self, data): + def deserialize(self, data, json_serializable=True): return data.decode() class Base64Serializer(Serializer): - def deserialize(self, data): - return base64.b64encode(data).decode("utf-8") + def deserialize(self, data, json_serializable=True): + if json_serializable: + return base64.b64encode(data).decode("utf-8") + else: + return data def from_string(self, data): return base64.b64decode(data.encode("utf-8")) @@ -139,7 +142,7 @@ def write(task_id, loc_result, do_upload=True): return rem_result -def read(rem_result): +def read(rem_result, json_serializable=True): # compute studio results have public read access. 
fs = gcsfs.GCSFileSystem(token="anon") s = time.time() @@ -154,7 +157,7 @@ def read(rem_result): for rem_output in rem_result[category]["outputs"]: ser = get_serializer(rem_output["media_type"]) - rem_data = ser.deserialize(zipfileobj.read(rem_output["filename"])) + rem_data = ser.deserialize(zipfileobj.read(rem_output["filename"]), json_serializable) read[category].append( { "title": rem_output["title"], diff --git a/cs_storage/tests/test_cs_storage.py b/cs_storage/tests/test_cs_storage.py index aeaa08e..cc95ade 100644 --- a/cs_storage/tests/test_cs_storage.py +++ b/cs_storage/tests/test_cs_storage.py @@ -106,7 +106,7 @@ def test_get_serializer(): assert cs_storage.get_serializer(t) -def test_cs_storage(): +def test_cs_storage(png, jpg): exp_loc_res = { "renderable": [ { @@ -122,12 +122,12 @@ def test_cs_storage(): { "media_type": "PNG", "title": "PNG data", - "data": b"PNG bytes", + "data": png, }, { "media_type": "JPEG", "title": "JPEG data", - "data": b"JPEG bytes", + "data": jpg, }, { "media_type": "MP3", @@ -171,11 +171,17 @@ def test_cs_storage(): } task_id = uuid.uuid4() rem_res = cs_storage.write(task_id, exp_loc_res) - loc_res = cs_storage.read(rem_res) + loc_res = cs_storage.read(rem_res, json_serializable=False) assert loc_res == exp_loc_res + assert json.dumps( + cs_storage.read(rem_res, json_serializable=True) + ) - loc_res1 = cs_storage.read({"renderable": rem_res["renderable"]}) + loc_res1 = cs_storage.read({"renderable": rem_res["renderable"]}, json_serializable=False) assert loc_res1["renderable"] == exp_loc_res["renderable"] + assert json.dumps( + cs_storage.read({"renderable": rem_res["renderable"]}, json_serializable=True) + ) def test_errors(): From 958ae18b534f9ecff647d8c02c00cfbf30ca200c Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 25 Sep 2019 14:16:57 -0400 Subject: [PATCH 3/4] Add quick docstring comment --- cs_storage/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cs_storage/__init__.py b/cs_storage/__init__.py index 
44ee0c1..9bd73a8 100644 --- a/cs_storage/__init__.py +++ b/cs_storage/__init__.py @@ -18,6 +18,9 @@ class Serializer: + """ + Base class for serializing input data to bytes and back. + """ def __init__(self, ext): self.ext = ext From 9abcca4679047219541ec7c2174bedf4cb24de4d Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 25 Sep 2019 14:21:48 -0400 Subject: [PATCH 4/4] Bump version to 1.7.0 --- cs_storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs_storage/__init__.py b/cs_storage/__init__.py index 9bd73a8..f4bc990 100644 --- a/cs_storage/__init__.py +++ b/cs_storage/__init__.py @@ -11,7 +11,7 @@ from marshmallow import Schema, fields, validate -__version__ = "1.6.0" +__version__ = "1.7.0" BUCKET = os.environ.get("BUCKET", None)