Skip to content

Commit

Permalink
added support for xz and bzip2 compression
Browse files Browse the repository at this point in the history
  • Loading branch information
jze committed Sep 4, 2023
1 parent 740319e commit 1759b43
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 1 deletion.
Binary file added data/table.csv.bz2
Binary file not shown.
Binary file added data/table.csv.xz
Binary file not shown.
12 changes: 12 additions & 0 deletions frictionless/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class Platform:

# Core

@cached_property
def bz2(self):
    """Return the ``bz2`` module.

    The import happens inside the property body rather than at the top of
    the file, so the module is only loaded when this attribute is used.
    """
    import bz2 as bz2_module

    return bz2_module

@cached_property
def chardet(self):
import chardet
Expand Down Expand Up @@ -149,6 +155,12 @@ def jsonschema_validators(self):

return jsonschema.validators

@cached_property
def lzma(self):
    """Return the ``lzma`` module.

    The import happens inside the property body rather than at the top of
    the file, so the module is only loaded when this attribute is used.
    """
    import lzma as lzma_module

    return lzma_module

@cached_property
def marko(self):
import marko
Expand Down
2 changes: 1 addition & 1 deletion frictionless/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Regex accepting one or more of: lowercase letters, digits, "-", ".", "_", "/".
NAME_PATTERN = "^([-a-z0-9._/])+$"
# Regex accepting one or more of: lowercase letters, "-", "/".
TYPE_PATTERN = "^([-a-z/])+$"
PACKAGE_PATH = "datapackage.json"
# Compression format identifiers; "bz2" and "xz" sit alongside "zip" and "gz".
# (The earlier two-item assignment was dead code: it was overwritten on the
# very next line, so only this definition is kept.)
COMPRESSION_FORMATS = ["zip", "gz", "bz2", "xz"]

# Defaults

Expand Down
22 changes: 22 additions & 0 deletions frictionless/system/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,28 @@ def read_byte_stream_decompress(
byte_stream = platform.gzip.open(byte_stream) # type: ignore
return byte_stream

# bzip2 compression
if self.resource.compression == "bz2":
# Stats
if not self.remote:
bytes = True
while bytes:
bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore
byte_stream.seek(0)
byte_stream = platform.bz2.open(byte_stream) # type: ignore
return byte_stream

# XZ compression
if self.resource.compression == "xz":
# Stats
if not self.remote:
bytes = True
while bytes:
bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore
byte_stream.seek(0)
byte_stream = platform.lzma.open(byte_stream) # type: ignore
return byte_stream

# Not supported compression
note = f'compression "{self.resource.compression}" is not supported'
raise FrictionlessException(errors.CompressionError(note=note))
Expand Down
42 changes: 42 additions & 0 deletions tests/resources/table/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ def test_resource_compression_local_csv_gz():
]


def test_resource_compression_local_csv_xz():
    """Open ``data/table.csv.xz`` by path and check compression, header and rows."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    with TableResource(path="data/table.csv.xz") as table:
        # Only the path is given; the resource is expected to report "xz".
        assert table.compression == "xz"
        assert table.innerpath is None
        assert table.header == ["id", "name"]
        assert table.read_rows() == expected_rows


def test_resource_compression_local_csv_bz2():
    """Open ``data/table.csv.bz2`` by path and check compression, header and rows."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    with TableResource(path="data/table.csv.bz2") as table:
        # Only the path is given; the resource is expected to report "bz2".
        assert table.compression == "bz2"
        assert table.innerpath is None
        assert table.header == ["id", "name"]
        assert table.read_rows() == expected_rows


def test_resource_compression_stream_csv_zip():
with open("data/table.csv.zip", "rb") as file:
with TableResource(data=file, format="csv", compression="zip") as resource:
Expand All @@ -83,6 +105,26 @@ def test_resource_compression_stream_csv_gz():
]


def test_resource_compression_stream_csv_xz():
    """Read an xz-compressed CSV from an open binary file object."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    # Format and compression are passed explicitly alongside the file object.
    with open("data/table.csv.xz", "rb") as stream, TableResource(
        data=stream, format="csv", compression="xz"
    ) as table:
        assert table.header == ["id", "name"]
        assert table.read_rows() == expected_rows


def test_resource_compression_stream_csv_bz2():
    """Read a bzip2-compressed CSV from an open binary file object."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    # Format and compression are passed explicitly alongside the file object.
    with open("data/table.csv.bz2", "rb") as stream, TableResource(
        data=stream, format="csv", compression="bz2"
    ) as table:
        assert table.header == ["id", "name"]
        assert table.read_rows() == expected_rows


@pytest.mark.vcr
def test_resource_compression_remote_csv_zip():
source = "https://raw.githubusercontent.com/frictionlessdata/tabulator-py/master/data/table.csv.zip"
Expand Down

0 comments on commit 1759b43

Please sign in to comment.