Skip to content

Commit

Permalink
Merge branch 'frictionlessdata:main' into Fix-1610-solve-field-error-…
Browse files Browse the repository at this point in the history
…on-boolean-field-with-true-or-false-values-customised
  • Loading branch information
amelie-rondot authored Jan 26, 2024
2 parents 3f2f09c + ae3763d commit ca40416
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 15 deletions.
14 changes: 14 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# dependabot
# Ref: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
# ------------------------------------------------------------------------------
version: 2
updates:
  # keep the GitHub Actions referenced by the workflows up to date
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: monthly
    groups:
      # open a single pull-request for all GitHub actions updates
      github-actions:
        patterns:
          - '*'
24 changes: 12 additions & 12 deletions .github/workflows/general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ jobs:
python-version: ["3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Prepare environment
Expand All @@ -45,7 +45,7 @@ jobs:
- name: Test software
run: hatch run +py=${{ matrix.py || matrix.python-version }} ci:test
- name: Report coverage
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
services:
postgres:
image: postgres:12
Expand Down Expand Up @@ -79,9 +79,9 @@ jobs:
runs-on: macos-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Prepare environment
Expand All @@ -99,9 +99,9 @@ jobs:
runs-on: windows-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Prepare environment
Expand All @@ -118,9 +118,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Prepare environment
Expand All @@ -130,7 +130,7 @@ jobs:
echo '!**/*.html' >> .gitignore
make docs
- name: Publush to Github Pages
uses: stefanzweifel/git-auto-commit-action@v4
uses: stefanzweifel/git-auto-commit-action@v5
with:
branch: site
create_branch: true
Expand All @@ -144,9 +144,9 @@ jobs:
needs: [test-linux, test-macos, test-windows]
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install dependencies
Expand Down
Binary file added data/table.csv.bz2
Binary file not shown.
Binary file added data/table.csv.xz
Binary file not shown.
12 changes: 12 additions & 0 deletions frictionless/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class Platform:

# Core

@cached_property
def bz2(self):
    """Return the ``bz2`` module.

    The import happens inside the accessor, so the module is only loaded
    when this attribute is first requested rather than at import time of
    the enclosing module.
    """
    import bz2

    return bz2

@cached_property
def chardet(self):
import chardet
Expand Down Expand Up @@ -146,6 +152,12 @@ def jsonschema_validators(self):

return jsonschema.validators

@cached_property
def lzma(self):
    """Return the ``lzma`` module.

    The import happens inside the accessor, so the module is only loaded
    when this attribute is first requested rather than at import time of
    the enclosing module.
    """
    import lzma

    return lzma

@cached_property
def marko(self):
import marko
Expand Down
2 changes: 1 addition & 1 deletion frictionless/schema/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def metadata_validate(cls, descriptor: IDescriptor): # type: ignore
type = descriptor.get("type")
Class = system.select_field_class(type)
field = Class(
name=descriptor.get("name"), format=descriptor.get("format", None) # type: ignore
name=descriptor.get("name"), format=descriptor.get("format", "default") # type: ignore
)
if type == "boolean":
# 'example' value must be compared to customized 'trueValues' and 'falseValues'
Expand Down
4 changes: 2 additions & 2 deletions frictionless/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

# Version

VERSION = "5.16.0"
VERSION = "5.16.1"

# General

UNDEFINED = object()
NAME_PATTERN = "^([-a-z0-9._/])+$"
TYPE_PATTERN = "^([-a-z/])+$"
PACKAGE_PATH = "datapackage.json"
COMPRESSION_FORMATS = ["zip", "gz"]
COMPRESSION_FORMATS = ["zip", "gz", "bz2", "xz"]

# Defaults

Expand Down
22 changes: 22 additions & 0 deletions frictionless/system/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,28 @@ def read_byte_stream_decompress(
byte_stream = platform.gzip.open(byte_stream) # type: ignore
return byte_stream

# bzip2 compression
if self.resource.compression == "bz2":
# Stats
if not self.remote:
bytes = True
while bytes:
bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore
byte_stream.seek(0)
byte_stream = platform.bz2.open(byte_stream) # type: ignore
return byte_stream

# XZ compression
if self.resource.compression == "xz":
# Stats
if not self.remote:
bytes = True
while bytes:
bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore
byte_stream.seek(0)
byte_stream = platform.lzma.open(byte_stream) # type: ignore
return byte_stream

# Not supported compression
note = f'compression "{self.resource.compression}" is not supported'
raise FrictionlessException(errors.CompressionError(note=note))
Expand Down
42 changes: 42 additions & 0 deletions tests/resources/table/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ def test_resource_compression_local_csv_gz():
]


def test_resource_compression_local_csv_xz():
    """Open a local ``.xz`` CSV by path and check compression, header and rows."""
    with TableResource(path="data/table.csv.xz") as resource:
        # Compression is expected to be picked up from the path alone.
        assert resource.compression == "xz"
        assert resource.innerpath is None
        assert resource.header == ["id", "name"]
        assert resource.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]


def test_resource_compression_local_csv_bz2():
    """Open a local ``.bz2`` CSV by path and check compression, header and rows."""
    with TableResource(path="data/table.csv.bz2") as resource:
        # Compression is expected to be picked up from the path alone.
        assert resource.compression == "bz2"
        assert resource.innerpath is None
        assert resource.header == ["id", "name"]
        assert resource.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]


def test_resource_compression_stream_csv_zip():
with open("data/table.csv.zip", "rb") as file:
with TableResource(data=file, format="csv", compression="zip") as resource:
Expand All @@ -83,6 +105,26 @@ def test_resource_compression_stream_csv_gz():
]


def test_resource_compression_stream_csv_xz():
    """Read an xz-compressed CSV passed as an open binary file object."""
    with open("data/table.csv.xz", "rb") as file:
        # No path is given here, so format and compression are passed explicitly.
        with TableResource(data=file, format="csv", compression="xz") as resource:
            assert resource.header == ["id", "name"]
            assert resource.read_rows() == [
                {"id": 1, "name": "english"},
                {"id": 2, "name": "中国人"},
            ]


def test_resource_compression_stream_csv_bz2():
    """Read a bz2-compressed CSV passed as an open binary file object."""
    with open("data/table.csv.bz2", "rb") as file:
        # No path is given here, so format and compression are passed explicitly.
        with TableResource(data=file, format="csv", compression="bz2") as resource:
            assert resource.header == ["id", "name"]
            assert resource.read_rows() == [
                {"id": 1, "name": "english"},
                {"id": 2, "name": "中国人"},
            ]


@pytest.mark.vcr
def test_resource_compression_remote_csv_zip():
source = "https://raw.githubusercontent.com/frictionlessdata/tabulator-py/master/data/table.csv.zip"
Expand Down

0 comments on commit ca40416

Please sign in to comment.