diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..f8f779b593 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,14 @@ +# dependabot +# Ref: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file +# ------------------------------------------------------------------------------ +version: 2 +updates: +- package-ecosystem: github-actions + directory: / + schedule: + interval: monthly + groups: + # open a single pull-request for all GitHub actions updates + github-actions: + patterns: + - '*' diff --git a/.github/workflows/general.yaml b/.github/workflows/general.yaml index 5fa0dd9eb2..a44ccc96aa 100644 --- a/.github/workflows/general.yaml +++ b/.github/workflows/general.yaml @@ -28,9 +28,9 @@ jobs: python-version: ["3.10", "3.11"] steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Prepare environment @@ -45,7 +45,7 @@ jobs: - name: Test software run: hatch run +py=${{ matrix.py || matrix.python-version }} ci:test - name: Report coverage - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 services: postgres: image: postgres:12 @@ -79,9 +79,9 @@ jobs: runs-on: macos-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: "3.10" - name: Prepare environment @@ -99,9 +99,9 @@ jobs: runs-on: windows-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: "3.10" - name: Prepare environment @@ -118,9 +118,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: "3.10" - name: Prepare environment @@ -130,7 +130,7 @@ jobs: echo '!**/*.html' >> .gitignore make docs - name: Publush to Github Pages - uses: stefanzweifel/git-auto-commit-action@v4 + uses: stefanzweifel/git-auto-commit-action@v5 with: branch: site create_branch: true @@ -144,9 +144,9 @@ jobs: needs: [test-linux, test-macos, test-windows] steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: "3.10" - name: Install dependencies diff --git a/data/table.csv.bz2 b/data/table.csv.bz2 new file mode 100644 index 0000000000..cc1d68a76c Binary files /dev/null and b/data/table.csv.bz2 differ diff --git a/data/table.csv.xz b/data/table.csv.xz new file mode 100644 index 0000000000..5348c739e2 Binary files /dev/null and b/data/table.csv.xz differ diff --git a/frictionless/platform.py b/frictionless/platform.py index f10302db40..b88584d99d 100644 --- a/frictionless/platform.py +++ b/frictionless/platform.py @@ -38,6 +38,12 @@ class Platform: # Core + @cached_property + def bz2(self): + import bz2 + + return bz2 + @cached_property def chardet(self): import chardet @@ -146,6 +152,12 @@ def jsonschema_validators(self): return jsonschema.validators + @cached_property + def lzma(self): + import lzma + + return lzma + @cached_property def marko(self): import marko diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index 8c310df4e8..c04306ac97 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -278,7 +278,7 @@ def metadata_validate(cls, descriptor: IDescriptor): # type: ignore type = descriptor.get("type") Class = system.select_field_class(type) field = Class( - name=descriptor.get("name"), format=descriptor.get("format", None) # type: ignore + name=descriptor.get("name"), format=descriptor.get("format", "default") # type: ignore ) if type == "boolean": # 'example' value must be compared to customized 'trueValues' and 'falseValues' diff --git a/frictionless/settings.py b/frictionless/settings.py index e1db003c52..84c8b64403 100644 --- a/frictionless/settings.py +++ b/frictionless/settings.py @@ -5,7 +5,7 @@ # Version -VERSION = "5.16.0" +VERSION = "5.16.1" # General @@ -13,7 +13,7 @@ NAME_PATTERN = "^([-a-z0-9._/])+$" TYPE_PATTERN = "^([-a-z/])+$" PACKAGE_PATH = "datapackage.json" -COMPRESSION_FORMATS = ["zip", "gz"] +COMPRESSION_FORMATS = ["zip", "gz", "bz2", "xz"] # Defaults diff --git a/frictionless/system/loader.py b/frictionless/system/loader.py index b2980fbab5..0da2c21b4d 100644 --- a/frictionless/system/loader.py +++ b/frictionless/system/loader.py @@ -231,6 +231,28 @@ def read_byte_stream_decompress( byte_stream = platform.gzip.open(byte_stream) # type: ignore return byte_stream + # bzip2 compression + if self.resource.compression == "bz2": + # Stats + if not self.remote: + bytes = True + while bytes: + bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore + byte_stream.seek(0) + byte_stream = platform.bz2.open(byte_stream) # type: ignore + return byte_stream + + # XZ compression + if self.resource.compression == "xz": + # Stats + if not self.remote: + bytes = True + while bytes: + bytes = byte_stream.read1(io.DEFAULT_BUFFER_SIZE) # type: ignore + byte_stream.seek(0) + byte_stream = platform.lzma.open(byte_stream) # type: ignore + return byte_stream + # Not supported compression note = f'compression "{self.resource.compression}" is not supported' raise FrictionlessException(errors.CompressionError(note=note)) diff --git a/tests/resources/table/test_compression.py b/tests/resources/table/test_compression.py index 5658ec759c..8647483257 100644 --- a/tests/resources/table/test_compression.py +++ b/tests/resources/table/test_compression.py @@ -63,6 +63,28 @@ def test_resource_compression_local_csv_gz(): ] +def test_resource_compression_local_csv_xz(): + with TableResource(path="data/table.csv.xz") as resource: + assert resource.compression == "xz" + assert resource.innerpath is None + assert resource.header == ["id", "name"] + assert resource.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] + + +def test_resource_compression_local_csv_bz2(): + with TableResource(path="data/table.csv.bz2") as resource: + assert resource.compression == "bz2" + assert resource.innerpath is None + assert resource.header == ["id", "name"] + assert resource.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] + + def test_resource_compression_stream_csv_zip(): with open("data/table.csv.zip", "rb") as file: with TableResource(data=file, format="csv", compression="zip") as resource: @@ -83,6 +105,26 @@ def test_resource_compression_stream_csv_gz(): ] +def test_resource_compression_stream_csv_xz(): + with open("data/table.csv.xz", "rb") as file: + with TableResource(data=file, format="csv", compression="xz") as resource: + assert resource.header == ["id", "name"] + assert resource.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] + + +def test_resource_compression_stream_csv_bz2(): + with open("data/table.csv.bz2", "rb") as file: + with TableResource(data=file, format="csv", compression="bz2") as resource: + assert resource.header == ["id", "name"] + assert resource.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] + + @pytest.mark.vcr def test_resource_compression_remote_csv_zip(): source = "https://raw.githubusercontent.com/frictionlessdata/tabulator-py/master/data/table.csv.zip"