From 1bda3bdf9fa107bbb85d6f1ef8a3fa1c6f3ec859 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Sun, 20 Aug 2023 07:15:21 -0400
Subject: [PATCH] feat(polars): support reading ndjson

---
Reviewer notes (free-form text in this region is not part of the diff):

* polars: adds `Backend.read_json`. It normalizes `path`, defaults
  `table_name` to `gen_name("read_json")`, and registers the result of
  `pl.scan_ndjson(path, **kwargs)`. If that call raises
  `pl.exceptions.ComputeError`, it registers `pl.read_ndjson(path, **kwargs)`
  instead; the in-code comment attributes this fallback to compressed files.
  Both loaders are the newline-delimited JSON readers, as the subject says.
* polars: the `read_csv` hunk only shortens the docstring summary line.
* snowflake: the three hunks only reword the `path` parameter description in
  the `read_csv`, `read_json` and `read_parquet` docstrings.
* tests: adds "clickhouse" and "datafusion" to a `pytest.mark.notyet` list in
  test_register.py; the decorated test itself is outside the hunk.
* NOTE(review): leading indentation inside the hunks was reconstructed from a
  whitespace-collapsed copy of this patch; the per-hunk line counts match the
  `@@` headers, but confirm the indentation against the upstream commit
  before applying.

 ibis/backends/polars/__init__.py     | 33 +++++++++++++++++++++++++++-
 ibis/backends/snowflake/__init__.py  |  6 ++---
 ibis/backends/tests/test_register.py |  2 ++
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py
index c13c10c5bb24..fbda39b7e72c 100644
--- a/ibis/backends/polars/__init__.py
+++ b/ibis/backends/polars/__init__.py
@@ -144,7 +144,7 @@ def sql(
     def read_csv(
         self, path: str | Path, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
-        """Register a CSV file as a table in the current database.
+        """Register a CSV file as a table.
 
         Parameters
         ----------
@@ -172,6 +172,37 @@ def read_csv(
         self._add_table(table_name, pl.read_csv(path, **kwargs))
         return self.table(table_name)
 
+    def read_json(
+        self, path: str | Path, table_name: str | None = None, **kwargs: Any
+    ) -> ir.Table:
+        """Register a JSON file as a table.
+
+        Parameters
+        ----------
+        path
+            A string or Path to a JSON file; globs are supported
+        table_name
+            An optional name to use for the created table. This defaults to
+            a sequentially generated name.
+        **kwargs
+            Additional keyword arguments passed to Polars loading function.
+            See https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.scan_ndjson.html
+            for more information.
+
+        Returns
+        -------
+        ir.Table
+            The just-registered table
+        """
+        path = normalize_filename(path)
+        table_name = table_name or gen_name("read_json")
+        try:
+            self._add_table(table_name, pl.scan_ndjson(path, **kwargs))
+        except pl.exceptions.ComputeError:
+            # handles compressed json files
+            self._add_table(table_name, pl.read_ndjson(path, **kwargs))
+        return self.table(table_name)
+
     def read_delta(
         self, path: str | Path, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py
index d5ad72ba3c66..fbdce8b8b7e7 100644
--- a/ibis/backends/snowflake/__init__.py
+++ b/ibis/backends/snowflake/__init__.py
@@ -615,7 +615,7 @@ def read_csv(
         Parameters
         ----------
         path
-            Path to the CSV file
+            A string or Path to a CSV file; globs are supported
         table_name
             Optional name for the table; if not passed, a random name will be generated
         kwargs
@@ -711,7 +711,7 @@ def read_json(
         Parameters
         ----------
         path
-            File or list of files
+            A string or Path to a JSON file; globs are supported
         table_name
             Optional table name
         kwargs
@@ -786,7 +786,7 @@ def read_parquet(
         Parameters
         ----------
         path
-            Path to a Parquet file
+            A string or Path to a Parquet file; globs are supported
         table_name
             Optional table name
         kwargs
diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py
index be695b6550c0..3732609e6cb1 100644
--- a/ibis/backends/tests/test_register.py
+++ b/ibis/backends/tests/test_register.py
@@ -509,7 +509,9 @@ def test_read_csv_glob(con, tmp_path, ft_data):
 @pytest.mark.notyet(
     [
         "bigquery",
+        "clickhouse",
         "dask",
+        "datafusion",
         "impala",
         "mssql",
         "mysql",