Skip to content

Commit

Permalink
feat(polars): support reading ndjson
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Aug 21, 2023
1 parent adb8f4c commit 1bda3bd
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
33 changes: 32 additions & 1 deletion ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def sql(
def read_csv(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Register a CSV file as a table in the current database.
"""Register a CSV file as a table.
Parameters
----------
Expand Down Expand Up @@ -172,6 +172,37 @@ def read_csv(
self._add_table(table_name, pl.read_csv(path, **kwargs))
return self.table(table_name)

def read_json(
    self, path: str | Path, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
    """Register a newline-delimited JSON file as a table.

    The file is loaded with the Polars NDJSON readers: `pl.scan_ndjson`
    is tried first and `pl.read_ndjson` is used as a fallback.

    Parameters
    ----------
    path
        A string or Path to a JSON file; globs are supported
    table_name
        An optional name to use for the created table. When omitted (or
        otherwise falsy), a name is produced by `gen_name("read_json")`.
    **kwargs
        Additional keyword arguments forwarded unchanged to the Polars
        loading function (both the scanning call and the fallback).
        See https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.scan_ndjson.html
        for more information.

    Returns
    -------
    ir.Table
        The just-registered table
    """
    source = normalize_filename(path)
    name = table_name if table_name else gen_name("read_json")
    try:
        # Preferred path: scan the file and register the result.
        scanned = pl.scan_ndjson(source, **kwargs)
        self._add_table(name, scanned)
    except pl.exceptions.ComputeError:
        # Fallback used for compressed JSON files: read the file with
        # `read_ndjson` instead and register that result.
        loaded = pl.read_ndjson(source, **kwargs)
        self._add_table(name, loaded)
    return self.table(name)

def read_delta(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
Expand Down
6 changes: 3 additions & 3 deletions ibis/backends/snowflake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ def read_csv(
Parameters
----------
path
Path to the CSV file
A string or Path to a CSV file; globs are supported
table_name
Optional name for the table; if not passed, a random name will be generated
kwargs
Expand Down Expand Up @@ -711,7 +711,7 @@ def read_json(
Parameters
----------
path
File or list of files
A string or Path to a JSON file; globs are supported
table_name
Optional table name
kwargs
Expand Down Expand Up @@ -786,7 +786,7 @@ def read_parquet(
Parameters
----------
path
Path to a Parquet file
A string or Path to a Parquet file; globs are supported
table_name
Optional table name
kwargs
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,9 @@ def test_read_csv_glob(con, tmp_path, ft_data):
@pytest.mark.notyet(
[
"bigquery",
"clickhouse",
"dask",
"datafusion",
"impala",
"mssql",
"mysql",
Expand Down

0 comments on commit 1bda3bd

Please sign in to comment.