Skip to content

Commit

Permalink
rename to nested-dask
Browse files Browse the repository at this point in the history
  • Loading branch information
dougbrn committed May 21, 2024
1 parent 954ab73 commit d93cdf5
Show file tree
Hide file tree
Showing 18 changed files with 50 additions and 50 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# dask-nested
# nested-dask

[![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/)

[![PyPI](https://img.shields.io/pypi/v/dask-nested?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/dask-nested/)
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/smoke-test.yml)](https://github.com/lincc-frameworks/dask-nested/actions/workflows/smoke-test.yml)
[![Codecov](https://codecov.io/gh/lincc-frameworks/dask-nested/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/dask-nested)
[![Read The Docs](https://img.shields.io/readthedocs/dask-nested)](https://dask-nested.readthedocs.io/)
[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/dask-nested/)
[![PyPI](https://img.shields.io/pypi/v/nested-dask?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/nested-dask/)
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/smoke-test.yml)](https://github.com/lincc-frameworks/nested-dask/actions/workflows/smoke-test.yml)
[![Codecov](https://codecov.io/gh/lincc-frameworks/nested-dask/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/nested-dask)
[![Read The Docs](https://img.shields.io/readthedocs/nested-dask)](https://nested-dask.readthedocs.io/)
[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/nested-dask/)

This project was automatically generated using the LINCC-Frameworks
[python-project-template](https://github.com/lincc-frameworks/python-project-template).
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
// you know what you are doing.
"version": 1,
// The name of the project being benchmarked.
"project": "dask-nested",
"project": "nested-dask",
// The project's homepage.
"project_url": "https://github.com/lincc-frameworks/dask-nested",
"project_url": "https://github.com/lincc-frameworks/nested-dask",
// The URL or local path of the source code repository for the
// project being benchmarked.
"repo": "..",
Expand All @@ -32,7 +32,7 @@
// variable.
"environment_type": "virtualenv",
// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/lincc-frameworks/dask-nested/commit/",
"show_commit_url": "https://github.com/lincc-frameworks/nested-dask/commit/",
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": [
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
For more information on writing benchmarks:
https://asv.readthedocs.io/en/stable/writing_benchmarks.html."""

import dask_nested as dn
import nested_dask as nd
import nested_pandas as npd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -31,8 +31,8 @@ def _generate_benchmark_data(add_nested=True):
layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index()

# Convert to Dask
base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5)
layer_nf = dn.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50)
base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5)
layer_nf = nd.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50)

# Return based on add_nested
if add_nested:
Expand All @@ -47,8 +47,8 @@ class NestedFrameAddNested:

n_base = 100
layer_size = 1000
base_nf = dn.NestedFrame
layer_nf = dn.NestedFrame
base_nf = nd.NestedFrame
layer_nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand All @@ -70,7 +70,7 @@ def peakmem_run(self):
class NestedFrameReduce:
"""Benchmark the NestedFrame.reduce function"""

nf = dn.NestedFrame
nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand All @@ -93,7 +93,7 @@ def peakmem_run(self):
class NestedFrameQuery:
"""Benchmark the NestedFrame.query function"""

nf = dn.NestedFrame
nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "dask-nested"
project = "nested-dask"
copyright = "2023, lincc-frameworks"
author = "lincc-frameworks"
release = version("dask-nested")
release = version("nested-dask")
# for example take major/minor
version = ".".join(release.split(".")[:2])

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[project]
name = "dask-nested"
name = "nested-dask"
license = {file = "LICENSE"}
readme = "README.md"
authors = [
Expand All @@ -25,7 +25,7 @@ dependencies = [
]

[project.urls]
"Source Code" = "https://github.com/lincc-frameworks/dask-nested"
"Source Code" = "https://github.com/lincc-frameworks/nested-dask"

# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
[project.optional-dependencies]
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/dask_nested/core.py → src/nested_dask/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def from_dask_dataframe(cls, df) -> NestedFrame:
Returns
-------
`dask_nested.NestedFrame`
`nested_dask.NestedFrame`
"""
return df.map_partitions(npd.NestedFrame)

Expand Down Expand Up @@ -163,7 +163,7 @@ def add_nested(self, nested, name, how="outer") -> NestedFrame: # type: ignore[
Returns
-------
`dask_nested.NestedFrame`
`nested_dask.NestedFrame`
"""
nested = nested.map_partitions(lambda x: pack_flat(x)).rename(name)
return self.join(nested, how=how)
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from nested_pandas import datasets

import dask_nested as dn
import nested_dask as nd


def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame:
def generate_data(n_base, n_layer, npartitions=1, seed=None) -> nd.NestedFrame:
"""Generates a toy dataset.
Docstring copied from nested-pandas.
Expand All @@ -28,15 +28,15 @@ def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame:
Examples
--------
>>> import dask_nested as dn
>>> dn.datasets.generate_data(10,100)
>>> dn.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
>>> import nested_dask as nd
>>> nd.datasets.generate_data(10,100)
>>> nd.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
"""

# Use nested-pandas generator
base_nf = datasets.generate_data(n_base, n_layer, seed=seed)

# Convert to nested-dask
base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions)
base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions)

return base_nf
File renamed without changes.
File renamed without changes.
20 changes: 10 additions & 10 deletions tests/dask_nested/conftest.py → tests/nested_dask/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd
import nested_pandas as npd
import numpy as np
import pytest
Expand All @@ -23,10 +23,10 @@ def test_dataset():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index()

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return base_dn.add_nested(layer_dn, "nested")
return base_nd.add_nested(layer_nd, "nested")


@pytest.fixture
Expand All @@ -53,10 +53,10 @@ def test_dataset_with_nans():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index")

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return base_dn.add_nested(layer_dn, "nested")
return base_nd.add_nested(layer_nd, "nested")


@pytest.fixture
Expand All @@ -78,7 +78,7 @@ def test_dataset_no_add_nested():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index")

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return (base_dn, layer_dn)
return (base_nd, layer_nd)
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import dask_nested as dn
import nested_dask as nd


def test_generate_data():
"""test the dataset generator function"""

# test the seed
generate_1 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_2 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_3 = dn.datasets.generate_data(10, 100, npartitions=2, seed=2)
generate_1 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_2 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_3 = nd.datasets.generate_data(10, 100, npartitions=2, seed=2)

assert generate_1.compute().equals(generate_2.compute())
assert not generate_1.compute().equals(generate_3.compute())
Expand Down
6 changes: 3 additions & 3 deletions tests/dask_nested/test_io.py → tests/nested_dask/test_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd


def test_read_parquet(test_dataset, tmp_path):
Expand All @@ -16,8 +16,8 @@ def test_read_parquet(test_dataset, tmp_path):
test_dataset[["a", "b"]].to_parquet(test_save_path, write_index=True)

# Now read
base = dn.read_parquet(test_save_path, calculate_divisions=True)
nested = dn.read_parquet(nested_save_path, calculate_divisions=True)
base = nd.read_parquet(test_save_path, calculate_divisions=True)
nested = nd.read_parquet(nested_save_path, calculate_divisions=True)

base = base.add_nested(nested, "nested")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd
import numpy as np
import pytest
from nested_pandas.series.dtype import NestedDtype
Expand Down Expand Up @@ -31,7 +31,7 @@ def test_add_nested(test_dataset_no_add_nested):
base_with_nested = base.add_nested(layer, "nested")

# Check that the result is a nestedframe
assert isinstance(base_with_nested, dn.NestedFrame)
assert isinstance(base_with_nested, nd.NestedFrame)

# Check that there's a new nested column with the correct dtype
assert "nested" in base_with_nested.columns
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_to_parquet_combined(test_dataset, tmp_path):
test_dataset.to_parquet(test_save_path, by_layer=False)

# load back from parquet
loaded_dataset = dn.read_parquet(test_save_path, calculate_divisions=True)
loaded_dataset = nd.read_parquet(test_save_path, calculate_divisions=True)
# todo: file bug for this and investigate
loaded_dataset = loaded_dataset.reset_index().set_index("index")

Expand All @@ -131,8 +131,8 @@ def test_to_parquet_by_layer(test_dataset, tmp_path):
test_dataset.to_parquet(test_save_path, by_layer=True, write_index=True)

# load back from parquet
loaded_base = dn.read_parquet(test_save_path / "base", calculate_divisions=True)
loaded_nested = dn.read_parquet(test_save_path / "nested", calculate_divisions=True)
loaded_base = nd.read_parquet(test_save_path / "base", calculate_divisions=True)
loaded_nested = nd.read_parquet(test_save_path / "nested", calculate_divisions=True)

loaded_dataset = loaded_base.add_nested(loaded_nested, "nested")

Expand Down

0 comments on commit d93cdf5

Please sign in to comment.