Skip to content

Commit

Permalink
Merge pull request #411 from lincc-frameworks/doc_div_warnings
Browse files Browse the repository at this point in the history
fix divisions warnings
  • Loading branch information
dougbrn authored Mar 28, 2024
2 parents 60f12c3 + d6cdc97 commit 4a8849f
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 31 deletions.
2 changes: 1 addition & 1 deletion docs/examples/rrlyr-period.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"outputs": [],
"source": [
"# Load SDSS Stripe 82 RR Lyrae catalog\n",
"ens = Ensemble(client=False).from_dataset(\"s82_rrlyrae\")"
"ens = Ensemble(client=False).from_dataset(\"s82_rrlyrae\", sorted=True)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/gettingstarted/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"from tape import Ensemble\n",
"\n",
"ens = Ensemble() # Initialize a TAPE Ensemble\n",
"ens.from_dataset(\"s82_qso\")"
"ens.from_dataset(\"s82_qso\", sorted=True)"
]
},
{
Expand Down
1 change: 1 addition & 0 deletions docs/tutorials/batch_showcase.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"ens.from_source_dict(\n",
" source_dict,\n",
" column_mapper=ColumnMapper(id_col=\"id\", time_col=\"mjd\", flux_col=\"flux\", err_col=\"err\", band_col=\"band\"),\n",
" sorted=True,\n",
")"
]
},
Expand Down
26 changes: 21 additions & 5 deletions docs/tutorials/common_data_operations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"\n",
"ens = Ensemble()\n",
"\n",
"ens.from_dataset(\"s82_rrlyrae\", sort=True)"
"ens.from_dataset(\"s82_rrlyrae\", sorted=True)"
]
},
{
Expand Down Expand Up @@ -252,9 +252,9 @@
"metadata": {},
"outputs": [],
"source": [
"ens.calc_nobs(by_band=True)\n",
"ens.calc_nobs(by_band=True, temporary=False)\n",
"\n",
"ens.object[[\"nobs_u\", \"nobs_g\", \"nobs_r\", \"nobs_i\", \"nobs_z\", \"nobs_total\"]].head(5)"
"ens.object.head(5)[[\"nobs_u\", \"nobs_g\", \"nobs_r\", \"nobs_i\", \"nobs_z\", \"nobs_total\"]]"
]
},
{
Expand Down Expand Up @@ -467,8 +467,8 @@
"metadata": {},
"outputs": [],
"source": [
"ens.source.repartition(partition_size=\"100MB\").update_ensemble() # 100MBs is generally recommended\n",
"ens.source # In this case, we have a small set of data that easily fits into one partition"
"ens.source.repartition(partition_size=\"100MB\") # 100MBs is generally recommended\n",
"# In this case, we have a small set of data that easily fits into one partition"
]
},
{
Expand Down Expand Up @@ -548,6 +548,15 @@
"In some situations, you may find yourself running a given workflow many times. Due to the nature of lazy-computation, this will involve repeated execution of data I/O, pre-processing steps, initial analysis, etc. In these situations, it may be effective to instead save the ensemble state to disk after completion of these initial processing steps. To accomplish this, we can use the `Ensemble.save_ensemble()` function."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ens.object.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -576,6 +585,13 @@
"new_ens = Ensemble()\n",
"new_ens.from_ensemble(\"./ensemble\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
12 changes: 3 additions & 9 deletions docs/tutorials/structure_function_showcase.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,6 @@
"in a TAPE `ensemble`. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -249,6 +242,7 @@
"ens.from_source_dict(\n",
" {\"id_ens\": id_ens, \"t_ens\": t_ens, \"y_ens\": y_ens, \"yerr_ens\": yerr_ens, \"filter_ens\": filter_ens},\n",
" column_mapper=manual_colmap,\n",
" sorted=True,\n",
")"
]
},
Expand Down Expand Up @@ -564,11 +558,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
"version": "3.10.11"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
"hash": "83afbb17b435d9bf8b0d0042367da76f26510da1c5781f0ff6e6c518eab621ec"
}
}
},
Expand Down
14 changes: 11 additions & 3 deletions docs/tutorials/tape_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
")\n",
"\n",
"# Read in data from a parquet file that contains source (timeseries) data\n",
"ens.from_parquet(source_file=f\"{rel_path}/source/test_source.parquet\", column_mapper=col_map)\n",
"ens.from_parquet(source_file=f\"{rel_path}/source/test_source.parquet\", column_mapper=col_map, sorted=True)\n",
"\n",
"ens.source.head(5) # View the first 5 entries of the source table"
]
Expand Down Expand Up @@ -80,6 +80,7 @@
" source_file=f\"{rel_path}/source/test_source.parquet\",\n",
" object_file=f\"{rel_path}/object/test_object.parquet\",\n",
" column_mapper=col_map,\n",
" sorted=True,\n",
")\n",
"\n",
"ens.object.head(5) # View the first 5 entries of the object table"
Expand Down Expand Up @@ -147,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"ens.from_dataset(\"s82_rrlyrae\") # Let's grab the Stripe 82 RR Lyrae\n",
"ens.from_dataset(\"s82_rrlyrae\", sorted=True) # Let's grab the Stripe 82 RR Lyrae\n",
"\n",
"ens.object.head(5)"
]
Expand Down Expand Up @@ -209,10 +210,17 @@
"source": [
"colmap = ColumnMapper(id_col=\"id\", time_col=\"time\", flux_col=\"flux\", err_col=\"error\", band_col=\"band\")\n",
"ens = Ensemble()\n",
"ens.from_source_dict(source_dict, column_mapper=colmap)\n",
"ens.from_source_dict(source_dict, column_mapper=colmap, sorted=True)\n",
"\n",
"ens.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
6 changes: 3 additions & 3 deletions docs/tutorials/using_ray_with_the_ensemble.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"metadata": {},
"outputs": [],
"source": [
"ens.from_dataset(\"s82_qso\")\n",
"ens.from_dataset(\"s82_qso\", sorted=True)\n",
"ens.source = ens.source.repartition(npartitions=10)\n",
"ens.batch(\n",
" calc_sf2, use_map=False\n",
Expand Down Expand Up @@ -117,7 +117,7 @@
"%%time\n",
"\n",
"ens = Ensemble(client=False) # Do not use a client\n",
"ens.from_dataset(\"s82_qso\")\n",
"ens.from_dataset(\"s82_qso\", sorted=True)\n",
"ens.source = ens.source.repartition(npartitions=10)\n",
"ens.batch(calc_sf2, use_map=False)"
]
Expand Down Expand Up @@ -151,7 +151,7 @@
"%%time\n",
"\n",
"ens = Ensemble()\n",
"ens.from_dataset(\"s82_qso\")\n",
"ens.from_dataset(\"s82_qso\", sorted=True)\n",
"ens.source = ens.source.repartition(npartitions=10)\n",
"ens.batch(calc_sf2, use_map=False).compute()"
]
Expand Down
8 changes: 4 additions & 4 deletions docs/tutorials/working_with_the_ensemble.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"source": [
"from tape.ensemble import Ensemble\n",
"\n",
"ens = Ensemble().from_dataset(\"s82_rrlyrae\", sort=True)"
"ens = Ensemble().from_dataset(\"s82_rrlyrae\", sorted=True)"
]
},
{
Expand Down Expand Up @@ -197,7 +197,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Storing and Accessing Result Frames\n",
"## Storing and Accessing Result Frames\n",
"The `Ensemble` provides a powerful batching interface, `Ensemble.batch()`, to perform analysis functions in parallel across your lightcurves.\n",
"\n",
"For the below example, we use the included suite of analysis functions to apply `tape.analysis.calc_stetson_J` on our dataset. (For more info on `Ensemble.batch()`, including providing your own custom functions, see the [Ensemble Batch Showcase](https://tape.readthedocs.io/en/latest/tutorials/batch_showcase.html#) )"
Expand Down Expand Up @@ -319,7 +319,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Keeping the Object and Source Tables in Sync\n",
"## Keeping the Object and Source Tables in Sync\n",
"\n",
"The `TAPE` `Ensemble` attempts to lazily \"sync\" the Object and Source tables such that:\n",
"\n",
Expand Down Expand Up @@ -397,7 +397,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.11"
},
"vscode": {
"interpreter": {
Expand Down
5 changes: 3 additions & 2 deletions docs/tutorials/working_with_the_timeseries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
" flux_col=\"psFlux\",\n",
" err_col=\"psFluxErr\",\n",
" band_col=\"filterName\",\n",
" sorted=True,\n",
")"
]
},
Expand Down Expand Up @@ -85,7 +86,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "py310",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -103,7 +104,7 @@
},
"vscode": {
"interpreter": {
"hash": "08968836a6367873274ed1d5e98a07391f42fc3a62bd5aba54afbd7b11ba8673"
"hash": "83afbb17b435d9bf8b0d0042367da76f26510da1c5781f0ff6e6c518eab621ec"
}
}
},
Expand Down
13 changes: 10 additions & 3 deletions src/tape/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -2034,9 +2034,16 @@ def from_dataset(self, dataset, **kwargs):
band_col=dataset_map["band"],
)

return self.from_parquet(
source_file=dataset_info["source_file"],
object_file=dataset_info["object_file"],
# repartition and sort these demo datasets
src = dd.read_parquet(dataset_info["source_file"]).repartition(npartitions=4).persist()
obj = dd.read_parquet(dataset_info["object_file"]).repartition(npartitions=2).persist()

src = src.set_index(dataset_map["id"], sort=True)
obj = obj.set_index(dataset_map["id"], sort=True)

return self.from_dask_dataframe(
source_frame=src,
object_frame=obj,
column_mapper=col_map,
**kwargs,
)
Expand Down

0 comments on commit 4a8849f

Please sign in to comment.