Merge pull request #2 from geo-smart/main: Sync old repo
Showing 46 changed files with 10,810 additions and 1,198 deletions.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datasets import Dataset, DatasetDict, Image  # Hugging Face datasets library\n",
"from util import TLDataset, readSetFromFile  # local helpers in util.py"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For the full dataset (the cell below is left commented out; this run builds the tiny dataset instead):"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# train_data = TLDataset(\n",
"#     \"../learning/\", \"imagery/\", \"masks/\", \"*_corrected.png\",\n",
"#     subset=\"Train\", fraction=0.1, seed=1,\n",
"# )\n",
"\n",
"# validation_data = TLDataset(\n",
"#     \"../learning/\", \"imagery/\", \"masks/\", \"*_corrected.png\",\n",
"#     subset=\"Test\", fraction=0.1, seed=1,\n",
"# )\n",
"\n",
"# image_paths_train = [str(path) for path in train_data.image_names]\n",
"# label_paths_train = [str(path) for path in train_data.mask_names]\n",
"\n",
"# image_paths_validation = [str(path) for path in validation_data.image_names]\n",
"# label_paths_validation = [str(path) for path in validation_data.mask_names]"
]
},
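{
"cell_type": "markdown",
"metadata": {},
"source": [
"`TLDataset` comes from the local `util` module, so here is a hypothetical sketch of the interface assumed above: a seeded, deterministic train/test split over the mask files, exposing `image_names`, `mask_names`, and `__getitem__` with a `\"name\"` key. The real class may differ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch of util.TLDataset's assumed behavior, not the real class.\n",
"import random\n",
"from pathlib import Path\n",
"\n",
"class TLDatasetSketch:\n",
"    def __init__(self, root, image_dir, mask_dir, mask_glob,\n",
"                 subset=\"Train\", fraction=0.1, seed=1):\n",
"        masks = sorted((Path(root) / mask_dir).glob(mask_glob))\n",
"        random.Random(seed).shuffle(masks)  # deterministic shuffle\n",
"        cutoff = int(len(masks) * fraction)  # size of the held-out share\n",
"        self.mask_names = masks[cutoff:] if subset == \"Train\" else masks[:cutoff]\n",
"        self.image_names = [\n",
"            Path(root) / image_dir / mask.name.replace(\"_corrected\", \"\")\n",
"            for mask in self.mask_names\n",
"        ]\n",
"\n",
"    def __len__(self):\n",
"        return len(self.image_names)\n",
"\n",
"    def __getitem__(self, index):\n",
"        return {\"name\": self.image_names[index].name}"
]
},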
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For the tiny dataset:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found and loaded 4382 images with glob *_corrected.png.\n",
"Subset of 4382 ground truth segmentation masks marked for Train.\n",
"Tiny dataset train: 198 images\n",
"Tiny dataset validation: 22 images\n"
]
}
],
"source": [
"all_data = TLDataset(\n",
"    \"../learning/\", \"imagery/\", \"masks/\", \"*_corrected.png\",\n",
"    subset=\"Train\", fraction=0, seed=1,  # fraction=0 selects every image\n",
")\n",
"\n",
"indices = readSetFromFile(\"../data/useful_images.txt\")\n",
"names = [all_data[index][\"name\"].replace(\".png\", \"\") for index in indices]\n",
"\n",
"cutoff = int(len(names) / 10)  # 10% validation split, matching fraction=0.1 above\n",
"train_names = names[cutoff:]\n",
"val_names = names[:cutoff]\n",
"\n",
"image_paths_train = [f\"../learning/imagery/{name}.png\" for name in train_names]\n",
"label_paths_train = [f\"../learning/masks/{name}_corrected.png\" for name in train_names]\n",
"\n",
"image_paths_validation = [f\"../learning/imagery/{name}.png\" for name in val_names]\n",
"label_paths_validation = [f\"../learning/masks/{name}_corrected.png\" for name in val_names]\n",
"\n",
"print(f\"Tiny dataset train: {len(train_names)} images\")\n",
"print(f\"Tiny dataset validation: {len(val_names)} images\")"
]
},
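{
"cell_type": "markdown",
"metadata": {},
"source": [
"`readSetFromFile` is also a local `util` helper. A minimal sketch of the behavior assumed above (hypothetical: `useful_images.txt` stores one integer index per line):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch of util.readSetFromFile; the real helper may differ.\n",
"def readSetFromFileSketch(path):\n",
"    # Assumes one integer index per line; blank lines are ignored.\n",
"    with open(path) as handle:\n",
"        return {int(line) for line in handle if line.strip()}"
]
},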
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dd26e5f40ee648f19e7c720a0845ffce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e6aef8bc7774ba8ab294a718a0afa56",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/198 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f76052cf75eb4e9cbec04f13555c99c1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "30c3fae17e094ca491b9de7fb022e611",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f6993e1878e0488589e5a232923f06e3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/22 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f5ff8c98b7db43059f68c567f0250ddb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5a3b9be6c6d641e9a2182c84cc7447a9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/434 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/datasets/stodoran/elwha-segmentation-tiny/commit/b03c4bd87b3924a54828adfd01b0cf689a9aef07', commit_message='Upload dataset', commit_description='', oid='b03c4bd87b3924a54828adfd01b0cf689a9aef07', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ | ||
"def createDataset(image_paths, label_paths):\n", | ||
" dataset = Dataset.from_dict({\"image\": sorted(image_paths), \"label\": sorted(label_paths)})\n", | ||
" dataset = dataset.cast_column(\"image\", Image())\n", | ||
" dataset = dataset.cast_column(\"label\", Image())\n", | ||
"\n", | ||
" return dataset\n", | ||
"\n", | ||
"train_dataset = createDataset(image_paths_train, label_paths_train)\n", | ||
"validation_dataset = createDataset(image_paths_validation, label_paths_validation)\n", | ||
"\n", | ||
"dataset = DatasetDict({\n", | ||
" \"train\": train_dataset,\n", | ||
" \"validation\": validation_dataset,\n", | ||
"})\n", | ||
"\n", | ||
"# This function assumes you have ran the huggingface-cli login command in a terminal/notebook\n", | ||
"dataset.push_to_hub(\"stodoran/elwha-segmentation\")" | ||
] | ||
}, | ||
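{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once pushed, the dataset can be pulled back down anywhere with `datasets.load_dataset` (sketch; assumes the Hub repo above exists and is public, or that you are authenticated):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datasets import load_dataset\n",
"\n",
"# Reload the pushed splits; image/label columns decode lazily to PIL images.\n",
"reloaded = load_dataset(\"stodoran/elwha-segmentation-tiny\")\n",
"print(reloaded[\"train\"][0][\"image\"].size)"
]
},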
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[stodoran/elwha-segmentation-large](https://huggingface.co/datasets/stodoran/elwha-segmentation-large/tree/main)<br>\n",
"[stodoran/elwha-segmentation-tiny](https://huggingface.co/datasets/stodoran/elwha-segmentation-tiny/tree/main)"
]
}
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "elwha_env", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |