-
Notifications
You must be signed in to change notification settings - Fork 380
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SatlasPretrain: add new dataset (#2248)
* SatlasPretrain: add new dataset
* Add versionadded
* Fix bugs
* Add tests
* Fix Windows tests
* Simpler Windows fix
* Remove unnecessary variable
* Landsat files must be resized
* Add more checksums
* Ruff
* Download from AWS
* Add missing import
* Add NAIP checksums
* Add Landsat and Sentinel-1 checksums
* All S3 all the time
* Fix Windows tests
* Use pandas
* Use good_images to find directory instead of glob
* Return timestamp
* Update tasks, fix NAIP resolution
- Loading branch information
1 parent
90e824a
commit a0a2585
Showing
60 changed files
with
952 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import json | ||
import os | ||
import shutil | ||
|
||
from PIL import Image | ||
|
||
# Edge length, in pixels, of a full-size fake tile.
SIZE = 32

# Edge length of the fake tile written for each Landsat band directory.
# Every band defaults to half of SIZE; the exceptions are overridden below.
# Updating existing keys keeps the b1..b12 insertion order intact.
# NOTE(review): 'b12' is listed here, but the `filenames` hierarchy in this
# script only generates b1-b11 -- confirm whether the extra key is intended.
landsat_size = {f'b{number}': SIZE // 2 for number in range(1, 13)}
landsat_size.update({'b8': SIZE, 'b11': SIZE // 4, 'b12': SIZE // 4})
|
||
# (col, row) pairs of the fake tiles; written to metadata/train_lowres.json.
index = [[7149, 3246], [1234, 5678]]

# Image name -> ISO 8601 timestamp; written to metadata/image_times.json.
times = {
    '2022-03': '2022-03-01T00:00:00+00:00',
    'm_3808245_se_17_1_20110801': '2011-08-01T12:00:00+00:00',
    '2022-01': '2022-01-01T00:00:00+00:00',
    'S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235': '2022-03-09T06:02:35+00:00',
}

# One [col, row, image name] entry for every tile of every image name,
# grouped by image name; written to metadata/good_images_lowres_all.json.
good_images = [[col, row, name] for name in times for col, row in index]
|
||
FILENAME_HIERARCHY = dict[str, 'FILENAME_HIERARCHY'] | list[str] | ||
filenames: FILENAME_HIERARCHY = { | ||
'landsat': {'2022-03': list(f'b{i}' for i in range(1, 12))}, | ||
'naip': {'m_3808245_se_17_1_20110801': ['tci', 'ir']}, | ||
'sentinel1': {'2022-01': ['vh', 'vv']}, | ||
'sentinel2': { | ||
'S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235': [ | ||
'tci', | ||
'b05', | ||
'b06', | ||
'b07', | ||
'b08', | ||
'b11', | ||
'b12', | ||
] | ||
}, | ||
} | ||
|
||
|
||
def create_files(path: str) -> None:
    """Fill the band directory ``path`` with one blank PNG per tile in ``index``.

    The last component of ``path`` is used as the band name. A ``tci`` band
    is written as an RGB image and every other band as single-channel ``L``.
    Images are ``SIZE`` pixels square, except when ``path`` contains
    ``'landsat'``, in which case the edge length comes from ``landsat_size``.

    Args:
        path: Directory to create (if missing) and populate.

    Raises:
        KeyError: If ``path`` contains ``'landsat'`` but its band name is not
            a key of ``landsat_size``.
    """
    os.makedirs(path, exist_ok=True)

    # Band, mode and size depend only on ``path``, so work them out once
    # instead of once per tile.
    band = os.path.basename(path)
    mode = 'RGB' if band == 'tci' else 'L'
    size = landsat_size[band] if 'landsat' in path else SIZE

    for col, row in index:
        img = Image.new(mode, (size, size))
        img.save(os.path.join(path, f'{col}_{row}.png'))
|
||
|
||
def create_directory(directory: str, hierarchy: FILENAME_HIERARCHY) -> None:
    """Walk ``hierarchy`` and generate files for every leaf directory.

    Args:
        directory: Path that the entries of ``hierarchy`` are joined onto.
        hierarchy: Either a dict mapping a sub-directory name to its own
            hierarchy, or a list of leaf directory names.
    """
    if not isinstance(hierarchy, dict):
        # Leaf level: each entry names a directory to fill with files.
        for leaf in hierarchy:
            create_files(os.path.join(directory, leaf))
        return

    # Intermediate level: descend into each named sub-tree.
    for name, subtree in hierarchy.items():
        create_directory(os.path.join(directory, name), subtree)
|
||
|
||
if __name__ == '__main__':
    # Imagery: build the whole band-directory tree under the current directory.
    create_directory('.', filenames)

    # Static data: a single blank land-cover image, for the first tile only.
    first_col, first_row = index[0]
    static_dir = os.path.join('static', f'{first_col}_{first_row}')
    os.makedirs(static_dir, exist_ok=True)
    Image.new('L', (SIZE, SIZE)).save(os.path.join(static_dir, 'land_cover.png'))

    # Metadata: dump each module-level table to its own JSON file.
    os.makedirs('metadata', exist_ok=True)
    metadata_files = {
        'train_lowres.json': index,
        'good_images_lowres_all.json': good_images,
        'image_times.json': times,
    }
    for json_name, payload in metadata_files.items():
        with open(os.path.join('metadata', json_name), 'w') as handle:
            json.dump(payload, handle)

    # Archive every directory found in the current directory as <name>.tar.
    # NOTE(review): this is not limited to the directories created above.
    for entry in os.listdir('.'):
        if not os.path.isdir(entry):
            continue
        shutil.make_archive(entry, 'tar', '.', entry)
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[[7149, 3246, "2022-03"], [1234, 5678, "2022-03"], [7149, 3246, "m_3808245_se_17_1_20110801"], [1234, 5678, "m_3808245_se_17_1_20110801"], [7149, 3246, "2022-01"], [1234, 5678, "2022-01"], [7149, 3246, "S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235"], [1234, 5678, "S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235"]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"2022-03": "2022-03-01T00:00:00+00:00", "m_3808245_se_17_1_20110801": "2011-08-01T12:00:00+00:00", "2022-01": "2022-01-01T00:00:00+00:00", "S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235": "2022-03-09T06:02:35+00:00"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[[7149, 3246], [1234, 5678]] |
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.
Binary file not shown.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
Binary file not shown.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b05/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b05/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b06/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b06/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b07/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b07/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b08/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b08/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b11/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b11/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b12/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+76 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/b12/7149_3246.png
Oops, something went wrong.
Binary file added
BIN
+83 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/tci/1234_5678.png
Oops, something went wrong.
Binary file added
BIN
+83 Bytes
.../S2A_MSIL1C_20220309T032601_N0400_R018_T48RYR_20220309T060235/tci/7149_3246.png
Oops, something went wrong.
Binary file not shown.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import os | ||
import shutil | ||
from pathlib import Path | ||
|
||
import matplotlib.pyplot as plt | ||
import pytest | ||
import torch.nn as nn | ||
from pytest import MonkeyPatch | ||
from torch import Tensor | ||
|
||
from torchgeo.datasets import DatasetNotFoundError, SatlasPretrain | ||
from torchgeo.datasets.utils import Executable | ||
|
||
|
||
class TestSatlasPretrain:
    """Tests for ``SatlasPretrain`` using the data under ``tests/data/satlas``."""

    @pytest.fixture
    def dataset(
        self, aws: Executable, monkeypatch: MonkeyPatch, tmp_path: Path
    ) -> SatlasPretrain:
        """Return a dataset rooted at ``tmp_path`` with ``url``/``tarballs`` patched.

        NOTE(review): the ``aws`` fixture is requested but never referenced in
        the body; presumably it only needs to be active -- confirm.
        """
        image_products = ('landsat', 'naip', 'sentinel1', 'sentinel2')
        all_products = (*image_products, 'static', 'metadata')

        # The trailing '' makes the joined path end with a separator.
        local_url = os.path.join('tests', 'data', 'satlas', '')
        monkeypatch.setattr(SatlasPretrain, 'url', local_url)

        # One single-file tarball tuple per product.
        monkeypatch.setattr(
            SatlasPretrain,
            'tarballs',
            {name: (f'{name}.tar',) for name in all_products},
        )

        return SatlasPretrain(
            tmp_path, images=image_products, transforms=nn.Identity(), download=True
        )

    @pytest.mark.parametrize('index', [0, 1])
    def test_getitem(self, dataset: SatlasPretrain, index: int) -> None:
        """Each sample is a dict with tensor image, time and mask entries."""
        sample = dataset[index]
        assert isinstance(sample, dict)
        for image in dataset.images:
            for prefix in ('image', 'time'):
                assert isinstance(sample[f'{prefix}_{image}'], Tensor)
        for label in dataset.labels:
            assert isinstance(sample[f'mask_{label}'], Tensor)

    def test_len(self, dataset: SatlasPretrain) -> None:
        """The dataset built by the fixture has two samples."""
        assert len(dataset) == 2

    def test_already_downloaded(self, dataset: SatlasPretrain) -> None:
        """Re-creating the dataset after removing ``landsat`` from its root works."""
        landsat_dir = os.path.join(dataset.root, 'landsat')
        shutil.rmtree(landsat_dir)
        SatlasPretrain(root=dataset.root, download=True)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root without ``download`` raises ``DatasetNotFoundError``."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            SatlasPretrain(tmp_path)

    def test_plot(self, dataset: SatlasPretrain) -> None:
        """``plot`` accepts a sample that also carries a land-cover prediction."""
        sample = dataset[0]
        # Reuse the mask as the prediction so the sample has a prediction entry.
        sample['prediction_land_cover'] = sample['mask_land_cover']
        dataset.plot(sample, suptitle='Test')
        plt.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.