From c8325558599857c7b4af5848d1a2b702dbdbb8ba Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Wed, 9 Oct 2024 11:30:33 +0200 Subject: [PATCH 1/4] Fix download --- src/smos/smos_l2/cli.py | 18 +++++++++++------- src/smos/smos_l2/download.py | 23 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/smos/smos_l2/cli.py b/src/smos/smos_l2/cli.py index afcb1d4..d1ec3dc 100644 --- a/src/smos/smos_l2/cli.py +++ b/src/smos/smos_l2/cli.py @@ -1,5 +1,5 @@ import click -from datetime import datetime +from datetime import datetime, timedelta import pandas as pd from smos.smos_l2.download import SmosDissEoFtp, L2_START_DATE, get_avail_img_range @@ -20,9 +20,9 @@ @click.option( '--enddate', '-e', type=click.STRING, - default=str(datetime.now().date()), + default=None, help="Enddate in format YYYY-MM-DD. If not given, " - "then the current date is used.") + "then the last full day on the server is used.") @click.option( "--username", type=click.STRING, @@ -60,8 +60,11 @@ def cli_download(path, ftp = SmosDissEoFtp(path, username=username, password=password) + if enddate is None: + enddate = ftp.last_available_day() - timedelta(days=1) + ftp.sync_period(startdate=pd.to_datetime(startdate).to_pydatetime(), - enddate=pd.to_datetime(enddate).to_pydatetime()) + enddate=enddate) @click.command( "update_img", @@ -90,7 +93,8 @@ def cli_update_img(path, password): """ Extend a locally existing SMOS L2 by downloading new files that - don't yet exist locally. + don't yet exist locally. The last day on the server is usually incomplete + and therefore ignored. NOTE: Before using this program, create an account at https://eoiam-idp.eo.esa.int and ideally store you credentials in the file $HOME/.smosapirc (to avoid passing them as plain text). @@ -107,10 +111,10 @@ def cli_update_img(path, # display it properly on the command line. ftp = SmosDissEoFtp(path, username=username, password=password) - + enddate = ftp.last_available_day() - timedelta(days=1) # in case there are any incomplete days ftp.sync_period(startdate=get_avail_img_range(path)[1], - enddate=str(datetime.now().date())) + enddate=str(enddate.date())) @click.command( diff --git a/src/smos/smos_l2/download.py b/src/smos/smos_l2/download.py index a213a50..2c689f2 100644 --- a/src/smos/smos_l2/download.py +++ b/src/smos/smos_l2/download.py @@ -183,7 +183,7 @@ def list(self, subpath='', filter='all'): """ path = self.ftp_root if subpath not in [None, '']: - path += '/' + str(subpath) + path = path / PurePosixPath(subpath) cmd = f"cls {path}" r = self.exec(cmd) lst = r.stdout.decode("utf-8").splitlines() @@ -200,6 +200,22 @@ def list(self, subpath='', filter='all'): return data + def last_available_day(self): + """ + Get the latest available day on the server (incomplete directory). + We want to exclude this day from downloading. + + Returns + ------- + last_date: + + """ + last_year = [int(y.replace('/', '')) for y in self.list(filter='dir')][-1] + last_month = [int(m.replace('/', '')) for m in self.list(subpath=str(last_year), filter='dir')][-1] + last_day = [int(d.replace('/', '')) for d in self.list(subpath=f"{last_year}/{last_month:02}", filter='dir')][-1] + + return datetime(last_year, last_month, last_day) + def list_all_available_days(self, date_from=L2_START_DATE, date_to=datetime.now(), progressbar=True): """ @@ -230,7 +246,7 @@ def list_all_available_days(self, date_from=L2_START_DATE, years = [int(y.replace('/', '')) for y in self.list(filter='dir')] years = [y for y in years if ((y >= date_from.year) and (y <= date_to.year))] - for year in tqdm(years, disable=not progressbar): + for year in tqdm(years, disable=not progressbar, description="Scanning FTP folder"): months = [int(m.replace('/', '')) for m in self.list(subpath=str(year), filter='dir')] if year == date_from.year: months = [m for m in months if m >= date_from.month] @@ -300,7 +316,8 @@ def sync(self, year, month, day=None, opts='', dry_run=False): def sync_period(self, startdate, enddate, dry_run=False): """ Synchronize SMOS L2 data between local root and FTP folder for days - in the passed time frame. + in the passed time frame. The last day on the server is usually not yet + complete (i.e. swath files are missing). This will NOT be synchronized. Parameters ---------- From 0577a689cd0ca1b392e693840999d05021227c49 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Thu, 10 Oct 2024 11:05:40 +0200 Subject: [PATCH 2/4] Restructure and fix bugs --- src/smos/misc.py | 23 ++++++----- src/smos/smos_l2/cli.py | 5 ++- src/smos/smos_l2/download.py | 60 ++++++--------------------- src/smos/smos_l2/reshuffle.py | 65 ++++++++++++++---------------- tests/smos_l2/test_l2_download.py | 11 +++++ tests/smos_l2/test_l2_reshuffle.py | 10 ++--- tests/test_misc.py | 9 +++++ 7 files changed, 85 insertions(+), 98 deletions(-) diff --git a/src/smos/misc.py b/src/smos/misc.py index 76e1ede..72ab615 100644 --- a/src/smos/misc.py +++ b/src/smos/misc.py @@ -2,6 +2,7 @@ import os from datetime import date import typing as t +import yaml def _get_first_and_last_file(path: str): # Get list of all years (folders) in the path @@ -70,17 +71,21 @@ def _get_date(f: str) -> t.Union[date, None]: return None -def get_first_last_day_images(img_path: str) -> (date, date): - +def get_first_last_day_images(img_path: str) -> \ + (t.Union[date, None], t.Union[date, None]): f, l = _get_first_and_last_file(img_path) - first_date = _get_date(f) - last_date = _get_date(l) - - return first_date, last_date + first_day = _get_date(f) if f is not None else f + last_day = _get_date(l) if l is not None else f + return first_day, last_day -if __name__ == '__main__': +def read_summary_yml(path: str) -> dict: + """ + Read image summary and return fields as dict. + """ + path = os.path.join(path, 'overview.yml') - f, l = get_first_last_day_images("/home/wpreimes/shares/climers/Projects/FRM4SM/07_data/SMOSL2/MIR_SMUDP2_nc") - print(f, l) + with open(path, 'r') as stream: + props = yaml.safe_load(stream) + return props \ No newline at end of file diff --git a/src/smos/smos_l2/cli.py b/src/smos/smos_l2/cli.py index d1ec3dc..3901df5 100644 --- a/src/smos/smos_l2/cli.py +++ b/src/smos/smos_l2/cli.py @@ -2,8 +2,9 @@ from datetime import datetime, timedelta import pandas as pd -from smos.smos_l2.download import SmosDissEoFtp, L2_START_DATE, get_avail_img_range +from smos.smos_l2.download import SmosDissEoFtp, L2_START_DATE from smos.smos_l2.reshuffle import swath2ts, extend_ts +from smos.misc import get_first_last_day_images @click.command( "download", @@ -113,7 +114,7 @@ def cli_update_img(path, ftp = SmosDissEoFtp(path, username=username, password=password) enddate = ftp.last_available_day() - timedelta(days=1) # in case there are any incomplete days - ftp.sync_period(startdate=get_avail_img_range(path)[1], + ftp.sync_period(startdate=get_first_last_day_images(path)[1], enddate=str(enddate.date())) diff --git a/src/smos/smos_l2/download.py b/src/smos/smos_l2/download.py index 2c689f2..888d5b8 100644 --- a/src/smos/smos_l2/download.py +++ b/src/smos/smos_l2/download.py @@ -9,7 +9,8 @@ import pandas as pd from calendar import monthrange from pathlib import PurePosixPath -from glob import glob +from smos.misc import get_first_last_day_images +import yaml L2_START_DATE = datetime(2010, 6, 1) @@ -42,52 +43,6 @@ def load_dotrc(path=None) -> dict: config[k] = v.strip() return config -def get_avail_img_range(path) -> (datetime, datetime): - """ - Derive first and last day (available folder) with data from the - local SMOS L2 data - Folder structure: $PATH/YEAR/MONTH/DAY/*.nc - - Parameters - ---------- - path: str - Local root path (contains annual folders) - - Returns - ------- - first_day: datetime - First day for which image data is available. - last_day: datetime - Last day for which data is available. - """ - years = glob(os.path.join(path, '[0-9][0-9][0-9][0-9]')) - years = [int(os.path.basename(y)) for y in years] - years.sort() - if len(years) == 0: - raise ValueError(f"No SMOS L2 data found in {path}.") - - first_year = years[0] - last_year = years[-1] - - months = glob(os.path.join(path, str(last_year), '[0-9][0-9]')) - months = [int(os.path.basename(m)) for m in months] - months.sort() - if len(years) == 0: - raise ValueError(f"No SMOS L2 data found in {path}.") - first_month = months[0] - last_month = months[-1] - - days = glob(os.path.join(path, str(last_year), f"{last_month:02}", '[0-9][0-9]')) - days = [int(os.path.basename(d)) for d in days] - days.sort() - if len(years) == 0: - raise ValueError(f"No SMOS L2 data found in {path}.") - first_day = days[0] - last_day = days[-1] - - return (datetime(int(first_year), first_month, first_day), - datetime(int(last_year), last_month, last_day)) - class SmosDissEoFtp: def __init__(self, local_root, username=None, password=None, dotrc=None, @@ -350,4 +305,15 @@ def sync_period(self, startdate, enddate, dry_run=False): r = self.sync(dt.year, dt.month, dt.day, dry_run=dry_run) ret.append(r) + first_day, last_day = get_first_last_day_images(str(self.local_root)) + + props = dict(comment="DO NOT CHANGE THIS FILE MANUALLY! " + "It is required by the automatic data update process.", + first_day=str(first_day) if first_day is not None else None, + last_day=str(last_day) if last_day is not None else None, + last_update=str(datetime.now())) + + with open(os.path.join(self.local_root, 'overview.yml'), 'w') as f: + yaml.dump(props, f, default_flow_style=False, sort_keys=False) + return ret \ No newline at end of file diff --git a/src/smos/smos_l2/reshuffle.py b/src/smos/smos_l2/reshuffle.py index 6380ab5..cf6b0dc 100644 --- a/src/smos/smos_l2/reshuffle.py +++ b/src/smos/smos_l2/reshuffle.py @@ -2,20 +2,9 @@ import os import yaml from qa4sm_preprocessing.level2.smos import SMOSL2Reader -from smos.smos_l2.download import get_avail_img_range +from smos.misc import read_summary_yml, get_first_last_day_images from datetime import datetime -def read_summary_yml(path: str) -> dict: - """ - Read image summary and return fields as dict. - """ - path = os.path.join(path, 'overview.yml') - - with open(path, 'r') as stream: - props = yaml.safe_load(stream) - - return props - def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): """ @@ -40,7 +29,7 @@ def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): """ reader = SMOSL2Reader(img_path) - first_day, last_day = get_avail_img_range(img_path) + first_day, last_day = get_first_last_day_images(img_path) start = pd.to_datetime(startdate).to_pydatetime() if startdate is not None else first_day end = pd.to_datetime(enddate).to_pydatetime() if enddate is not None else last_day @@ -49,11 +38,13 @@ def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): if os.path.isfile(out_file): props = read_summary_yml(ts_path) - if start < pd.to_datetime(props['enddate']).to_pydatetime(): + if start < pd.to_datetime(props['last_day']).to_pydatetime(): raise ValueError("Cannot prepend data to time series, or replace " "existing values. Choose different start date.") - props = {'enddate': str(end), 'last_update': str(datetime.now()), + props = {'comment': "DO NOT CHANGE THIS FILE MANUALLY! " + "It is required by the automatic data update process.", + 'last_day': str(end.date()), 'last_update': str(datetime.now()), 'parameters': [str(v) for v in reader.varnames]} r = reader.repurpose( @@ -72,7 +63,7 @@ def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): def extend_ts(img_path, ts_path, memory=4): """ Append new image data to an existing time series record. - This will use the enddate from summary.yml in the time series + This will use the last_day from summary.yml in the time series directory to decide which date the update should start from and the available image directories to decide how many images can be appended. @@ -95,26 +86,32 @@ def extend_ts(img_path, ts_path, memory=4): f"series setup or provide overview.yml in {ts_path}.") props = read_summary_yml(ts_path) - startdate = pd.to_datetime(props['enddate']).to_pydatetime() - _, enddate = get_avail_img_range(img_path) + startdate = pd.to_datetime(props['last_day']).to_pydatetime() + _, last_day = get_first_last_day_images(img_path) - reader = SMOSL2Reader(img_path) + if startdate < pd.to_datetime(last_day).to_pydatetime(): - print(f"From: {startdate}, To: {enddate}") + reader = SMOSL2Reader(img_path) - r = reader.repurpose( - outpath=ts_path, - start=startdate, - end=enddate, - memory=memory, - imgbaseconnection=True, - overwrite=False, - append=True, - ) + print(f"Extent TimeSeries data From: {startdate}, To: {last_day}") - if r is not None: - props['enddate'] = str(enddate) - props['last_update'] = str(datetime.now()) + r = reader.repurpose( + outpath=ts_path, + start=startdate, + end=last_day, + memory=memory, + imgbaseconnection=True, + overwrite=False, + append=True, + ) + + if r is not None: + props['last_day'] = str(last_day) + props['last_update'] = str(datetime.now()) + + with open(out_file, 'w') as f: + yaml.dump(props, f, default_flow_style=False, sort_keys=False) + + else: + print(f"No extension required From: {startdate} To: {last_day}") - with open(out_file, 'w') as f: - yaml.dump(props, f, default_flow_style=False, sort_keys=False) diff --git a/tests/smos_l2/test_l2_download.py b/tests/smos_l2/test_l2_download.py index c47526c..e44eedf 100644 --- a/tests/smos_l2/test_l2_download.py +++ b/tests/smos_l2/test_l2_download.py @@ -1,6 +1,7 @@ import os from tempfile import TemporaryDirectory from smos.smos_l2.download import SmosDissEoFtp +from smos.misc import read_summary_yml def test_download_l2(): with TemporaryDirectory() as tempdir: @@ -9,6 +10,8 @@ def test_download_l2(): c = ftp.sync(2022, 1, 1, opts='-e --testflag 1 2 3', dry_run=True) assert c == f'mirror -c -e --testflag 1 2 3 /SMOS/L2SM/MIR_SMUDP2_nc/2022/01/01 {os.path.join(tempdir, "2022", "01", "01")} −−no−perms' + + def test_download_l2_period(): with TemporaryDirectory() as tempdir: ftp = SmosDissEoFtp(local_root=tempdir, username='asd', password='asd', @@ -17,3 +20,11 @@ def test_download_l2_period(): for d in [1, 2, 3]: assert cmds[d-1] == f'mirror -c /SMOS/L2SM/MIR_SMUDP2_nc/2022/01/0{d} {os.path.join(tempdir, "2022", "01", f"0{d}")} −−no−perms' + + props = read_summary_yml(tempdir) + assert props['first_day'] is None + assert props['last_day'] is None + assert props['last_update'] is not None + +if __name__ == '__main__': + test_download_l2_period() \ No newline at end of file diff --git a/tests/smos_l2/test_l2_reshuffle.py b/tests/smos_l2/test_l2_reshuffle.py index 8e78a7e..37ea012 100644 --- a/tests/smos_l2/test_l2_reshuffle.py +++ b/tests/smos_l2/test_l2_reshuffle.py @@ -1,6 +1,7 @@ import os from tempfile import TemporaryDirectory -from smos.smos_l2.reshuffle import swath2ts, extend_ts, read_summary_yml +from smos.smos_l2.reshuffle import swath2ts, extend_ts +from smos.misc import read_summary_yml from pynetcf.time_series import GriddedNcIndexedRaggedTs from pygeogrids.netcdf import load_grid import numpy as np @@ -12,7 +13,7 @@ def test_reshuffle_and_update(): assert os.path.isfile(os.path.join(ts_path, 'grid.nc')) props = read_summary_yml(ts_path) - assert props['enddate'] == '2022-01-02 00:00:00' + assert props['last_day'] == '2022-01-02' grid = load_grid(os.path.join(ts_path, 'grid.nc')) reader = GriddedNcIndexedRaggedTs(ts_path, grid=grid) @@ -31,7 +32,7 @@ def test_reshuffle_and_update(): extend_ts(img_path, ts_path) props = read_summary_yml(ts_path) - assert props['enddate'] == '2022-01-03 00:00:00' + assert props['last_day'] == '2022-01-03' reader = GriddedNcIndexedRaggedTs(ts_path, grid=grid) @@ -47,6 +48,3 @@ def test_reshuffle_and_update(): assert len(ts) == 2 reader.close() - -if __name__ == '__main__': - test_reshuffle_and_update() \ No newline at end of file diff --git a/tests/test_misc.py b/tests/test_misc.py index fff85ee..9dd44f6 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,9 +1,18 @@ +import tempfile + import os from smos.misc import get_first_last_day_images import datetime def test_first_last_date(): rootf = os.path.join(os.path.join(os.path.dirname(__file__), 'smos-test-data')) + + with tempfile.TemporaryDirectory() as emptydir: + s, e = get_first_last_day_images(emptydir) + assert s is None + assert e is None + + s, e = get_first_last_day_images(os.path.join(rootf, 'L2_SMOS')) assert s == datetime.date(2022,1,1) assert e == datetime.date(2022,1,3) From 43d0bf02fcac29f484c6f4e87b821182b06a92ff Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Thu, 10 Oct 2024 11:30:30 +0200 Subject: [PATCH 3/4] Update docs --- CHANGELOG.rst | 3 ++- README.rst | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fe6c09f..b00f9ea 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,7 +6,8 @@ Unreleased ========== - Add support for SMOS L4 RZSM product (`PR #11 `_) - Update pyscaffold to v4, Replace Travis CI with GithubActions (`PR #11 `_) - +- Add support for SMOS L2 product +- Add scripts for operational data updates of SMOS L2 (download and time series extension) Version 0.2 =========== diff --git a/README.rst b/README.rst index 4c995ce..255bc50 100644 --- a/README.rst +++ b/README.rst @@ -1,23 +1,26 @@ ==== SMOS ==== -.. image:: https://github.com/TUW-GEO/smos/workflows/Automated%20Tests/badge.svg?branch=master&event=push + +|ci| |cov| |pip| |doc| |doi| + +.. |ci| image:: https://github.com/TUW-GEO/smos/actions/workflows/build.yml/badge.svg?branch=master :target: https://github.com/TUW-GEO/smos/actions -.. image:: https://coveralls.io/repos/github/TUW-GEO/smos/badge.svg?branch=master - :target: https://coveralls.io/github/TUW-GEO/smos?branch=master +.. |cov| image:: https://coveralls.io/repos/github/TUW-GEO/smos/badge.svg?branch=master + :target: https://coveralls.io/github/TUW-GEO/smos?branch=master -.. image:: https://badge.fury.io/py/smos.svg +.. |pip| image:: https://badge.fury.io/py/smos.svg :target: http://badge.fury.io/py/smos -.. image:: https://readthedocs.org/projects/smos/badge/?version=latest +.. |doc| image:: https://readthedocs.org/projects/smos/badge/?version=latest :target: http://smos.readthedocs.org/ -.. image:: https://zenodo.org/badge/167011732.svg +.. |doi| image:: https://zenodo.org/badge/167011732.svg :target: https://zenodo.org/badge/latestdoi/167011732 -SMOS (Soil Moisture and Ocean Salinity) data readers and time series converter. +SMOS (Soil Moisture and Ocean Salinity) data readers and time series conversion tools. Works great in combination with `pytesmo `_. @@ -25,10 +28,7 @@ Works great in combination with `pytesmo `_. Documentation & Software Citation ================================= To see the latest full documentation click on the docs badge at the top. - To cite this package follow the Zenodo badge at the top and export the citation there. -Be aware that this badge links to the latest package version. Additional information -on DOI versioning can be found here: http://help.zenodo.org/#versioning Installation ============ @@ -88,6 +88,8 @@ added. - `SMOS IC `_: SMOS INRA-CESBIO (SMOS-IC) 25km - `SMOS L4 RZSM `_: SMOS CATDS-CESBIO (SMOS L4 RZSM) 25km +- SMOS L2 +- SMOS L3 Contribute ========== From 172bfe114c611fdf8ac0208d6f86998308fcc488 Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Thu, 10 Oct 2024 11:31:31 +0200 Subject: [PATCH 4/4] Update docs --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 255bc50..9036092 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ SMOS .. |doi| image:: https://zenodo.org/badge/167011732.svg :target: https://zenodo.org/badge/latestdoi/167011732 - + SMOS (Soil Moisture and Ocean Salinity) data readers and time series conversion tools.