From 25b10c668faef0485afb6a78d928dcdc18600c2f Mon Sep 17 00:00:00 2001 From: Wolfgang Preimesberger Date: Sun, 10 Nov 2024 13:57:49 +0100 Subject: [PATCH] Changed default variables to reshuffle SMOS L2 and added CLI option to select variables manually --- CHANGELOG.rst | 4 +++ environment.yml | 4 +-- setup.cfg | 4 +-- src/smos/smos_l2/cli.py | 21 +++++++++++---- src/smos/smos_l2/reshuffle.py | 43 +++++++++++++++++++++++++++--- tests/smos_l2/test_l2_reshuffle.py | 7 ++++- 6 files changed, 70 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6341c72..0601847 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,10 @@ Changelog ========= +v0.3.1 +====== +- Changed the default variables reshuffled for SMOS L2 and added option to select variables + v0.3.0 ====== - Add support for SMOS L4 RZSM product (`PR #11 `_) diff --git a/environment.yml b/environment.yml index 521c767..bbb0791 100644 --- a/environment.yml +++ b/environment.yml @@ -17,9 +17,9 @@ dependencies: - pygeogrids - pynetcf - pyproj - - git+https://github.com/TUW-GEO/repurpose@master + - repurpose>=0.13.1 - trollsift - ease_grid - more_itertools - cf-xarray==0.8.4 - - git+https://github.com/awst-austria/qa4sm-preprocessing@master + - qa4sm_preprocessing==0.3 diff --git a/setup.cfg b/setup.cfg index 820f33d..e748864 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ install_requires = dask[distributed] xarray netCDF4 - repurpose + repurpose>=0.13.1 pyresample pygeogrids>=0.3.2 pynetcf>=0.5.1 @@ -47,7 +47,7 @@ install_requires = h5py more_itertools cf-xarray==0.8.4 - qa4sm_preprocessing>=0.2 + qa4sm_preprocessing>=0.3 # The usage of test_requires is discouraged, see `Dependency Management` docs #tests_require = pytest; pytest-cov; coverage # Require a specific Python version, e.g. Python 2.7 or >= 3.4 diff --git a/src/smos/smos_l2/cli.py b/src/smos/smos_l2/cli.py index 3901df5..2835bbd 100644 --- a/src/smos/smos_l2/cli.py +++ b/src/smos/smos_l2/cli.py @@ -3,7 +3,7 @@ import pandas as pd from smos.smos_l2.download import SmosDissEoFtp, L2_START_DATE -from smos.smos_l2.reshuffle import swath2ts, extend_ts +from smos.smos_l2.reshuffle import swath2ts, extend_ts, _default_variables from smos.misc import get_first_last_day_images @click.command( @@ -138,6 +138,13 @@ def cli_update_img(path, default=None, help="Format YYYY-MM-DD | Last day to include in the" "time series. [default: Date of the last available image]") +@click.option( + '--variables', + '-v', + type=click.STRING, + default=','.join(_default_variables), + help="List of variables in the swath files to reshuffle. Multiple variables" + " must be comma-separated.") @click.option( '--memory', '-m', @@ -145,7 +152,8 @@ def cli_update_img(path, default=4, help="NUMBER | Available memory (in GB) to use to load image data. " "A larger buffer means faster processing.") -def cli_reshuffle(img_path, ts_path, startdate, enddate, memory): +def cli_reshuffle(img_path, ts_path, startdate, enddate, variables, + memory): """ Convert SMOS L2 image data into a (5x5 degrees chunked) time series format following CF conventions (Indexed Ragged format). @@ -158,8 +166,8 @@ def cli_reshuffle(img_path, ts_path, startdate, enddate, memory): Required Parameters ------------------- IMG_PATH: string - Path where previously downloaded C3S SM images are stored. Use the - `c3s_sm download` command to retrieve image data. + Path where previously downloaded SMOS images are stored. Use the + `smos_l2 download` command to retrieve image data. TS_PATH: string Path where the newly created time series files should be stored. """ @@ -167,9 +175,12 @@ def cli_reshuffle(img_path, ts_path, startdate, enddate, memory): # display it properly on the command line. print(f"Convert image data in {img_path} to time series in {ts_path}") + variables = [str(v.strip()) for v in variables.split(',')] + swath2ts( img_path, ts_path, + variables=variables, startdate=startdate, enddate=enddate, memory=int(memory)) @@ -193,7 +204,7 @@ def cli_update_ts(img_path, ts_path): Required Parameters ------------------- IMG_PATH: string - Path where previously downloaded C3S SM images are stored. + Path where previously downloaded SMOS files are stored. TS_PATH: string Path where the time series to update are stored """ diff --git a/src/smos/smos_l2/reshuffle.py b/src/smos/smos_l2/reshuffle.py index 9a025e4..46a7bcd 100644 --- a/src/smos/smos_l2/reshuffle.py +++ b/src/smos/smos_l2/reshuffle.py @@ -1,3 +1,4 @@ +import numpy as np import pandas as pd import os import yaml @@ -5,8 +6,19 @@ from smos.misc import read_summary_yml, get_first_last_day_images from datetime import datetime - -def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): +_default_variables = ( + "Soil_Moisture", + "Science_Flags", + "Confidence_Flags", + "Chi_2_P", + "RFI_Prob", + "N_RFI_X", + "N_RFI_Y", + "M_AVA0", +) + +def swath2ts(img_path, ts_path, variables=_default_variables, + startdate=None, enddate=None, memory=4): """ Convert SMOS L2 swath data to time series in IndexedRaggedTs format. @@ -17,6 +29,19 @@ def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): swath data are found. ts_path: str Local directory where the converted time series data will be stored. + variables: tuple or str, optional (default: None) + List of variables to include, None will use the default variables + "Soil_Moisture", + "Soil_Moisture_DQX", + "Science_Flags", + "Confidence_Flags", + "Processing_Flags", + "Chi_2_P", + "RFI_Prob", + "N_RFI_X", + "N_RFI_Y", + "M_AVA0", + "acquisition_time" startdate: str or datetime, optional (default: None) First day of the available swath data that should be included in the time series. If None is passed, then the first available day is used. @@ -27,7 +52,13 @@ def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4): Size of available memory in GB. More memory will lead to a faster conversion. """ - reader = SMOSL2Reader(img_path) + variables = [v for v in np.atleast_1d(variables)] + + if "acquisition_time" not in variables: + variables.append("acquisition_time") + + reader = SMOSL2Reader(img_path, varnames=variables, + add_overpass_flag=True) first_day, last_day = get_first_last_day_images(img_path) @@ -123,3 +154,9 @@ def extend_ts(img_path, ts_path, memory=4): else: print(f"No extension required From: {startdate} To: {last_day}") +if __name__ == '__main__': + ts_path = '/tmp/ts' + img_path = "/home/wpreimes/shares/climers/Projects/QA4SM_HR/07_data/SERVICE_DATA/SMOS_L2/SMOSL2_v700-ext/images" + # swath2ts(img_path, + # ts_path=ts_path, startdate='2024-03-01', enddate='2024-03-05') + extend_ts(img_path, ts_path) diff --git a/tests/smos_l2/test_l2_reshuffle.py b/tests/smos_l2/test_l2_reshuffle.py index acb090e..8485237 100644 --- a/tests/smos_l2/test_l2_reshuffle.py +++ b/tests/smos_l2/test_l2_reshuffle.py @@ -1,7 +1,7 @@ import os import pandas as pd from tempfile import TemporaryDirectory -from smos.smos_l2.reshuffle import swath2ts, extend_ts +from smos.smos_l2.reshuffle import swath2ts, extend_ts, _default_variables from smos.misc import read_summary_yml from pynetcf.time_series import GriddedNcIndexedRaggedTs from pygeogrids.netcdf import load_grid @@ -42,6 +42,11 @@ def test_reshuffle_and_update(): ts.loc['2022-01-02', 'Soil_Moisture'].values[0], 0.52442, 5 ) + for var in _default_variables: + assert var in ts.columns + + assert 'Overpass' in ts.columns + assert 1 in ts.index.day assert 2 in ts.index.day assert 3 not in ts.index.day # this must be excluded