diff --git a/.github/workflows/catalog.yaml b/.github/workflows/catalog.yaml index dfb12685a1..4dc3559b29 100644 --- a/.github/workflows/catalog.yaml +++ b/.github/workflows/catalog.yaml @@ -6,7 +6,143 @@ on: name: catalog jobs: - catalog: + metadata_catalog: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v4 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator + - name: Render metadata + shell: Rscript {0} + run: source("catalog/model_metadata.R") + + - name: Render catalog + shell: Rscript {0} + run: source("catalog/catalog.R") + + - name: Commit and Push + run: | + git pull + git config user.name github-actions + git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + forecasts: + needs: metadata_catalog + if: success() || failure() + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v3 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator + - name: Render + shell: Rscript {0} + run: source('catalog/forecasts/forecast_models.R') + + - name: Commit and Push + run: | + git pull + git config user.name github-actions + git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + scores: + needs: forecasts + if: success() || failure() + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v3 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator + - name: Render + shell: Rscript {0} + run: source('catalog/scores/scores_models.R') + + - name: Commit and Push + run: | + git pull + git config user.name github-actions + git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + inventory: + needs: scores + if: success() || failure() + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v3 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator + - name: Render + shell: Rscript {0} + run: source('catalog/inventory/create_inventory_page.R') + + - name: Commit and Push + run: | + git pull + git config user.name github-actions + git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + summaries: + needs: inventory + if: success() || failure() runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -18,23 +154,102 @@ jobs: - uses: actions/checkout@v3 with: - ref: prod - fetch-depth: 0 - set-safe-directory: '*' + ref: prod + fetch-depth: 0 + set-safe-directory: '*' - name: install validator run: | pip install stac-validator + - name: Render + shell: Rscript {0} + run: source('catalog/summaries/summaries_models.R') + - name: Commit and Push + run: | + git pull + git config user.name github-actions + git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + noaa: + needs: summaries + if: success() || failure() + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v3 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator - name: Render shell: Rscript {0} - run: source("catalog/update_stac.R") + run: source('catalog/noaa_forecasts/noaa_forecasts.R') - name: Commit and Push run: | + git pull git config user.name github-actions git config user.email github-actions@github.com + git add catalog/* . + git commit -a -m "update catalog" || echo "nothing to commit" + git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + targets_sites: + needs: noaa + if: success() || failure() + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + OSN_KEY: ${{ secrets.OSN_KEY }} + OSN_SECRET: ${{ secrets.OSN_SECRET }} + container: eco4cast/rocker-neon4cast:latest + steps: + - run: git config --system --add safe.directory '*' + + - uses: actions/checkout@v3 + with: + ref: prod + fetch-depth: 0 + set-safe-directory: '*' + + - name: install validator + run: | + pip install stac-validator + - name: Render targets + shell: Rscript {0} + run: source('catalog/targets/create_targets_page.R') + + - name: Render sites + shell: Rscript {0} + run: source('catalog/sites/build_sites_page.R') + + - name: Commit and Push + run: | git pull + git config user.name github-actions + git config user.email github-actions@github.com git add catalog/* . git commit -a -m "update catalog" || echo "nothing to commit" git push https://${GITHUB_PAT}:${GITHUB_PAT}@github.com/${GITHUB_REPOSITORY} + Healthcheck: + needs: [metadata_catalog, forecasts, scores, inventory, summaries, noaa, targets_sites] + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + container: eco4cast/rocker-neon4cast:latest + steps: + - name: Call Healthcheck + run: | + curl -m 10 --retry 5 https://hc-ping.com/22fcd257-7930-455c-948d-0f913743b0c3 diff --git a/catalog/forecasts/aquatics/Daily_Chlorophyll_a/collection.json b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/collection.json index aa84613658..6ad0dedc42 100644 --- a/catalog/forecasts/aquatics/Daily_Chlorophyll_a/collection.json +++ b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/collection.json @@ -1,6 +1,6 @@ { "id": "Daily_Chlorophyll_a", - "description": "This page includes all models for the Daily_Chlorophyll_a variable.", + "description": "All models for the Daily_Chlorophyll_a variable. The variable description is as follows: daily mean Chlorophyll-a (ug/L)", "stac_version": "1.0.0", "license": "CC0-1.0", "stac_extensions": [ @@ -9,16 +9,23 @@ "https://stac-extensions.github.io/table/v1.2.0/schema.json" ], "type": "Collection", + "sci:doi": "10.1002/fee.2616", + "sci:publications": {}, "links": [ { "rel": "item", "type": "application/json", - "href": "../../models/model_items/climatology.json" + "href": "./models/persistenceRW.json" }, { "rel": "item", "type": "application/json", - "href": "../../models/model_items/persistenceRW.json" + "href": "./models/USGSHABs1.json" + }, + { + "rel": "item", + "type": "application/json", + "href": "./models/climatology.json" }, { "rel": "parent", @@ -47,7 +54,7 @@ }, { "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", + "href": "https://projects.ecoforecast.org/usgsrc4cast-ci/", "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", "type": "text/html" } @@ -56,23 +63,23 @@ "extent": { "spatial": { "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] + [-122.6692, 39.6328, -74.7781, 45.5175] ] }, "temporal": { "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-07-17T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" + "name": "13 columns", + "type": null, + "description": {} }, { "name": "datetime", @@ -85,9 +92,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -95,15 +102,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, { "name": "project_id", "type": "string", @@ -123,11 +140,6 @@ "name": "model_id", "type": "string", "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" } ], "assets": { @@ -141,12 +153,12 @@ "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { - "href": "pending", + "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Back-b.jpg", "type": "image/JPEG", "roles": [ "thumbnail" ], - "title": "pending" + "title": "Thumbnail Image" } } } diff --git a/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/USGSHABs1.json b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/USGSHABs1.json new file mode 100644 index 0000000000..7b43e7eaa1 --- /dev/null +++ b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/USGSHABs1.json @@ -0,0 +1,191 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "USGSHABs1_chla_P1D_forecast", + "bbox": [ + [-122.6692, 39.6328, -74.7781, 45.5175] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [-122.6692, 45.5175], + [-122.2974, 44.7538], + [-90.6077, 39.6328], + [-122.5773, 45.3793], + [-74.7781, 40.2217], + [-89.3562, 41.1073], + [-88.984, 41.3248], + [-88.6142, 41.2999], + [-88.2515, 42.31], + [-75.0574, 41.7567] + ] + }, + "properties": { + "title": "USGSHABs1", + "description": "All forecasts for the Daily_Chlorophyll_a variable for the USGSHABs1 model. Information for the model is provided as follows: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states..\n The model predicts this variable at the following sites: USGS-14211720, USGS-14181500, USGS-05586300, USGS-14211010, USGS-01463500, USGS-05558300, USGS-05553700, USGS-05543010, USGS-05549500, USGS-01427510.\n Forecasts are the raw forecasts that includes all ensemble members or distribution parameters. Due to the size of the raw forecasts, we recommend accessing the forecast summaries or scores to analyze forecasts (unless you need the individual ensemble members)", + "start_datetime": "2024-02-13", + "end_datetime": "2024-07-14", + "providers": [ + { + "url": "jzwart@usgs.gov", + "name": "Jacob Zwart", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasts", + "usgsrc4cast", + "aquatics", + "USGSHABs1", + "Chlorophyll_a", + "chla", + "Daily", + "P1D", + "USGS-14211720", + "USGS-14181500", + "USGS-05586300", + "USGS-14211010", + "USGS-01463500", + "USGS-05558300", + "USGS-05553700", + "USGS-05543010", + "USGS-05549500", + "USGS-01427510" + ], + "table:columns": [ + { + "name": "13 columns", + "type": null, + "description": {} + }, + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + }, + { + "name": "prediction", + "type": "double", + "description": "predicted value for variable" + }, + { + "name": "parameter", + "type": "string", + "description": "ensemble member or distribution parameter" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, + { + "name": "project_id", + "type": "string", + "description": "unique identifier for the forecast project" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + } + ] + }, + "collection": "forecasts", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "self", + "href": "USGSHABs1.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/climatology.json b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/climatology.json new file mode 100644 index 0000000000..b18956577e --- /dev/null +++ b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/climatology.json @@ -0,0 +1,191 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "climatology_chla_P1D_forecast", + "bbox": [ + [-122.6692, 39.6328, -74.7781, 45.5175] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [-75.0574, 41.7567], + [-74.7781, 40.2217], + [-88.6142, 41.2999], + [-88.2515, 42.31], + [-88.984, 41.3248], + [-89.3562, 41.1073], + [-90.6077, 39.6328], + [-122.2974, 44.7538], + [-122.5773, 45.3793], + [-122.6692, 45.5175] + ] + }, + "properties": { + "title": "climatology", + "description": "All forecasts for the Daily_Chlorophyll_a variable for the climatology model. Information for the model is provided as follows: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year..\n The model predicts this variable at the following sites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720.\n Forecasts are the raw forecasts that includes all ensemble members or distribution parameters. Due to the size of the raw forecasts, we recommend accessing the forecast summaries or scores to analyze forecasts (unless you need the individual ensemble members)", + "start_datetime": "2024-02-07", + "end_datetime": "2024-07-17", + "providers": [ + { + "url": "jzwart@usgs.gov", + "name": "Jacob Zwart", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasts", + "usgsrc4cast", + "aquatics", + "climatology", + "Chlorophyll_a", + "chla", + "Daily", + "P1D", + "USGS-01427510", + "USGS-01463500", + "USGS-05543010", + "USGS-05549500", + "USGS-05553700", + "USGS-05558300", + "USGS-05586300", + "USGS-14181500", + "USGS-14211010", + "USGS-14211720" + ], + "table:columns": [ + { + "name": "13 columns", + "type": null, + "description": {} + }, + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + }, + { + "name": "prediction", + "type": "double", + "description": "predicted value for variable" + }, + { + "name": "parameter", + "type": "string", + "description": "ensemble member or distribution parameter" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, + { + "name": "project_id", + "type": "string", + "description": "unique identifier for the forecast project" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + } + ] + }, + "collection": "forecasts", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "climatology" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "climatology" + }, + { + "rel": "self", + "href": "climatology.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_climatology.R", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_climatology.R", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/persistenceRW.json b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/persistenceRW.json new file mode 100644 index 0000000000..4d74c91fc8 --- /dev/null +++ b/catalog/forecasts/aquatics/Daily_Chlorophyll_a/models/persistenceRW.json @@ -0,0 +1,191 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "persistenceRW_chla_P1D_forecast", + "bbox": [ + [-122.6692, 39.6328, -74.7781, 45.5175] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [-122.6692, 45.5175], + [-75.0574, 41.7567], + [-74.7781, 40.2217], + [-88.6142, 41.2999], + [-88.2515, 42.31], + [-88.984, 41.3248], + [-89.3562, 41.1073], + [-90.6077, 39.6328], + [-122.2974, 44.7538], + [-122.5773, 45.3793] + ] + }, + "properties": { + "title": "persistenceRW", + "description": "All forecasts for the Daily_Chlorophyll_a variable for the persistenceRW model. Information for the model is provided as follows: Random walk model based on most recent stream chl-a observations using the fable::RW() model..\n The model predicts this variable at the following sites: USGS-14211720, USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010.\n Forecasts are the raw forecasts that includes all ensemble members or distribution parameters. Due to the size of the raw forecasts, we recommend accessing the forecast summaries or scores to analyze forecasts (unless you need the individual ensemble members)", + "start_datetime": "2024-02-07", + "end_datetime": "2024-07-16", + "providers": [ + { + "url": "jzwart@usgs.gov", + "name": "Jacob Zwart", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasts", + "usgsrc4cast", + "aquatics", + "persistenceRW", + "Chlorophyll_a", + "chla", + "Daily", + "P1D", + "USGS-14211720", + "USGS-01427510", + "USGS-01463500", + "USGS-05543010", + "USGS-05549500", + "USGS-05553700", + "USGS-05558300", + "USGS-05586300", + "USGS-14181500", + "USGS-14211010" + ], + "table:columns": [ + { + "name": "13 columns", + "type": null, + "description": {} + }, + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + }, + { + "name": "prediction", + "type": "double", + "description": "predicted value for variable" + }, + { + "name": "parameter", + "type": "string", + "description": "ensemble member or distribution parameter" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, + { + "name": "project_id", + "type": "string", + "description": "unique identifier for the forecast project" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + } + ] + }, + "collection": "forecasts", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "persistenceRW" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "persistenceRW" + }, + { + "rel": "self", + "href": "persistenceRW.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_persistenceRW.R", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_persistenceRW.R", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/forecasts/aquatics/collection.json b/catalog/forecasts/aquatics/collection.json index 1c278d639f..f6903db816 100644 --- a/catalog/forecasts/aquatics/collection.json +++ b/catalog/forecasts/aquatics/collection.json @@ -1,6 +1,6 @@ { "id": "aquatics", - "description": "This page includes variables for the aquatics group.", + "description": "All variables for the aquatics group.", "stac_version": "1.0.0", "license": "CC0-1.0", "stac_extensions": [ @@ -9,6 +9,8 @@ "https://stac-extensions.github.io/table/v1.2.0/schema.json" ], "type": "Collection", + "sci:doi": "10.1002/fee.2616", + "sci:publications": {}, "links": [ { "rel": "child", @@ -42,7 +44,7 @@ }, { "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", + "href": "https://projects.ecoforecast.org/usgsrc4cast-ci/", "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", "type": "text/html" } @@ -51,23 +53,23 @@ "extent": { "spatial": { "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] + [-122.6692, 39.6328, -74.7781, 45.5175] ] }, "temporal": { "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-07-17T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" + "name": "13 columns", + "type": null, + "description": {} }, { "name": "datetime", @@ -80,9 +82,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -90,15 +92,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, { "name": "project_id", "type": "string", @@ -118,22 +130,17 @@ "name": "model_id", "type": "string", "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" } ], "assets": { "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/?endpoint_override=sdsc.osn.xsede.org", + "href": "s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for the NEON Ecological Forecasting Aquatics theme.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |>\n dplyr::filter(variable %in% c(\"chla\")) |>\n dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for the NEON Ecological Forecasting Aquatics theme.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |>\n dplyr::filter(variable %in% c(\"chla\")) |>\n dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Back-b.jpg", diff --git a/catalog/forecasts/collection.json b/catalog/forecasts/collection.json index 689e89835b..7b663313b1 100644 --- a/catalog/forecasts/collection.json +++ b/catalog/forecasts/collection.json @@ -16,12 +16,6 @@ "href": "aquatics/collection.json", "title": "aquatics" }, - { - "rel": "child", - "type": "application/json", - "href": "models/collection.json", - "title": "group item" - }, { "rel": "parent", "type": "application/json", @@ -49,7 +43,7 @@ }, { "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", + "href": "https://projects.ecoforecast.org/usgsrc4cast-ci/", "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", "type": "text/html" } @@ -58,28 +52,23 @@ "extent": { "spatial": { "bbox": [ - [ - -122.6692, - 39.6327, - -74.7781, - 45.5175 - ] + [-122.6692, 39.6328, -74.7781, 45.5175] ] }, "temporal": { "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-07-17T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" + "name": "13 columns", + "type": null, + "description": {} }, { "name": "datetime", @@ -92,9 +81,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -102,15 +91,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": "date of the forecasted value" + }, { "name": "project_id", "type": "string", @@ -130,11 +129,6 @@ "name": "model_id", "type": "string", "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" } ], "assets": { diff --git a/catalog/forecasts/forecast_models.R b/catalog/forecasts/forecast_models.R index 62e9925094..4250a167f4 100644 --- a/catalog/forecasts/forecast_models.R +++ b/catalog/forecasts/forecast_models.R @@ -13,6 +13,7 @@ catalog_config <- config$catalog_config ## CREATE table for column descriptions forecast_description_create <- data.frame(datetime = 'datetime of the forecasted value (ISO 8601)', + date = 'date of the forecasted value', site_id = 'For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)', family = 'For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”', parameter = 'ensemble member or distribution parameter', @@ -34,13 +35,12 @@ forecast_description_create <- data.frame(datetime = 'datetime of the forecasted # model_id <- 'climatology' print('FIND FORECAST TABLE SCHEMA') -forecast_theme_df <- arrow::open_dataset(arrow::s3_bucket(config$forecasts_bucket, - endpoint_override = config$endpoint, anonymous = TRUE)) +forecast_theme_df <- arrow::open_dataset(arrow::s3_bucket(config$forecasts_bucket, endpoint_override = config$endpoint, anonymous = TRUE)) #|> print('FIND INVENTORY BUCKET') forecast_s3 <- arrow::s3_bucket(glue::glue("{config$inventory_bucket}/catalog/forecasts/project_id={config$project_id}"), - endpoint_override = "sdsc.osn.xsede.org", - anonymous=TRUE) + endpoint_override = "sdsc.osn.xsede.org", + anonymous=TRUE) print('OPEN INVENTORY BUCKET') forecast_data_df <- arrow::open_dataset(forecast_s3) |> @@ -62,22 +62,22 @@ build_description <- paste0("Forecasts are the raw forecasts that includes all e forecast_sites <- forecast_sites$site_id stac4cast::build_forecast_scores(table_schema = forecast_theme_df, - #theme_id = 'Forecasts', - table_description = forecast_description_create, - start_date = forecast_min_date, - end_date = forecast_max_date, - id_value = "daily-forecasts", - description_string = build_description, - about_string = catalog_config$about_string, - about_title = catalog_config$about_title, - theme_title = "Forecasts", - destination_path = catalog_config$forecast_path, - aws_download_path = catalog_config$aws_download_path_forecasts, - link_items = stac4cast::generate_group_values(group_values = names(config$variable_groups)), - thumbnail_link = catalog_config$forecasts_thumbnail, - thumbnail_title = catalog_config$forecasts_thumbnail_title, - group_sites = forecast_sites, - model_child = TRUE) + #theme_id = 'Forecasts', + table_description = forecast_description_create, + start_date = forecast_min_date, + end_date = forecast_max_date, + id_value = "daily-forecasts", + description_string = build_description, + about_string = catalog_config$about_string, + about_title = catalog_config$about_title, + theme_title = "Forecasts", + destination_path = catalog_config$forecast_path, + aws_download_path = catalog_config$aws_download_path_forecasts, + link_items = stac4cast::generate_group_values(group_values = names(config$variable_groups)), + thumbnail_link = catalog_config$forecasts_thumbnail, + thumbnail_title = catalog_config$forecasts_thumbnail_title, + group_sites = forecast_sites, + model_child = FALSE) ## READ IN GSHEET FILES variable_gsheet <- gsheet2tbl(config$target_metadata_gsheet) @@ -95,7 +95,7 @@ registered_model_id <- gsheet_read |> arrange(row_non_na) |> distinct(model_id, project_id, .keep_all = TRUE) -## BUILD VARIABLE GROUPS +## BUILD VARIABLE GROUPS (variables and models) for (i in 1:length(config$variable_groups)){ ## organize variable groups print(names(config$variable_groups)[i]) @@ -110,12 +110,12 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups } ## REMOVE STALE OR UNUSED DIRECTORIES - current_var_path <- paste0(catalog_config$summaries_path,names(config$variable_groups[i])) + current_var_path <- paste0(catalog_config$forecast_path,names(config$variable_groups[i])) current_var_dirs <- list.dirs(current_var_path, recursive = FALSE, full.names = TRUE) unlink(current_var_dirs, recursive = TRUE) - if (!dir.exists(paste0(catalog_config$forecast_path,names(config$variable_groups[i])))){ - dir.create(paste0(catalog_config$forecast_path,names(config$variable_groups[i]))) + if (!dir.exists(paste0(catalog_config$forecast_path,'/',names(config$variable_groups[i])))){ + dir.create(paste0(catalog_config$forecast_path,'/',names(config$variable_groups[i]))) } # match variable with full name in gsheet @@ -127,7 +127,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups var_name_full <- var_gsheet_arrange[which(var_gsheet_arrange$`"official" targets name` %in% var_values),1][[1]] ## CREATE VARIABLE GROUP JSONS - group_description <- paste0('This page includes variables for the ',names(config$variable_groups[i]),' group.') + group_description <- paste0('All variables for the ',names(config$variable_groups[i]),' group.') ## find group sites find_group_sites <- forecast_data_df |> @@ -143,7 +143,6 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups for(j in 1:length(config$variable_groups[[i]]$group_vars)){ # FOR EACH VARIABLE WITHIN A MODEL GROUP - var_name <- names(config$variable_groups[[i]]$group_vars[j]) print(var_name) @@ -199,6 +198,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups var_description <- paste0('All models for the ',var_formal_name,' variable. The variable description is as follows: ', var_metadata$Description) + #var_path <- gsub('forecasts','scores',var_data$path[1]) var_path <- var_data$path[1] ## build lists for creating publication items @@ -211,7 +211,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups variable_name_build <- append(variable_name_build, var_formal_name) - # variable_name_build <- append(variable_name_build, var_formal_name) + #variable_name_build <- append(variable_name_build, var_formal_name) stac4cast::build_group_variables(table_schema = forecast_theme_df, #theme_id = var_formal_name[j], @@ -229,7 +229,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups aws_download_path = var_path, group_var_items = stac4cast::generate_variable_model_items(model_list = var_models$model_id), thumbnail_link = config$variable_groups[[i]]$thumbnail_link, - thumbnail_title = 'Thumbnail Image', + thumbnail_title = "Thumbnail Image", group_var_vector = NULL, group_sites = find_var_sites$site_id, citation_values = var_citations, @@ -238,7 +238,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups forecast_sites <- c() ## LOOP OVER MODEL IDS AND CREATE JSONS - for (m in theme_models$model_id){ + for (m in var_models$model_id){ # make model items directory if (!dir.exists(paste0(catalog_config$forecast_path,'/',names(config$variable_groups)[i],'/',var_formal_name,"/models"))){ @@ -267,8 +267,8 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups model_var_full_name <- model_var_duration_df |> left_join((variable_gsheet |> - select(variable = `"official" targets name`, full_name = `Variable name`) |> - distinct(variable, .keep_all = TRUE)), by = c('variable')) + select(variable = `"official" targets name`, full_name = `Variable name`) |> + distinct(variable, .keep_all = TRUE)), by = c('variable')) model_sites <- forecast_data_df |> filter(model_id == m, @@ -288,7 +288,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups model_vars$var_duration_name <- paste0(model_vars$duration_name, " ", model_vars$full_name) forecast_sites <- append(forecast_sites, stac4cast::get_site_coords(site_metadata = catalog_config$site_metadata_url, - sites = model_sites$site_id)) + sites = model_sites$site_id)) idx = which(registered_model_id$model_id == m) @@ -318,7 +318,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups stac4cast::build_model(model_id = m, stac_id = stac_id, team_name = registered_model_id$`Long name of the model`[idx], - # model_description = registered_model_id[idx,"Describe your modeling approach in your own words."][[1]], + #model_description = registered_model_id[idx,"Describe your modeling approach in your own words."][[1]], model_description = model_description, start_date = model_min_date, end_date = model_max_date, @@ -343,7 +343,7 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups } ## end variable loop ## BUILD THE GROUP PAGES WITH UPDATED VAR/PUB INFORMATION - stac4cast::build_group_variables(table_schema = forecast_data_df, + stac4cast::build_group_variables(table_schema = forecast_theme_df, table_description = forecast_description_create, start_date = forecast_min_date, end_date = forecast_max_date, diff --git a/catalog/forecasts/models/collection.json b/catalog/forecasts/models/collection.json index 8f38db70c6..a07e65b1c9 100644 --- a/catalog/forecasts/models/collection.json +++ b/catalog/forecasts/models/collection.json @@ -9,6 +9,7 @@ "https://stac-extensions.github.io/table/v1.2.0/schema.json" ], "type": "Collection", + "sci:doi": "https://doi.org/10.1002/fee.2616", "links": [ { "rel": "item", @@ -20,6 +21,11 @@ "type": "application/json", "href": "model_items/persistenceRW.json" }, + { + "rel": "item", + "type": "application/json", + "href": "model_items/USGSHABs1.json" + }, { "rel": "parent", "type": "application/json", @@ -37,7 +43,7 @@ }, { "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" + "href": {} }, { "rel": "about", @@ -63,17 +69,12 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-06-03T00:00:00Z" ] ] } }, "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, { "name": "datetime", "type": "timestamp[us, tz=UTC]", @@ -85,9 +86,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -95,15 +96,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": {} + }, { "name": "project_id", "type": "string", diff --git a/catalog/forecasts/models/model_items/USGSHABs1.json b/catalog/forecasts/models/model_items/USGSHABs1.json new file mode 100644 index 0000000000..f26add9902 --- /dev/null +++ b/catalog/forecasts/models/model_items/USGSHABs1.json @@ -0,0 +1,178 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "USGSHABs1", + "bbox": [ + [ + -122.6692, + 45.5175, + -74.7781, + 45.5175 + ] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [], + [], + [], + [], + [], + [], + [], + [] + ] + }, + "properties": { + "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-14211720, USGS-14181500, USGS-05586300, USGS-05558300, USGS-05553700, USGS-05543010, USGS-05549500, USGS-01427510\n\nVariables: Daily Chlorophyll_a", + "start_datetime": "2024-02-13", + "end_datetime": "2024-06-01", + "providers": [ + { + "url": "pending", + "name": "pending", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasting", + "usgsrc4cast", + "Daily Chlorophyll_a" + ], + "table:columns": [ + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + }, + { + "name": "prediction", + "type": "double", + "description": "predicted value for variable" + }, + { + "name": "parameter", + "type": "string", + "description": "ensemble member or distribution parameter" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "date", + "type": "date32[day]", + "description": {} + }, + { + "name": "project_id", + "type": "string", + "description": "unique identifier for the forecast project" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + }, + { + "name": "reference_date", + "type": "string", + "description": "date that the forecast was initiated" + } + ] + }, + "collection": "forecasts", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "self", + "href": "USGSHABs1.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "pending", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "pending", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/forecasts/models/model_items/climatology.json b/catalog/forecasts/models/model_items/climatology.json index 56366817ea..eec1b534a7 100644 --- a/catalog/forecasts/models/model_items/climatology.json +++ b/catalog/forecasts/models/model_items/climatology.json @@ -24,13 +24,14 @@ [], [], [], + [], [] ] }, "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", + "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-01427510, USGS-01463500, USGS-05543010\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-03-17", + "end_datetime": "2024-06-03", "providers": [ { "url": "pending", @@ -56,11 +57,6 @@ "Daily Chlorophyll_a" ], "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, { "name": "datetime", "type": "timestamp[us, tz=UTC]", @@ -72,9 +68,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -82,15 +78,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": {} + }, { "name": "project_id", "type": "string", diff --git a/catalog/forecasts/models/model_items/persistenceRW.json b/catalog/forecasts/models/model_items/persistenceRW.json index 0f2a7aeecf..5c8bd9c2b5 100644 --- a/catalog/forecasts/models/model_items/persistenceRW.json +++ b/catalog/forecasts/models/model_items/persistenceRW.json @@ -31,7 +31,7 @@ "properties": { "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-03-15", + "end_datetime": "2024-06-02", "providers": [ { "url": "pending", @@ -57,11 +57,6 @@ "Daily Chlorophyll_a" ], "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, { "name": "datetime", "type": "timestamp[us, tz=UTC]", @@ -73,9 +68,9 @@ "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" }, { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + "name": "prediction", + "type": "double", + "description": "predicted value for variable" }, { "name": "parameter", @@ -83,15 +78,25 @@ "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" }, { "name": "pub_datetime", "type": "timestamp[us, tz=UTC]", "description": "datetime that forecast was submitted" }, + { + "name": "date", + "type": "date32[day]", + "description": {} + }, { "name": "project_id", "type": "string", diff --git a/catalog/inventory/collection.json b/catalog/inventory/collection.json index 2c6de1f42e..86f1912ebd 100644 --- a/catalog/inventory/collection.json +++ b/catalog/inventory/collection.json @@ -27,7 +27,7 @@ }, { "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" + "href": "10.1002/fee.2616" }, { "rel": "about", @@ -37,7 +37,7 @@ }, { "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", + "href": "https://projects.ecoforecast.org/usgsrc4cast-ci/", "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", "type": "text/html" } @@ -58,12 +58,17 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-07-17T00:00:00Z" ] ] } }, "table:columns": [ + { + "name": "14 columns", + "type": null, + "description": {} + }, { "name": "duration", "type": "string", @@ -128,11 +133,6 @@ "name": "latitude", "type": "double", "description": {} - }, - { - "name": "longitude", - "type": "double", - "description": {} } ], "assets": { diff --git a/catalog/model_metadata.R b/catalog/model_metadata.R index cce5444df6..dcdc34abc1 100644 --- a/catalog/model_metadata.R +++ b/catalog/model_metadata.R @@ -12,17 +12,19 @@ minioclient::mc_alias_set("osn", Sys.getenv("OSN_KEY"), Sys.getenv("OSN_SECRET")) -#googlesheets4::gs4_deauth() -# registered_models <- googlesheets4::read_sheet(config$model_metadata_gsheet) |> -# dplyr::filter(`What forecasting challenge are you registering for?` == config$project_id, -# !is.na(registered_models$`Which category best matches your modeling approach?`)) - -registered_models <- gsheet::gsheet2tbl(config$model_metadata_gsheet) |> +googlesheets4::gs4_deauth() +registered_models <- googlesheets4::read_sheet(config$model_metadata_gsheet) |> dplyr::filter(`What forecasting challenge are you registering for?` == config$project_id, - !is.na(`Which category best matches your modeling approach?`)) + !grepl("example",model_id)) + +# registered_models <- gsheet::gsheet2tbl(config$model_metadata_gsheet) |> +# dplyr::filter(`What forecasting challenge are you registering for?` == config$project_id, +# !is.na(`Which category best matches your modeling approach?`)) for(i in 1:nrow(registered_models)){ + print(registered_models$model_id[i]) + #Need to get from forecast output progagates_method <- "Infer from family column in archived forecasts" @@ -37,13 +39,27 @@ for(i in 1:nrow(registered_models)){ metadata$model_description$type <- registered_models$`Which category best matches your modeling approach?`[i] metadata$model_description$repository <- registered_models$`Web link to model code`[i] + + ## handle models with no metadata present + if (is.na(registered_models$`Which category best matches your modeling approach?`[i])){ + print('Skipping model due to missing metadata') + metadata$uncertainty$initial_conditions$present <- "Unknown" + metadata$uncertainty$drivers$present <- "Unknown" + metadata$uncertainty$process$present <- "Unknown" + metadata$uncertainty$obs_error$present <- "Unknown" + metadata$uncertainty$structural_error$present <- "Unknown" + metadata$uncertainty$random_effects$present <- "Unknown" + + next() + } + # Initial Conditions - if(registered_models$`Do your forecasts include uncertainty from initial conditions?`[i] == "Yes and they were estimated from data"){ + if(registered_models$`Do your forecasts include uncertainty from initial conditions?`[i] %in% c("Yes and they were estimated from data", "Yes")){ metadata$uncertainty$initial_conditions$present <- TRUE metadata$uncertainty$initial_conditions$data_driven <- TRUE metadata$uncertainty$initial_conditions$progagates$type <- progagates_method - }else if(registered_models$`Do your forecasts include uncertainty from initial conditions?`[i] == "Yes and they were not estimated from data (e.g., assumed initial conditions were the model equilibrium)"){ + }else if(registered_models$`Do your forecasts include uncertainty from initial conditions?`[i] %in% c("Yes and they were not estimated from data (e.g., assumed initial conditions were the model equilibrium)", "Yes")){ metadata$uncertainty$initial_conditions$present <- TRUE metadata$uncertainty$initial_conditions$data_driven <- FALSE metadata$uncertainty$initial_conditions$progagates$type <- progagates_method @@ -65,23 +81,18 @@ for(i in 1:nrow(registered_models)){ #Parameters - if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] == "Yes and at least one is estimated from data"){ - metadata$uncertainty$parameters$present <- TRUE - metadata$uncertainty$parameters$data_driven <- TRUE - metadata$uncertainty$parameters$progagates$type <- progagates_method - }else if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] == "Yes and they are not estimated from data"){ + if(registered_models$`Does your model include parameters?`[i] %in% c("Yes and they are not estimated from data", "Yes")){ metadata$uncertainty$parameters$present <- TRUE metadata$uncertainty$parameters$data_driven <- FALSE - metadata$uncertainty$parameters$progagates$type <- progagates_method - }else if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] == "No"){ - if(registered_models$`Does your model include parameters?`[i] == "Yes"){ - metadata$uncertainty$parameters$present <- TRUE - metadata$uncertainty$parameters$data_driven <- FALSE - }else{ - metadata$uncertainty$parameters$present <- FALSE + if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] == "Yes"){ + metadata$uncertainty$parameters$progagates$type <- progagates_method + } + }else if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] %in% c("Yes and at least one is estimated from data", "Yes")){ + metadata$uncertainty$parameters$present <- TRUE + metadata$uncertainty$parameters$data_driven <- TRUE + if(registered_models$`Does your forecast include uncertainty from the model parameters?`[i] == "Yes"){ + metadata$uncertainty$parameters$progagates$type <- progagates_method } - }else{ - metadata$uncertainty$parameters$present <- "Unknown" } if(registered_models$`Do you update your initial conditions or parameters between forecast submissions using newly available data (i.e., data assimilation)?`[i] %in% @@ -108,11 +119,11 @@ for(i in 1:nrow(registered_models)){ #Process model - if(registered_models$`Does your forecast include uncertainty from the model (process uncertainty)?`[i] == "Yes and the uncertainty was estimated from data"){ + if(registered_models$`Does your forecast include uncertainty from the model (process uncertainty)?`[i] %in% c("Yes and the uncertainty was estimated from data", "Yes")){ metadata$uncertainty$process_error$present <- TRUE metadata$uncertainty$process_error$data_driven <- TRUE metadata$uncertainty$process_error$progagates$type <- progagates_method - }else if(registered_models$`Does your forecast include uncertainty from the model (process uncertainty)?`[i] == "Yes and the uncertainty was not estimated from data"){ + }else if(registered_models$`Does your forecast include uncertainty from the model (process uncertainty)?`[i] %in% c("Yes and the uncertainty was not estimated from data","Yes")){ metadata$uncertainty$process_error$present <- TRUE metadata$uncertainty$process_error$data_driven <- FALSE metadata$uncertainty$process_error$progagates$type <- progagates_method @@ -124,11 +135,11 @@ for(i in 1:nrow(registered_models)){ # Measurement error - if(registered_models$`Does your forecast include uncertainty from measurement noise?`[i] == "Yes and the noise was estimated from data"){ + if(registered_models$`Does your forecast include uncertainty from measurement noise?`[i] %in% c("Yes and the noise was estimated from data", "Yes")){ metadata$uncertainty$obs_error$present <- TRUE metadata$uncertainty$obs_error$data_driven <- TRUE metadata$uncertainty$obs_error$progagates$type <- progagates_method - }else if(registered_models$`Does your forecast include uncertainty from measurement noise?`[i] == "Yes and the noise was not estimated from data"){ + }else if(registered_models$`Does your forecast include uncertainty from measurement noise?`[i] %in% c("Yes and the noise was not estimated from data", "Yes")){ metadata$uncertainty$obs_error$present <- TRUE metadata$uncertainty$obs_error$data_driven <- FALSE metadata$uncertainty$obs_error$progagates$type <- progagates_method @@ -154,11 +165,11 @@ for(i in 1:nrow(registered_models)){ # Random effects - if(registered_models$`Does your forecast include uncertainty from parameter random effects?`[i] == "Yes and the uncertainty was estimated from data"){ + if(registered_models$`Does your forecast include uncertainty from parameter random effects?`[i] %in% c("Yes and the uncertainty was estimated from data", "Yes")){ metadata$uncertainty$random_effects$present <- TRUE metadata$uncertainty$random_effects$data_driven <- TRUE metadata$uncertainty$random_effects$progagates$type <- progagates_method - }else if(registered_models$`Does your forecast include uncertainty from parameter random effects?`[i] == "Yes and the uncertainty was not estimated from data (uncommon)"){ + }else if(registered_models$`Does your forecast include uncertainty from parameter random effects?`[i] %in% c("Yes and the uncertainty was not estimated from data (uncommon)", "Yes")){ metadata$uncertainty$random_effects$present <- TRUE metadata$uncertainty$random_effects$data_driven <- FALSE metadata$uncertainty$random_effects$progagates$type <- progagates_method @@ -171,15 +182,7 @@ for(i in 1:nrow(registered_models)){ file_name <- paste0(metadata$model_id, ".json") jsonlite::write_json(metadata, path = file.path("catalog",file_name), pretty = TRUE) - minioclient::mc_cp(file.path("catalog",file_name), - file.path("osn", - config$model_metadata_bucket, - paste0("project_id=", config$project_id), - file_name)) + minioclient::mc_cp(file.path("catalog",file_name), file.path("osn",config$model_metadata_bucket, file_name)) unlink(file.path("catalog",file_name)) } - - - - diff --git a/catalog/noaa_forecasts/Pseudo/collection.json b/catalog/noaa_forecasts/Pseudo/collection.json index ae8db5062c..2c49f93cf8 100644 --- a/catalog/noaa_forecasts/Pseudo/collection.json +++ b/catalog/noaa_forecasts/Pseudo/collection.json @@ -32,8 +32,8 @@ { "rel": "about", "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" + "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation", + "type": "text/html" }, { "rel": "describedby", @@ -53,21 +53,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -75,19 +75,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -96,39 +86,24 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { "data": { - "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess/pseudo/parquet?endpoint_override=s3.flare-forecast.org", + "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess//pseudo/parquet?endpoint_override=s3.flare-forecast.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess/pseudo/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess//pseudo/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/DSC_0001.jpg", diff --git a/catalog/noaa_forecasts/Stage1-stats/collection.json b/catalog/noaa_forecasts/Stage1-stats/collection.json index a9c31e6792..c399c4b342 100644 --- a/catalog/noaa_forecasts/Stage1-stats/collection.json +++ b/catalog/noaa_forecasts/Stage1-stats/collection.json @@ -32,8 +32,8 @@ { "rel": "about", "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" + "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation", + "type": "text/html" }, { "rel": "describedby", @@ -53,21 +53,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -75,19 +75,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -96,39 +86,24 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { "data": { - "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage1-stats/parquet?endpoint_override=s3.flare-forecast.org", + "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage1-stats/parquet?endpoint_override=s3.flare-forecast.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage1-stats/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage1-stats/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/DSC_0001.jpg", diff --git a/catalog/noaa_forecasts/Stage1/collection.json b/catalog/noaa_forecasts/Stage1/collection.json index 7b680bb0ee..76e9258ce8 100644 --- a/catalog/noaa_forecasts/Stage1/collection.json +++ b/catalog/noaa_forecasts/Stage1/collection.json @@ -32,8 +32,8 @@ { "rel": "about", "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" + "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation", + "type": "text/html" }, { "rel": "describedby", @@ -53,21 +53,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -75,19 +75,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -96,39 +86,24 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { "data": { - "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage1/parquet?endpoint_override=s3.flare-forecast.org", + "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage1/parquet?endpoint_override=s3.flare-forecast.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage1/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage1/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/DSC_0001.jpg", diff --git a/catalog/noaa_forecasts/Stage2/collection.json b/catalog/noaa_forecasts/Stage2/collection.json index 94d8fd52bc..a111c83224 100644 --- a/catalog/noaa_forecasts/Stage2/collection.json +++ b/catalog/noaa_forecasts/Stage2/collection.json @@ -32,8 +32,8 @@ { "rel": "about", "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" + "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation", + "type": "text/html" }, { "rel": "describedby", @@ -53,21 +53,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -75,19 +75,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -96,39 +86,24 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { "data": { - "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage2/parquet?endpoint_override=s3.flare-forecast.org", + "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage2/parquet?endpoint_override=s3.flare-forecast.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage2/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage2/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/DSC_0001.jpg", diff --git a/catalog/noaa_forecasts/Stage3/collection.json b/catalog/noaa_forecasts/Stage3/collection.json index 2c15a2f7bb..664cdb7a95 100644 --- a/catalog/noaa_forecasts/Stage3/collection.json +++ b/catalog/noaa_forecasts/Stage3/collection.json @@ -32,8 +32,8 @@ { "rel": "about", "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" + "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation", + "type": "text/html" }, { "rel": "describedby", @@ -53,21 +53,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -75,19 +75,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -96,39 +86,24 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { "data": { - "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage3/parquet?endpoint_override=s3.flare-forecast.org", + "href": "s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage3/parquet?endpoint_override=s3.flare-forecast.org", "type": "application/x-parquet", "title": "Database Access", "roles": [ "data" ], - "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess/stage3/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + "description": "Use `arrow` for remote access to the database. This R code will return results for NEON forecasts associated with the forecasting challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@drivers/noaa/gefs-v12-reprocess//stage3/parquet?endpoint_override=s3.flare-forecast.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" }, "thumbnail": { "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/DSC_0001.jpg", diff --git a/catalog/noaa_forecasts/collection.json b/catalog/noaa_forecasts/collection.json index d7c8e6e122..c68eafd866 100644 --- a/catalog/noaa_forecasts/collection.json +++ b/catalog/noaa_forecasts/collection.json @@ -88,21 +88,21 @@ "interval": [ [ "2024-01-29T00:00:00Z", - "2024-03-16T00:00:00Z" + "2024-06-02T00:00:00Z" ] ] } }, "table:columns": [ { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + "name": "parameter", + "type": "double", + "description": "ensemble member or distribution parameter" }, { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" }, { "name": "variable", @@ -110,19 +110,9 @@ "description": "name of forecasted variable" }, { - "name": "height", - "type": "string", - "description": "variable height" - }, - { - "name": "horizon", + "name": "prediction", "type": "double", - "description": "number of days in forecast" - }, - { - "name": "parameter", - "type": "int32", - "description": "ensemble member or distribution parameter" + "description": "predicted value for variable" }, { "name": "family", @@ -131,28 +121,13 @@ }, { "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", + "type": "string", "description": "datetime that the forecast was initiated (horizon = 0)" }, { - "name": "forecast_valid", + "name": "site_id", "type": "string", - "description": "date when forecast is valid" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "longitude", - "type": "double", - "description": "forecast site longitude" - }, - { - "name": "latitude", - "type": "double", - "description": "forecast site latitude" + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" } ], "assets": { diff --git a/catalog/scores/aquatics/Daily_Chlorophyll_a/collection.json b/catalog/scores/aquatics/Daily_Chlorophyll_a/collection.json deleted file mode 100644 index 4f1a92c11f..0000000000 --- a/catalog/scores/aquatics/Daily_Chlorophyll_a/collection.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "id": "Daily_Chlorophyll_a", - "description": "This page includes all models for the Daily_Chlorophyll_a variable.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "links": [ - { - "rel": "item", - "type": "application/json", - "href": "../../models/model_items/climatology.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "../../models/model_items/persistenceRW.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "Daily_Chlorophyll_a", - "extent": { - "spatial": { - "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-02-12T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/scores/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scores/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "pending", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "pending" - } - } -} diff --git a/catalog/scores/aquatics/collection.json b/catalog/scores/aquatics/collection.json deleted file mode 100644 index d8dbe012be..0000000000 --- a/catalog/scores/aquatics/collection.json +++ /dev/null @@ -1,187 +0,0 @@ -{ - "id": "aquatics", - "description": "This page includes variables for the aquatics group.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "links": [ - { - "rel": "child", - "type": "application/json", - "href": "Daily_Chlorophyll_a/collection.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "aquatics", - "extent": { - "spatial": { - "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-02-12T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for the NEON Ecological Forecasting Aquatics theme.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |>\n dplyr::filter(variable %in% c(\"chla\")) |>\n dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Back-b.jpg", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "USGS Streamgage" - } - } -} diff --git a/catalog/scores/collection.json b/catalog/scores/collection.json index 8d0608e102..eb5fae2c3f 100644 --- a/catalog/scores/collection.json +++ b/catalog/scores/collection.json @@ -70,7 +70,7 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-02-12T00:00:00Z" + "2024-04-30T00:00:00Z" ] ] } diff --git a/catalog/scores/models/collection.json b/catalog/scores/models/collection.json index dfe9d1a961..dbae3a6c94 100644 --- a/catalog/scores/models/collection.json +++ b/catalog/scores/models/collection.json @@ -9,7 +9,13 @@ "https://stac-extensions.github.io/table/v1.2.0/schema.json" ], "type": "Collection", + "sci:doi": "https://doi.org/10.1002/fee.2616", "links": [ + { + "rel": "item", + "type": "application/json", + "href": "model_items/USGSHABs1.json" + }, { "rel": "item", "type": "application/json", @@ -37,7 +43,7 @@ }, { "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" + "href": {} }, { "rel": "about", @@ -63,7 +69,7 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-02-12T00:00:00Z" + "2024-04-30T00:00:00Z" ] ] } diff --git a/catalog/scores/models/model_items/USGSHABs1.json b/catalog/scores/models/model_items/USGSHABs1.json new file mode 100644 index 0000000000..dbdfac5e89 --- /dev/null +++ b/catalog/scores/models/model_items/USGSHABs1.json @@ -0,0 +1,213 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "USGSHABs1", + "bbox": [ + [ + -122.6692, + 45.5175, + -74.7781, + 45.5175 + ] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [], + [], + [], + [], + [], + [], + [], + [] + ] + }, + "properties": { + "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211720, USGS-01427510\n\nVariables: Daily Chlorophyll_a", + "start_datetime": "2024-02-13", + "end_datetime": "2024-04-30", + "providers": [ + { + "url": "pending", + "name": "pending", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasting", + "usgsrc4cast", + "Daily Chlorophyll_a" + ], + "table:columns": [ + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." + }, + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "observation", + "type": "double", + "description": "observed value for variable" + }, + { + "name": "crps", + "type": "double", + "description": "crps forecast score" + }, + { + "name": "logs", + "type": "double", + "description": "logs forecast score" + }, + { + "name": "mean", + "type": "double", + "description": "mean forecast prediction" + }, + { + "name": "median", + "type": "double", + "description": "median forecast prediction" + }, + { + "name": "sd", + "type": "double", + "description": "standard deviation forecasts" + }, + { + "name": "quantile97.5", + "type": "double", + "description": "upper 97.5 percentile value of forecast" + }, + { + "name": "quantile02.5", + "type": "double", + "description": "upper 2.5 percentile value of forecast" + }, + { + "name": "quantile90", + "type": "double", + "description": "upper 90 percentile value of forecast" + }, + { + "name": "quantile10", + "type": "double", + "description": "upper 10 percentile value of forecast" + }, + { + "name": "project_id", + "type": "string", + "description": "unique project identifier" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + }, + { + "name": "date", + "type": "string", + "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." + } + ] + }, + "collection": "scores", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "self", + "href": "USGSHABs1.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "pending", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "pending", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/scores/models/model_items/climatology.json b/catalog/scores/models/model_items/climatology.json index be1bf9601f..108b203707 100644 --- a/catalog/scores/models/model_items/climatology.json +++ b/catalog/scores/models/model_items/climatology.json @@ -24,13 +24,14 @@ [], [], [], + [], [] ] }, "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", + "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-05549500\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-02-12", + "end_datetime": "2024-04-30", "providers": [ { "url": "pending", diff --git a/catalog/scores/models/model_items/persistenceRW.json b/catalog/scores/models/model_items/persistenceRW.json index 091dc00a8e..1bf4e63eab 100644 --- a/catalog/scores/models/model_items/persistenceRW.json +++ b/catalog/scores/models/model_items/persistenceRW.json @@ -31,7 +31,7 @@ "properties": { "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-02-12", + "end_datetime": "2024-04-30", "providers": [ { "url": "pending", diff --git a/catalog/scores/scores_models.R b/catalog/scores/scores_models.R index 2cffa8430d..7b42c2e811 100644 --- a/catalog/scores/scores_models.R +++ b/catalog/scores/scores_models.R @@ -7,11 +7,6 @@ library(readr) config <- yaml::read_yaml('challenge_configuration.yaml') catalog_config <- config$catalog_config -# names(config$variable_groups) -# variable_groups <- names(config$variable_groups) -# variable_list <- config$variable_groups - - ## CREATE table for column descriptions scores_description_create <- data.frame(reference_datetime ='datetime that the forecast was initiated (horizon = 0)', site_id = 'For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model.', diff --git a/catalog/summaries/aquatics/Daily_Chlorophyll_a/collection.json b/catalog/summaries/aquatics/Daily_Chlorophyll_a/collection.json deleted file mode 100644 index e27ebad3d7..0000000000 --- a/catalog/summaries/aquatics/Daily_Chlorophyll_a/collection.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "id": "Daily_Chlorophyll_a", - "description": "This page includes all models for the Daily_Chlorophyll_a variable.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "links": [ - { - "rel": "item", - "type": "application/json", - "href": "../../models/model_items/climatology.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "../../models/model_items/persistenceRW.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "Daily_Chlorophyll_a", - "extent": { - "spatial": { - "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/project_id=usgsrc4cast/duration=P1D/variable=chla?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "pending", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "pending" - } - } -} diff --git a/catalog/summaries/aquatics/collection.json b/catalog/summaries/aquatics/collection.json deleted file mode 100644 index 4d19ee6257..0000000000 --- a/catalog/summaries/aquatics/collection.json +++ /dev/null @@ -1,172 +0,0 @@ -{ - "id": "aquatics", - "description": "This page includes variables for the aquatics group.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "links": [ - { - "rel": "child", - "type": "application/json", - "href": "Daily_Chlorophyll_a/collection.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "aquatics", - "extent": { - "spatial": { - "bbox": [ - ["Inf", "Inf", "-Inf", "-Inf"] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/vera4cast/forecasts/summaries/parquet/?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for the NEON Ecological Forecasting Aquatics theme.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/vera4cast/forecasts/summaries/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |>\n dplyr::filter(variable %in% c(\"chla\")) |>\n dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Back-b.jpg", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "USGS Streamgage" - } - } -} diff --git a/catalog/summaries/collection.json b/catalog/summaries/collection.json index 25ac4b7bdd..cc0441c88c 100644 --- a/catalog/summaries/collection.json +++ b/catalog/summaries/collection.json @@ -70,7 +70,7 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-06-03T00:00:00Z" ] ] } diff --git a/catalog/summaries/models/collection.json b/catalog/summaries/models/collection.json index 3d7a48c53b..a854671339 100644 --- a/catalog/summaries/models/collection.json +++ b/catalog/summaries/models/collection.json @@ -9,6 +9,7 @@ "https://stac-extensions.github.io/table/v1.2.0/schema.json" ], "type": "Collection", + "sci:doi": "https://doi.org/10.1002/fee.2616", "links": [ { "rel": "item", @@ -20,6 +21,11 @@ "type": "application/json", "href": "model_items/persistenceRW.json" }, + { + "rel": "item", + "type": "application/json", + "href": "model_items/USGSHABs1.json" + }, { "rel": "parent", "type": "application/json", @@ -37,7 +43,7 @@ }, { "rel": "cite-as", - "href": "https://doi.org/10.1002/fee.2616" + "href": {} }, { "rel": "about", @@ -63,7 +69,7 @@ "interval": [ [ "2024-02-07T00:00:00Z", - "2024-03-17T00:00:00Z" + "2024-06-03T00:00:00Z" ] ] } diff --git a/catalog/summaries/models/model_items/USGSHABs1.json b/catalog/summaries/models/model_items/USGSHABs1.json new file mode 100644 index 0000000000..3c862ced93 --- /dev/null +++ b/catalog/summaries/models/model_items/USGSHABs1.json @@ -0,0 +1,198 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ], + "type": "Feature", + "id": "USGSHABs1", + "bbox": [ + [ + -122.6692, + 45.5175, + -74.7781, + 45.5175 + ] + ], + "geometry": { + "type": "MultiPoint", + "coordinates": [ + [], + [], + [], + [], + [], + [], + [], + [] + ] + }, + "properties": { + "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-14211720, USGS-14181500, USGS-05586300, USGS-05558300, USGS-05553700, USGS-05543010, USGS-05549500, USGS-01427510\n\nVariables: Daily Chlorophyll_a", + "start_datetime": "2024-02-13", + "end_datetime": "2024-06-01", + "providers": [ + { + "url": "pending", + "name": "pending", + "roles": [ + "producer", + "processor", + "licensor" + ] + }, + { + "url": "https://www.ecoforecastprojectvt.org", + "name": "Ecoforecast Challenge", + "roles": [ + "host" + ] + } + ], + "license": "CC0-1.0", + "keywords": [ + "Forecasting", + "usgsrc4cast", + "Daily Chlorophyll_a" + ], + "table:columns": [ + { + "name": "reference_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that the forecast was initiated (horizon = 0)" + }, + { + "name": "site_id", + "type": "string", + "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" + }, + { + "name": "datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime of the forecasted value (ISO 8601)" + }, + { + "name": "family", + "type": "string", + "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" + }, + { + "name": "pub_datetime", + "type": "timestamp[us, tz=UTC]", + "description": "datetime that forecast was submitted" + }, + { + "name": "mean", + "type": "double", + "description": "mean forecast prediction" + }, + { + "name": "median", + "type": "double", + "description": "median forecast prediction" + }, + { + "name": "sd", + "type": "double", + "description": "standard deviation forecasts" + }, + { + "name": "quantile97.5", + "type": "double", + "description": "upper 97.5 percentile value of forecast" + }, + { + "name": "quantile02.5", + "type": "double", + "description": "upper 2.5 percentile value of forecast" + }, + { + "name": "quantile90", + "type": "double", + "description": "upper 90 percentile value of forecast" + }, + { + "name": "quantile10", + "type": "double", + "description": "upper 10 percentile value of forecast" + }, + { + "name": "project_id", + "type": "string", + "description": "unique identifier for the forecast project" + }, + { + "name": "duration", + "type": "string", + "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" + }, + { + "name": "variable", + "type": "string", + "description": "name of forecasted variable" + }, + { + "name": "model_id", + "type": "string", + "description": "unique model identifier" + }, + { + "name": "reference_date", + "type": "string", + "description": "date that the forecast was initiated" + } + ] + }, + "collection": "forecasts", + "links": [ + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "root", + "href": "../../../catalog.json", + "type": "application/json", + "title": "Forecast Catalog" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "USGSHABs1" + }, + { + "rel": "self", + "href": "USGSHABs1.json", + "type": "application/json", + "title": "Model Forecast" + }, + { + "rel": "item", + "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", + "type": "text/html", + "title": "Link for Model Code" + } + ], + "assets": { + "1": { + "type": "application/json", + "title": "Model Metadata", + "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", + "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" + }, + "2": { + "type": "text/html", + "title": "Link for Model Code", + "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", + "description": "The link to the model code provided by the model submission team" + }, + "3": { + "type": "application/x-parquet", + "title": "Database Access for Daily Chlorophyll_a", + "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", + "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" + } + } +} diff --git a/catalog/summaries/models/model_items/climatology.json b/catalog/summaries/models/model_items/climatology.json index 5114c3b22e..b75800ec0b 100644 --- a/catalog/summaries/models/model_items/climatology.json +++ b/catalog/summaries/models/model_items/climatology.json @@ -24,13 +24,14 @@ [], [], [], + [], [] ] }, "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", + "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-05549500\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-03-17", + "end_datetime": "2024-06-03", "providers": [ { "url": "pending", diff --git a/catalog/summaries/models/model_items/persistenceRW.json b/catalog/summaries/models/model_items/persistenceRW.json index 300c435d6c..c78fb446e6 100644 --- a/catalog/summaries/models/model_items/persistenceRW.json +++ b/catalog/summaries/models/model_items/persistenceRW.json @@ -31,7 +31,7 @@ "properties": { "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", "start_datetime": "2024-02-07", - "end_datetime": "2024-03-15", + "end_datetime": "2024-06-02", "providers": [ { "url": "pending", diff --git a/catalog/targets/collection.json b/catalog/targets/collection.json index ca449d8b9c..e2e5db9360 100644 --- a/catalog/targets/collection.json +++ b/catalog/targets/collection.json @@ -58,7 +58,7 @@ "interval": [ [ "2009-01-22T00:00:00Z", - "2024-02-12T00:00:00Z" + "2024-04-30T00:00:00Z" ] ] } diff --git a/challenge_configuration.yaml b/challenge_configuration.yaml index 7a176cde98..101b858f18 100644 --- a/challenge_configuration.yaml +++ b/challenge_configuration.yaml @@ -67,7 +67,6 @@ catalog_config: forecast_path: 'catalog/forecasts/' scores_path: 'catalog/scores/' summaries_path: 'catalog/summaries/' - # here summaries_thumbnail: 'https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Manual%20measurement%20streamgage.jpg' summaries_thumbnail_title: "USGS Image" inventory_path: 'catalog/inventory' @@ -88,7 +87,8 @@ catalog_config: min_lon: -122.6692 site_type: 'MultiPoint' base_image_path: 'https://data.ecoforecast.org/usgsrc4cast-catalog' - citation_doi: "https://doi.org/10.1002/fee.2616" + citation_doi_link: "https://doi.org/10.1002/fee.2616" + citation_doi: "10.1002/fee.2616" citation_text: "Thomas, R.Q., C. Boettiger, C.C. Carey, M.C. Dietze, L.R. Johnson, M.A. Kenney, J.S. Mclachlan, J.A. Peters, E.R. Sokol, J.F. Weltzin, A. Willson, W.M. Woelmer, and Challenge Contributors. 2023. The NEON Ecological Forecasting Challenge. Frontiers in Ecology and Environment 21: 112-113." dashboard_url: "https://projects.ecoforecast.org/usgsrc4cast-ci/" dashboard_title: "EFI-USGS River Chlorophyll Forecast Challenge Dashboard"