diff --git a/catalog/forecasts/forecast_models.R b/catalog/forecasts/forecast_models.R index f1cf7dfa53..c667455b23 100644 --- a/catalog/forecasts/forecast_models.R +++ b/catalog/forecasts/forecast_models.R @@ -317,6 +317,17 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups model_keywords <- c(list('Forecasts',config$project_id, names(config$variable_groups)[i], m, var_name_full[j], var_name, duration_value, duration_name), as.list(model_sites$site_id)) + ## build radiantearth stac and raw json link + stac_link <- paste0('https://radiantearth.github.io/stac-browser/#/external/raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/forecasts/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + + json_link <- paste0('https://raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/forecasts/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + stac4cast::build_model(model_id = m, stac_id = stac_id, team_name = registered_model_id$`Long name of the model (can include spaces)`[idx], @@ -338,7 +349,9 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups table_description = forecast_description_create, full_var_df = model_vars, code_web_link = model_code_link, - model_keywords = model_keywords) + model_keywords = model_keywords, + stac_web_link = stac_link, + raw_json_link = json_link) } ## end model loop } ## end duration loop diff --git a/catalog/forecasts/models/collection.json b/catalog/forecasts/models/collection.json deleted file mode 100644 index a07e65b1c9..0000000000 --- a/catalog/forecasts/models/collection.json +++ /dev/null @@ -1,163 +0,0 @@ -{ - "id": "models", - "description": "Forecasts are the raw forecasts that includes all ensemble members or distribution parameters. Due to the size of the raw forecasts, we recommend accessing the scores (summaries of the forecasts) to analyze forecasts (unless you need the individual ensemble members). You can access the forecasts at the top level of the dataset where all models, variables, and dates that forecasts were produced (reference_datetime) are available. The code to access the entire dataset is provided as an asset. Given the size of the forecast catalog, it can be time-consuming to access the data at the full dataset level. For quicker access to the forecasts for a particular model (model_id), we also provide the code to access the data at the model_id level as an asset for each model.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "sci:doi": "https://doi.org/10.1002/fee.2616", - "links": [ - { - "rel": "item", - "type": "application/json", - "href": "model_items/climatology.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/persistenceRW.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/USGSHABs1.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": {} - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "Models", - "extent": { - "spatial": { - "bbox": [ - [-122.6692, 39.6328, -74.7781, 45.5175] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-06-03T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" - }, - { - "name": "parameter", - "type": "string", - "description": "ensemble member or distribution parameter" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "date", - "type": "date32[day]", - "description": {} - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "pending", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "pending" - } - } -} diff --git a/catalog/forecasts/models/model_items/.empty b/catalog/forecasts/models/model_items/.empty deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/catalog/forecasts/models/model_items/USGSHABs1.json b/catalog/forecasts/models/model_items/USGSHABs1.json deleted file mode 100644 index f26add9902..0000000000 --- a/catalog/forecasts/models/model_items/USGSHABs1.json +++ /dev/null @@ -1,178 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "USGSHABs1", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-14211720, USGS-14181500, USGS-05586300, USGS-05558300, USGS-05553700, USGS-05543010, USGS-05549500, USGS-01427510\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-13", - "end_datetime": "2024-06-01", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" - }, - { - "name": "parameter", - "type": "string", - "description": "ensemble member or distribution parameter" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "date", - "type": "date32[day]", - "description": {} - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "self", - "href": "USGSHABs1.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/forecasts/models/model_items/climatology.json b/catalog/forecasts/models/model_items/climatology.json deleted file mode 100644 index eec1b534a7..0000000000 --- a/catalog/forecasts/models/model_items/climatology.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "climatology", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-01427510, USGS-01463500, USGS-05543010\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-06-03", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" - }, - { - "name": "parameter", - "type": "string", - "description": "ensemble member or distribution parameter" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "date", - "type": "date32[day]", - "description": {} - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "self", - "href": "climatology.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/forecasts/models/model_items/persistenceRW.json b/catalog/forecasts/models/model_items/persistenceRW.json deleted file mode 100644 index 5c8bd9c2b5..0000000000 --- a/catalog/forecasts/models/model_items/persistenceRW.json +++ /dev/null @@ -1,180 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "persistenceRW", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-06-02", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "prediction", - "type": "double", - "description": "predicted value for variable" - }, - { - "name": "parameter", - "type": "string", - "description": "ensemble member or distribution parameter" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "date", - "type": "date32[day]", - "description": {} - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "self", - "href": "persistenceRW.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecastsproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/scores/models/collection.json b/catalog/scores/models/collection.json deleted file mode 100644 index dbae3a6c94..0000000000 --- a/catalog/scores/models/collection.json +++ /dev/null @@ -1,198 +0,0 @@ -{ - "id": "models", - "description": "The catalog contains scores for the EFI-USGS River Chlorophyll Forecasting Challenge. The scores are summaries of the forecasts (i.e., mean, median, confidence intervals), matched observations (if available), and scores (metrics of how well the model distribution compares to observations). You can access the scores at the top level of the dataset where all models, variables, and dates that forecasts were produced (reference_datetime) are available. The code to access the entire dataset is provided as an asset. Given the size of the scores catalog, it can be time-consuming to access the data at the full dataset level. For quicker access to the scores for a particular model (model_id), we also provide the code to access the data at the model_id level as an asset for each model.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "sci:doi": "https://doi.org/10.1002/fee.2616", - "links": [ - { - "rel": "item", - "type": "application/json", - "href": "model_items/USGSHABs1.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/climatology.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/persistenceRW.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": {} - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "Models", - "extent": { - "spatial": { - "bbox": [ - [-122.6692, 39.6328, -74.7781, 45.5175] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-04-30T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scores/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "pending", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "pending" - } - } -} diff --git a/catalog/scores/models/model_items/.empty b/catalog/scores/models/model_items/.empty deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/catalog/scores/models/model_items/USGSHABs1.json b/catalog/scores/models/model_items/USGSHABs1.json deleted file mode 100644 index dbdfac5e89..0000000000 --- a/catalog/scores/models/model_items/USGSHABs1.json +++ /dev/null @@ -1,213 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "USGSHABs1", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211720, USGS-01427510\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-13", - "end_datetime": "2024-04-30", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ] - }, - "collection": "scores", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "self", - "href": "USGSHABs1.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/scores/models/model_items/climatology.json b/catalog/scores/models/model_items/climatology.json deleted file mode 100644 index 108b203707..0000000000 --- a/catalog/scores/models/model_items/climatology.json +++ /dev/null @@ -1,215 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "climatology", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-05549500\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-04-30", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ] - }, - "collection": "scores", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "self", - "href": "climatology.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/scores/models/model_items/persistenceRW.json b/catalog/scores/models/model_items/persistenceRW.json deleted file mode 100644 index 1bf4e63eab..0000000000 --- a/catalog/scores/models/model_items/persistenceRW.json +++ /dev/null @@ -1,215 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "persistenceRW", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-04-30", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat); however in netCDF this could be handled by the CF Discrete Sampling Geometry data model." - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified For probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.” For summary statistics: “summary.”If this dimension does not vary, it is permissible to specify family as a variable attribute if the file format being used supports this (e.g.,netCDF)." - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "observation", - "type": "double", - "description": "observed value for variable" - }, - { - "name": "crps", - "type": "double", - "description": "crps forecast score" - }, - { - "name": "logs", - "type": "double", - "description": "logs forecast score" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique project identifier" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly = PT1H, daily = P1D, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "date", - "type": "string", - "description": "ISO 8601 (ISO 2019) date of the predicted value; follows CF convention http://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate. This variable was called time before v0.5of the EFI convention. For time-integrated variables (e.g., cumulative net primary productivity), one should specify the start_datetime and end_datetime as two variables, instead of the single datetime. If this is not provided the datetime is assumed to be the MIDPOINT of the integration period." - } - ] - }, - "collection": "scores", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "self", - "href": "persistenceRW.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "pending", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "pending", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/scoresproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/scores/scores_models.R b/catalog/scores/scores_models.R index bbb5e657a9..908663dd26 100644 --- a/catalog/scores/scores_models.R +++ b/catalog/scores/scores_models.R @@ -311,6 +311,17 @@ for (i in 1:length(config$variable_groups)){ # LOOP OVER VARIABLE GROUPS -- BUIL model_keywords <- c(list('Scores',config$project_id, names(config$variable_groups)[i], m, var_name_full[j], var_name, duration_value, duration_name), as.list(model_sites$site_id)) + ## build radiantearth stac and raw json link + stac_link <- paste0('https://radiantearth.github.io/stac-browser/#/external/raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/scores/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + + json_link <- paste0('https://raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/scores/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + stac4cast::build_model(model_id = m, stac_id = stac_id, team_name = registered_model_id$`Long name of the model (can include spaces)`[idx], @@ -331,7 +342,9 @@ for (i in 1:length(config$variable_groups)){ # LOOP OVER VARIABLE GROUPS -- BUIL table_description = scores_description_create, full_var_df = model_vars, code_web_link = model_code_link, - model_keywords = model_keywords) + model_keywords = model_keywords, + stac_web_link = stac_link, + raw_json_link = json_link) } ## end model loop } ## end duration loop diff --git a/catalog/summaries/models/collection.json b/catalog/summaries/models/collection.json deleted file mode 100644 index a854671339..0000000000 --- a/catalog/summaries/models/collection.json +++ /dev/null @@ -1,183 +0,0 @@ -{ - "id": "models", - "description": "Summaries are the forecasts statistics of the raw forecasts (i.e., mean, median, confidence intervals). You can access the summaries at the top level of the dataset where all models, variables, and dates that forecasts were produced (reference_datetime) are available. The code to access the entire dataset is provided as an asset. Given the size of the forecast catalog, it can be time-consuming to access the data at the full dataset level. For quicker access to the forecasts for a particular model (model_id), we also provide the code to access the data at the model_id level as an asset for each model.", - "stac_version": "1.0.0", - "license": "CC0-1.0", - "stac_extensions": [ - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Collection", - "sci:doi": "https://doi.org/10.1002/fee.2616", - "links": [ - { - "rel": "item", - "type": "application/json", - "href": "model_items/climatology.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/persistenceRW.json" - }, - { - "rel": "item", - "type": "application/json", - "href": "model_items/USGSHABs1.json" - }, - { - "rel": "parent", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "root", - "type": "application/json", - "href": "../collection.json" - }, - { - "rel": "self", - "type": "application/json", - "href": "collection.json" - }, - { - "rel": "cite-as", - "href": {} - }, - { - "rel": "about", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "type": "text/html", - "title": "EFI-USGS River Chlorophyll Forecasting Challenge Documentation" - }, - { - "rel": "describedby", - "href": "https://projects.ecoforecast.org/usgsrc4cast-docs/", - "title": "EFI-USGS River Chlorophyll Forecast Challenge Dashboard", - "type": "text/html" - } - ], - "title": "Models", - "extent": { - "spatial": { - "bbox": [ - [-122.6692, 39.6328, -74.7781, 45.5175] - ] - }, - "temporal": { - "interval": [ - [ - "2024-02-07T00:00:00Z", - "2024-06-03T00:00:00Z" - ] - ] - } - }, - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ], - "assets": { - "data": { - "href": "s3://anonymous@bio230014-bucket01/vera4cast/forecasts/summaries/parquet/?endpoint_override=sdsc.osn.xsede.org", - "type": "application/x-parquet", - "title": "Database Access", - "roles": [ - "data" - ], - "description": "Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/vera4cast/forecasts/summaries/parquet/?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - }, - "thumbnail": { - "href": "pending", - "type": "image/JPEG", - "roles": [ - "thumbnail" - ], - "title": "pending" - } - } -} diff --git a/catalog/summaries/models/model_items/.empty b/catalog/summaries/models/model_items/.empty deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/catalog/summaries/models/model_items/USGSHABs1.json b/catalog/summaries/models/model_items/USGSHABs1.json deleted file mode 100644 index 3c862ced93..0000000000 --- a/catalog/summaries/models/model_items/USGSHABs1.json +++ /dev/null @@ -1,198 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "USGSHABs1", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Uses the randomForest::randomForest() R package model to train site-specific models for predicting river chl-a. Uses ensemble Kalman filter to adjust predicted chl-a states.\n\nSites: USGS-14211720, USGS-14181500, USGS-05586300, USGS-05558300, USGS-05553700, USGS-05543010, USGS-05549500, USGS-01427510\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-13", - "end_datetime": "2024-06-01", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "USGSHABs1" - }, - { - "rel": "self", - "href": "USGSHABs1.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/USGSHABs1.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "https://code.usgs.gov/wma/proxies/habs/habs-forecast-chl-usgsrc4cast/-/blob/main/2_model/src/chla_models.R?ref_type=heads", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=USGSHABs1?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/summaries/models/model_items/climatology.json b/catalog/summaries/models/model_items/climatology.json deleted file mode 100644 index b75800ec0b..0000000000 --- a/catalog/summaries/models/model_items/climatology.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "climatology", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Forecasts stream chlorophyll-a based on the historic average and standard deviation for that given site and day-of-year.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720, USGS-05549500\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-06-03", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "climatology" - }, - { - "rel": "self", - "href": "climatology.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_climatology.R", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/climatology.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_climatology.R", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=climatology?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/summaries/models/model_items/persistenceRW.json b/catalog/summaries/models/model_items/persistenceRW.json deleted file mode 100644 index c78fb446e6..0000000000 --- a/catalog/summaries/models/model_items/persistenceRW.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/table/v1.2.0/schema.json" - ], - "type": "Feature", - "id": "persistenceRW", - "bbox": [ - [ - -122.6692, - 45.5175, - -74.7781, - 45.5175 - ] - ], - "geometry": { - "type": "MultiPoint", - "coordinates": [ - [], - [], - [], - [], - [], - [], - [], - [], - [], - [] - ] - }, - "properties": { - "description": "\nmodel info: Random walk model based on most recent stream chl-a observations using the fable::RW() model.\n\nSites: USGS-01427510, USGS-01463500, USGS-05543010, USGS-05549500, USGS-05553700, USGS-05558300, USGS-05586300, USGS-14181500, USGS-14211010, USGS-14211720\n\nVariables: Daily Chlorophyll_a", - "start_datetime": "2024-02-07", - "end_datetime": "2024-06-02", - "providers": [ - { - "url": "pending", - "name": "pending", - "roles": [ - "producer", - "processor", - "licensor" - ] - }, - { - "url": "https://www.ecoforecastprojectvt.org", - "name": "Ecoforecast Challenge", - "roles": [ - "host" - ] - } - ], - "license": "CC0-1.0", - "keywords": [ - "Forecasting", - "usgsrc4cast", - "Daily Chlorophyll_a" - ], - "table:columns": [ - { - "name": "reference_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that the forecast was initiated (horizon = 0)" - }, - { - "name": "site_id", - "type": "string", - "description": "For forecasts that are not on a spatial grid, use of a site dimension that maps to a more detailed geometry (points, polygons, etc.) is allowable. In general this would be documented in the external metadata (e.g., alook-up table that provides lon and lat)" - }, - { - "name": "datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime of the forecasted value (ISO 8601)" - }, - { - "name": "family", - "type": "string", - "description": "For ensembles: “ensemble.” Default value if unspecified for probability distributions: Name of the statistical distribution associated with the reported statistics. The “sample” distribution is synonymous with “ensemble.”For summary statistics: “summary.”" - }, - { - "name": "pub_datetime", - "type": "timestamp[us, tz=UTC]", - "description": "datetime that forecast was submitted" - }, - { - "name": "mean", - "type": "double", - "description": "mean forecast prediction" - }, - { - "name": "median", - "type": "double", - "description": "median forecast prediction" - }, - { - "name": "sd", - "type": "double", - "description": "standard deviation forecasts" - }, - { - "name": "quantile97.5", - "type": "double", - "description": "upper 97.5 percentile value of forecast" - }, - { - "name": "quantile02.5", - "type": "double", - "description": "upper 2.5 percentile value of forecast" - }, - { - "name": "quantile90", - "type": "double", - "description": "upper 90 percentile value of forecast" - }, - { - "name": "quantile10", - "type": "double", - "description": "upper 10 percentile value of forecast" - }, - { - "name": "project_id", - "type": "string", - "description": "unique identifier for the forecast project" - }, - { - "name": "duration", - "type": "string", - "description": "temporal duration of forecast (hourly, daily, etc.); follows ISO 8601 duration convention" - }, - { - "name": "variable", - "type": "string", - "description": "name of forecasted variable" - }, - { - "name": "model_id", - "type": "string", - "description": "unique model identifier" - }, - { - "name": "reference_date", - "type": "string", - "description": "date that the forecast was initiated" - } - ] - }, - "collection": "forecasts", - "links": [ - { - "rel": "collection", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "root", - "href": "../../../catalog.json", - "type": "application/json", - "title": "Forecast Catalog" - }, - { - "rel": "parent", - "href": "../collection.json", - "type": "application/json", - "title": "persistenceRW" - }, - { - "rel": "self", - "href": "persistenceRW.json", - "type": "application/json", - "title": "Model Forecast" - }, - { - "rel": "item", - "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_persistenceRW.R", - "type": "text/html", - "title": "Link for Model Code" - } - ], - "assets": { - "1": { - "type": "application/json", - "title": "Model Metadata", - "href": "https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json", - "description": "Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.\n \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(\"https://sdsc.osn.xsede.org/bio230014-bucket01/challenges/metadata/model_id/persistenceRW.json\")\n\n" - }, - "2": { - "type": "text/html", - "title": "Link for Model Code", - "href": "https://github.com/eco4cast/usgsrc4cast-ci/blob/main/baseline_models/models/aquatics_persistenceRW.R", - "description": "The link to the model code provided by the model submission team" - }, - "3": { - "type": "application/x-parquet", - "title": "Database Access for Daily Chlorophyll_a", - "href": "s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org", - "description": "Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(\"s3://anonymous@bio230014-bucket01/challenges/forecasts/summariesproject_id=/duration=P1D/variable=chla/model_id=persistenceRW?endpoint_override=sdsc.osn.xsede.org\")\ndf <- all_results |> dplyr::collect()\n\n```\n \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n" - } - } -} diff --git a/catalog/summaries/summaries_models.R b/catalog/summaries/summaries_models.R index 7d6d20185e..1e7ca09b68 100644 --- a/catalog/summaries/summaries_models.R +++ b/catalog/summaries/summaries_models.R @@ -305,6 +305,17 @@ for (i in 1:length(config$variable_groups)){ # LOOP OVER VARIABLE GROUPS -- BUIL model_keywords <- c(list('Summaries',config$project_id, names(config$variable_groups)[i], m, var_name_full[j], var_name, duration_value, duration_name), as.list(model_sites$site_id)) + ## build radiantearth stac and raw json link + stac_link <- paste0('https://radiantearth.github.io/stac-browser/#/external/raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/summaries/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + + json_link <- paste0('https://raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/catalog/summaries/', + names(config$variable_groups)[i],'/', + var_formal_name, '/models/', + m,'.json') + stac4cast::build_model(model_id = m, stac_id = stac_id, team_name = registered_model_id$`Long name of the model (can include spaces)`[idx], @@ -325,7 +336,9 @@ for (i in 1:length(config$variable_groups)){ # LOOP OVER VARIABLE GROUPS -- BUIL table_description = summaries_description_create, full_var_df = model_vars, code_web_link = model_code_link, - model_keywords = model_keywords) + model_keywords = model_keywords, + stac_web_link = stac_link, + raw_json_link = json_link) } ## end model loop