diff --git a/catalog/R/catalog-common.R b/catalog/R/catalog-common.R
new file mode 100644
index 0000000000..8ec479c84d
--- /dev/null
+++ b/catalog/R/catalog-common.R
@@ -0,0 +1,4 @@
+conformsTo = list(
+  "https=//api.stacspec.org/v1.0.0-rc.1/collections",
+  "https=//api.stacspec.org/v1.0.0-rc.1/core"
+)
diff --git a/catalog/R/stac_functions.R b/catalog/R/stac_functions.R
new file mode 100644
index 0000000000..cc8f1ba88e
--- /dev/null
+++ b/catalog/R/stac_functions.R
@@ -0,0 +1,897 @@
+## MODEL level functions
+
+generate_authors <- function(metadata_table, index){
+
+  x <- list(list('url' = 'pending',
+                 'name' = 'pending',
+                 'roles' = list("producer",
+                                "processor",
+                                "licensor"))
+  )
+}
+
+generate_model_assets <- function(m_vars, m_duration, aws_path){
+
+  metadata_json_asset <- list(
+    "1"= list(
+      'type'= 'application/json',
+      'title' = 'Model Metadata',
+      'href' = paste0("https://", config$endpoint,"/", config$model_metadata_bucket,"/",m,".json"),
+      'description' = paste0("Use `jsonlite::fromJSON()` to download the model metadata JSON file. This R code will return metadata provided during the model registration.
+      \n\n### R\n\n```{r}\n# Use code below\n\nmodel_metadata <- jsonlite::fromJSON(",paste0('"','https://', config$endpoint,'/', config$model_metadata_bucket,'/',m,'.json"'),")\n\n")
+    )
+  )
+
+  iterator_list <- 1:length(m_vars)
+
+  model_data_assets <- purrr::map(iterator_list, function(i)
+    list(
+      'type'= 'application/x-parquet',
+      'title' = paste0('Database Access for ',m_vars[i],' ', m_duration[i]),
+      'href' = paste0("s3://anonymous@",
+                      aws_path,
+                      "/parquet/duration=P1D/variable=", m_vars[i],
+                      "/model_id=", m,
+                      "?endpoint_override=",config$endpoint),
+      'description' = paste0("Use `arrow` for remote access to the database. This R code will return results for this variable and model combination.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(",paste0("s3://anonymous@",
+                                                                                                                                                                                                                                          aws_path,
+                                                                                                                                                                                                                                          "/parquet/duration=P1D/variable=", m_vars[i],
+                                                                                                                                                                                                                                          "/model_id=", m,
+                                                                                                                                                                                                                                          "?endpoint_override=",config$endpoint),")\ndf <- all_results |> dplyr::collect()\n\n```
+       \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n")
+    )
+  )
+
+  model_assets <- c(metadata_json_asset, model_data_assets)
+
+  return(model_assets)
+}
+
+
+build_model <- function(model_id,
+                        theme_id,
+                        team_name,
+                        model_description,
+                        start_date,
+                        end_date,
+                        use_metadata,
+                        var_values,
+                        duration_names,
+                        site_values,
+                        model_documentation,
+                        destination_path,
+                        description_path,
+                        aws_download_path,
+                        theme_title,
+                        collection_name,
+                        thumbnail_image_name,
+                        table_schema,
+                        table_description) {
+
+
+  preset_keywords <- list("Forecasting", config$project_id)
+  variables_reformat <- paste(var_values, collapse = ", ")
+  site_reformat <- paste(site_values, collapse = ", ")
+
+  aws_asset_link <- paste0("s3://anonymous@",
+                           aws_download_path,
+                           "/model_id=", model_id,
+                           "?endpoint_override=",config$endpoint)
+
+  aws_asset_description <- paste0("Use `arrow` for remote access to the database. This R code will return results for forecasts of the variable by the specific model .\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(",aws_asset_link,")\ndf <- all_results |> dplyr::collect()\n\n```
+       \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n")
+
+  meta <- list(
+    "stac_version"= "1.0.0",
+    "stac_extensions"= list('https://stac-extensions.github.io/table/v1.2.0/schema.json'),
+    "type"= "Feature",
+    "id"= model_id,
+    "bbox"=
+      list(list(as.numeric(catalog_config$bbox$min_lon),
+                as.numeric(catalog_config$bbox$max_lat),
+                as.numeric(catalog_config$bbox$max_lon),
+                as.numeric(catalog_config$bbox$max_lat))),
+    "geometry"= list(
+      "type"= catalog_config$site_type,
+      "coordinates"=  get_site_coords(sites = site_values)
+    ),
+    "properties"= list(
+      #'description' = model_description,
+      "description" = glue::glue('
+
+    model info: {model_description}
+
+    Sites: {site_reformat}
+
+    Variables: {variables_reformat}
+'),
+"start_datetime" = start_date,
+"end_datetime" = end_date,
+"providers"= c(generate_authors(metadata_table = model_documentation),list(
+  list(
+    "url"= catalog_config$host_url,
+    "name"= catalog_config$host_name,
+    "roles"= list(
+      "host"
+    )
+  )
+)
+),
+"license"= "CC0-1.0",
+"keywords"= c(preset_keywords, variables_reformat),
+#"table:columns" = stac4cast::build_table_columns_full_bucket(table_schema, table_description)
+"table:columns" = build_table_columns_full_bucket(table_schema, table_description)
+    ),
+"collection"= collection_name,
+"links"= list(
+  list(
+    "rel"= "collection",
+    'href' = '../collection.json',
+    "type"= "application/json",
+    "title"= theme_title
+  ),
+  list(
+    "rel"= "root",
+    'href' = '../../../catalog.json',
+    "type"= "application/json",
+    "title"= "Forecast Catalog"
+  ),
+  list(
+    "rel"= "parent",
+    'href' = '../collection.json',
+    "type"= "application/json",
+    "title"= theme_title
+  ),
+  list(
+    "rel"= "self",
+    "href" = paste0(model_id,'.json'),
+    "type"= "application/json",
+    "title"= "Model Forecast"
+  )),
+"assets"= generate_model_assets(var_values, duration_names, aws_download_path)#,
+#pull_images(theme_id,model_id,thumbnail_image_name)
+  )
+
+
+  dest <- destination_path
+  json <- file.path(dest, paste0(model_id, ".json"))
+
+  jsonlite::write_json(meta,
+                       json,
+                       pretty=TRUE,
+                       auto_unbox=TRUE)
+  stac4cast::stac_validate(json)
+
+  rm(meta)
+}
+
+get_grouping <- function(inv_bucket,
+                         theme,
+                         collapse=TRUE) {
+
+  groups <- duckdbfs::open_dataset(glue::glue("s3://anonymous@{inv_bucket}/catalog?endpoint_override=",config$endpoint)) |>
+    dplyr::filter(...1 == "parquet", ...2 == {theme}) |>
+    dplyr::select(model_id = ...3, reference_datetime = ...4, date = ...5) |>
+    dplyr::mutate(model_id = gsub("model_id=", "", model_id),
+                  reference_datetime =
+                    gsub("reference_datetime=", "", reference_datetime),
+                  date = gsub("date=", "", date)) |>
+    dplyr::collect()
+
+}
+
+# DONT USE THIS FUNCTION ANYMORE -- WAS USED FOR ORIGINAL NEON4CAST STAC CODE (KEEPING THIS FOR REFERENCE BUT DELETE EVENTUALLY)
+# generate_vars_sites <- function(m_id, theme){
+#
+#   # if (m_id %in%  c('GLEON_JRabaey_temp_physics','GLEON_lm_lag_1day','GLEON_physics','USGSHABs1','air2waterSat_2','fARIMA')){
+#   #   output_info <- c('pending','pending')
+#   # } else{
+#
+#   # do this for each theme / model
+#   # info_df <- arrow::open_dataset(info_extract$path(glue::glue("{theme}/model_id={m_id}/"))) |>
+#   #   #filter(reference_datetime == "2023-06-18")|> #just grab one EM to limit processing
+#   #   collect()
+#   #
+#   info_df <- duckdbfs::open_dataset(glue::glue("s3://anonymous@neon4cast-scores/parquet/{theme}/
+#                                                model_id={model_id}/reference_datetime={reference_datetime}?endpoint_override=sdsc.osn.xsede.org")) |>
+#     collect()
+#
+#   if ('siteID' %in% names(info_df)){
+#     info_df <- info_df |>
+#       rename(site_id = siteID)
+#   }
+#
+#   vars_vector <- sort(unique(info_df$variable))
+#   sites_vector <- sort(unique(info_df$site_id))
+#
+#   vars_list <- as.list(sort(unique(info_df$variable)))
+#   sites_list <- as.list(sort(unique(info_df$site_id)))
+#
+#   # output_vectors <- c(paste(vars_vector, collapse = ', '),
+#   #                  paste(sites_vector, collapse = ', '))
+#
+#   output_list <- list(vars_list,sites_list)
+#
+#   full_object <- list(vars_vector, sites_vector, output_list)
+#
+#   return(full_object)
+# }
+
+
+## FORECAST LEVEL FUNCTIONS
+generate_model_items <- function(model_list){
+
+  x <- purrr::map(model_list, function(i)
+    list(
+      "rel" = 'item',
+      'type'= 'application/json',
+      'href' = paste0('model_items/',i,'.json'))
+  )
+
+  return(x)
+}
+
+pull_images <- function(theme, m_id, image_name){
+
+  info_df <- arrow::open_dataset(info_extract$path(glue::glue("{theme}/model_id={m_id}/"))) |>
+    collect()
+
+  sites_vector <- sort(unique(info_df$site_id))
+
+  base_path <- catalog_config$base_image_path
+
+  image_assets <- purrr::map(sites_vector, function(i)
+    #url_validator <- Rcurl::url.exists(file.path(base_path,theme,m_id,i,image_name))
+    list(
+      "href"= file.path(base_path,theme,m_id,i,image_name),
+      "type"= "image/png",
+      "title"= paste0('Latest Results for ', i),
+      "description"= 'Image from s3 storage',
+      "roles" = list('thumbnail')
+    )
+  )
+
+  ## check if image rendered successfully on bucket. If not remove from assets
+  item_remove <- c()
+
+  if (image_name == 'latest_scores.png'){
+    for (item in seq.int(1:length(image_assets))){
+      url_validator = RCurl::url.exists(image_assets[[item]]$href)
+      if(url_validator == FALSE){
+        print(paste0('Removing ', image_assets[[item]]$title))
+        item_remove <- append(item_remove,item)
+      }
+    }
+    if (length(item_remove) > 0){
+      image_assets <- image_assets[-item_remove]
+    }
+  }
+
+  return(image_assets)
+
+}
+
+
+get_site_coords <- function(site_metadata, sites){
+
+  site_df <- read_csv(site_metadata)
+
+  # site_df <- data.frame(site_id = c('fcre', 'bvre', 'ccre'),
+  #                       site_lon = c(-79.837217, -79.815936, -79.95856),
+  #                       site_lat = c(37.303153, 37.312909, 37.370259))
+
+  site_lat_lon <- lapply(sites, function(i) c(site_df$latitude[which(site_df[,2] == i)], site_df$longtitude[which(site_df[,2] == i)]))
+
+  return(site_lat_lon)
+}
+
+
+generate_group_values <- function(group_values){
+
+  x <- purrr::map(group_values, function(i)
+    list(
+      "rel" = "child",
+      "type" = "application/json",
+      "href" = paste0(i,"/collection.json"),
+      "title" = i)
+  )
+
+  return(x)
+}
+
+
+build_forecast_scores <- function(table_schema,
+                                  theme_id,
+                                  table_description,
+                                  start_date,
+                                  end_date,
+                                  id_value,
+                                  description_string,
+                                  about_string,
+                                  about_title,
+                                  theme_title,
+                                  model_documentation,
+                                  destination_path,
+                                  aws_download_path,
+                                  link_items,
+                                  thumbnail_link,
+                                  thumbnail_title
+){
+
+  aws_asset_link <- paste0("s3://anonymous@",
+                           aws_download_path,
+                           #"/model_id=", model_id,
+                           "?endpoint_override=",config$endpoint)
+
+  aws_asset_description <-   aws_asset_description <- paste0("Use `arrow` for remote access to the database. This R code will return results for the VERA Forecasting Challenge.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(",aws_asset_link,")\ndf <- all_results |> dplyr::collect()\n\n```
+       \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n")
+  forecast_score <- list(
+    "id" = id_value,
+    "description" = description_string,
+    "stac_version"= "1.0.0",
+    "license"= "CC0-1.0",
+    "stac_extensions"= list("https://stac-extensions.github.io/scientific/v1.0.0/schema.json",
+                            "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
+                            "https://stac-extensions.github.io/table/v1.2.0/schema.json"),
+    'type' = 'Collection',
+    'links' = c(link_items, #generate_model_items()
+                list(
+                  list(
+                    "rel" = "child",
+                    "type" = "application/json",
+                    "href" = "models/collection.json",
+                    "title" = "group item"
+                  ),
+                  list(
+                    "rel" = "parent",
+                    "type"= "application/json",
+                    "href" = '../catalog.json'
+                  ),
+                  list(
+                    "rel" = "root",
+                    "type" = "application/json",
+                    "href" = '../catalog.json'
+                  ),
+                  list(
+                    "rel" = "self",
+                    "type" = "application/json",
+                    "href" = 'collection.json'
+                  ),
+                  list(
+                    "rel" = "cite-as",
+                    "href" = catalog_config$citation_doi
+                  ),
+                  list(
+                    "rel" = "about",
+                    "href" = about_string,
+                    "type" = "text/html",
+                    "title" = about_title
+                  ),
+                  list(
+                    "rel" = "describedby",
+                    "href" = catalog_config$dashboard_url,
+                    "title" = catalog_config$dashboard_title,
+                    "type" = "text/html"
+                  )
+                )),
+    "title" = theme_title,
+    "extent" = list(
+      "spatial" = list(
+        'bbox' = list(list(as.numeric(catalog_config$bbox$min_lon),
+                           as.numeric(catalog_config$bbox$max_lat),
+                           as.numeric(catalog_config$bbox$max_lon),
+                           as.numeric(catalog_config$bbox$max_lat)))),
+      "temporal" = list(
+        'interval' = list(list(
+          paste0(start_date,"T00:00:00Z"),
+          paste0(end_date,"T00:00:00Z"))
+        ))
+    ),
+    #"table:columns" = stac4cast::build_table_columns_full_bucket(table_schema, table_description),
+    "table:columns" = build_table_columns_full_bucket(table_schema, table_description),
+
+    'assets' = list(
+      # 'data' = list(
+      #   "href"= model_documentation,
+      #   "type"= "text/csv",
+      #   "roles" = list('data'),
+      #   "title"= "NEON Field Site Metadata",
+      #   "description"= readr::read_file(model_metadata_path)
+      # ),
+      'data' = list(
+        "href" = aws_asset_link,
+        "type"= "application/x-parquet",
+        "title"= 'Database Access',
+        "roles" = list('data'),
+        "description"= aws_asset_description
+      ),
+      'thumbnail' = list(
+        "href"= thumbnail_link,
+        "type"= "image/JPEG",
+        "roles" = list('thumbnail'),
+        "title"= thumbnail_title
+      )
+    )
+  )
+
+
+  dest <- destination_path
+  json <- file.path(dest, "collection.json")
+
+  jsonlite::write_json(forecast_score,
+                       json,
+                       pretty=TRUE,
+                       auto_unbox=TRUE)
+  stac4cast::stac_validate(json)
+}
+
+
+generate_group_variable_items <- function(variables){
+
+
+  var_values <- variables
+
+  x <- purrr::map(var_values, function(i)
+    list(
+      "rel" = 'child',
+      'type'= 'application/json',
+      'href' = paste0(i,'/collection.json'))
+  )
+
+  return(x)
+}
+
+generate_variable_model_items <- function(model_list){
+
+
+  #var_values <- variables
+
+  x <- purrr::map(model_list, function(i)
+    list(
+      "rel" = 'item',
+      'type'= 'application/json',
+      'href' = paste0('../../models/model_items/',i,'.json'))
+  )
+
+  return(x)
+}
+
+build_group_variables <- function(table_schema,
+                                  theme_id,
+                                  table_description,
+                                  start_date,
+                                  end_date,
+                                  id_value,
+                                  description_string,
+                                  about_string,
+                                  about_title,
+                                  theme_title,
+                                  destination_path,
+                                  aws_download_path,
+                                  group_var_items
+){
+
+  aws_asset_link <-  paste0("s3://anonymous@",
+                            aws_download_path,
+                            #"/model_id=", model_id,
+                            "?endpoint_override=",config$endpoint)
+
+  aws_asset_description <-   aws_asset_description <- paste0("Use `arrow` for remote access to the database. This R code will return results for the NEON Ecological Forecasting Aquatics theme.\n\n### R\n\n```{r}\n# Use code below\n\nall_results <- arrow::open_dataset(",aws_asset_link,")\ndf <- all_results |> dplyr::collect()\n\n```
+       \n\nYou can use dplyr operations before calling `dplyr::collect()` to `summarise`, `select` columns, and/or `filter` rows prior to pulling the data into a local `data.frame`. Reducing the data that is pulled locally will speed up the data download speed and reduce your memory usage.\n\n\n")
+  forecast_score <- list(
+    "id" = id_value,
+    "description" = description_string,
+    "stac_version"= "1.0.0",
+    "license"= "CC0-1.0",
+    "stac_extensions"= list("https://stac-extensions.github.io/scientific/v1.0.0/schema.json",
+                            "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
+                            "https://stac-extensions.github.io/table/v1.2.0/schema.json"),
+    'type' = 'Collection',
+    'links' = c(group_var_items,#generate_group_variable_items(variables = group_var_values)
+                list(
+                  list(
+                    "rel" = "parent",
+                    "type"= "application/json",
+                    "href" = '../collection.json'
+                  ),
+                  list(
+                    "rel" = "root",
+                    "type" = "application/json",
+                    "href" = '../collection.json'
+                  ),
+                  list(
+                    "rel" = "self",
+                    "type" = "application/json",
+                    "href" = 'collection.json'
+                  ),
+                  list(
+                    "rel" = "cite-as",
+                    "href" = "https://doi.org/10.1002/fee.2616"
+                  ),
+                  list(
+                    "rel" = "about",
+                    "href" = about_string,
+                    "type" = "text/html",
+                    "title" = about_title
+                  ),
+                  list(
+                    "rel" = "describedby",
+                    "href" = "https://ltreb-reservoirs.github.io/vera4cast/",
+                    "title" = "VERA Forecast Challenge Dashboard",
+                    "type" = "text/html"
+                  )
+                )),
+    "title" = theme_title,
+    "extent" = list(
+      "spatial" = list(
+        'bbox' = list(list(as.numeric(catalog_config$bbox$min_lon),
+                           as.numeric(catalog_config$bbox$max_lat),
+                           as.numeric(catalog_config$bbox$max_lon),
+                           as.numeric(catalog_config$bbox$max_lat)))),
+      "temporal" = list(
+        'interval' = list(list(
+          paste0(start_date,"T00:00:00Z"),
+          paste0(end_date,"T00:00:00Z"))
+        ))
+    ),
+    #"table:columns" = stac4cast::build_table_columns_full_bucket(table_schema, table_description),
+    "table:columns" = build_table_columns_full_bucket(table_schema, table_description),
+    'assets' = list(
+      'data' = list(
+        "href" = aws_asset_link,
+        "type"= "application/x-parquet",
+        "title"= 'Database Access',
+        "roles" = list('data'),
+        "description"= aws_asset_description
+      )
+    )
+  )
+
+
+  dest <- destination_path
+  json <- file.path(dest, 'collection.json')
+
+  jsonlite::write_json(forecast_score,
+                       json,
+                       pretty=TRUE,
+                       auto_unbox=TRUE)
+  stac4cast::stac_validate(json)
+}
+
+# build_theme <- function(start_date,end_date, id_value, theme_description, theme_title, destination_path, thumbnail_link, thumbnail_title){
+#
+#   theme <- list(
+#     "id" = id_value,
+#     "type" = "Collection",
+#     "links" = list(
+#       list(
+#         "rel" = "child",
+#         "type" = "application/json",
+#         "href" = 'forecasts/collection.json',
+#         "title" = 'forecast item'
+#       ),
+#       list(
+#         "rel" = "child",
+#         "type" = "application/json",
+#         "href" = 'scores/collection.json',
+#         "title" = 'scores item'
+#       ),
+#       list(
+#         "rel"= "parent",
+#         "type"= "application/json",
+#         "href"= "../catalog.json",
+#         "title" = 'parent'
+#       ),
+#       list(
+#         "rel"= "root",
+#         "type"= "application/json",
+#         "href"= "../catalog.json",
+#         "title" = 'root'
+#       ),
+#       list(
+#         "rel"= "self",
+#         "type"= "application/json",
+#         "href" = 'collection.json',
+#         "title" = 'self'
+#       ),
+#       list(
+#         "rel" ="cite-as",
+#         "href"= catalog_config$citation_link,
+#         "title" = "citation"
+#       ),
+#       list(
+#         "rel"= "about",
+#         "href"= catalog_config$about_string,
+#         "type"= "text/html",
+#         "title"= catalog_config$about_title
+#       ),
+#       list(
+#         "rel"= "describedby",
+#         "href"= catalog_config$about_string,
+#         "title"= catalog_config$about_title,
+#         "type"= "text/html"
+#       )
+#     ),
+#     "title"= theme_title,
+#     'assets' = list(
+#       'thumbnail' = list(
+#         "href"= thumbnail_link,
+#         "type"= "image/JPEG",
+#         "roles" = list('thumbnail'),
+#         "title"= thumbnail_title
+#       )
+#     ),
+#     "extent" = list(
+#       "spatial" = list(
+#         'bbox' = list(list(as.numeric(catalog_config$bbox$min_lon),
+#                       as.numeric(catalog_config$bbox$max_lat),
+#                       as.numeric(catalog_config$bbox$max_lon),
+#                       as.numeric(catalog_config$bbox$max_lat)))
+#       ),
+#       "temporal" = list(
+#         'interval' = list(list(
+#           paste0(start_date,'T00:00:00Z'),
+#           paste0(end_date,'T00:00:00Z'))
+#         ))
+#     ),
+#     "license" = "CC0-1.0",
+#     "keywords" = list(
+#       "Forecasting",
+#       "Data",
+#       "Ecology"
+#     ),
+#     "providers" = list(
+#       list(
+#         "url"= catalog_config$host_url,
+#         "name"= catalog_config$host_name,
+#         "roles" = list(
+#           "producer",
+#           "processor",
+#           "licensor"
+#         )
+#       ),
+#       list(
+#         "url"= catalog_config$host_url,
+#         "name"= catalog_config$host_name,
+#         "roles" = list('host')
+#       )
+#     ),
+#     "description" = theme_description,
+#     "stac_version" = "1.0.0",
+#     "stac_extensions" = list(
+#       "https://stac-extensions.github.io/scientific/v1.0.0/schema.json",
+#       "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
+#       "https://stac-extensions.github.io/table/v1.2.0/schema.json"
+#     ),
+#     "publications" = list(
+#       "doi" = catalog_config$citation_doi,
+#       "citation"= catalog_config$citation_text
+#     )
+#   )
+#
+#
+#   dest <- destination_path
+#   json <- file.path(dest, "collection.json")
+#
+#   jsonlite::write_json(theme,
+#                        json,
+#                        pretty=TRUE,
+#                        auto_unbox=TRUE)
+#   stac4cast::stac_validate(json)
+# }
+
+
+
+
+## ADD PLACEHOLDER FUNCTION FOR STAC4CAST TABLE BUILD
+build_table_columns_full_bucket <- function(data_object,description_df){
+
+  full_string_list <- strsplit(data_object$ToString(),'\n')[[1]]
+
+  #create initial empty list
+  init_list = vector(mode="list", length = data_object$num_cols)
+
+  ## loop through parquet df and description information to build the list
+  for (i in seq.int(1,data_object$num_cols)){
+    list_items <- strsplit(full_string_list[i],': ')[[1]]
+    col_list <- list(name = list_items[1],
+                     type = list_items[2],
+                     description = description_df[1,list_items[1]])
+
+    init_list[[i]] <- col_list
+
+  }
+  return(init_list)
+}
+
+## WE DON'T USE THE FOLLOWING TWO FUNCITONS ANYMORE. KEEPING THEM FOR REFERENCE BUT DELETE EVENTUALLY
+#' build_site_item <- function(theme_id,
+#'                             start_date,
+#'                             end_date,
+#'                             destination_path,
+#'                             theme_title,
+#'                             collection_name,
+#'                             thumbnail_link,
+#'                             site_coords) {
+#'
+#'
+#'   preset_keywords <- list("Forecasting", "NEON")
+#'
+#'   meta <- list(
+#'     "stac_version"= "1.0.0",
+#'     "stac_extensions"= list('https://stac-extensions.github.io/table/v1.2.0/schema.json'),
+#'     "type"= "Feature",
+#'     "id"= collection_name,
+#'     "bbox"=
+#'       list(-156.6194, 17.9696, -66.7987,  71.2824),
+#'     "geometry"= list(
+#'       "type"= "MultiPoint",
+#'       "coordinates"= site_coords
+#'     ),
+#'     "properties"= list(
+#'       #'description' = model_description,
+#'       "description" = 'NEON Site Information',
+#'       "start_datetime" = start_date,
+#'       "end_datetime" = end_date,
+#'       "providers"= list(
+#'         list(
+#'           "url"= "https://ecoforecast.org",
+#'           "name"= "Ecoforecast Challenge",
+#'           "roles"= list(
+#'             "host"
+#'           )
+#'         )
+#'       ),
+#'       "license"= "CC0-1.0",
+#'       "keywords"= c(preset_keywords),
+#'       "table:columns" = build_site_metadata()
+#'     ),
+#'     "collection"= collection_name,
+#'     "links"= list(
+#'       list(
+#'         "rel"= "catalog",
+#'         'href' = '../catalog.json',
+#'         "type"= "application/json",
+#'         "title"= theme_title
+#'       ),
+#'       list(
+#'         "rel"= "root",
+#'         'href' = '../catalog.json',
+#'         "type"= "application/json",
+#'         "title"= "EFI Forecast Catalog"
+#'       ),
+#'       list(
+#'         "rel"= "parent",
+#'         'href' = '../catalog.json',
+#'         "type"= "application/json",
+#'         "title"= theme_title
+#'       ),
+#'       list(
+#'         "rel"= "self",
+#'         "href" = 'collection.json',
+#'         "type"= "application/json",
+#'         "title"= "Raw JSON Text"
+#'       ),
+#'       list(
+#'         "rel" ="cite-as",
+#'         "href"= "https://doi.org/10.1002/fee.2616",
+#'         "title" = "citation"
+#'       ),
+#'       list(
+#'         "rel"= "about",
+#'         "href"= "https://projects.ecoforecast.org/neon4cast-docs/",
+#'         "type"= "text/html",
+#'         "title"= "NEON Forecast Challenge Documentation"
+#'       ),
+#'       list(
+#'         "rel"= "describedby",
+#'         "href"= "https://www.neonscience.org/field-sites/explore-field-sites",
+#'         "title"= "Explore the NEON Field Sites",
+#'         "type"= "text/html"
+#'       )),
+#'     "assets"= list(
+#'       'data' = list(
+#'         "href" = "https://raw.githubusercontent.com/eco4cast/neon4cast-targets/main/NEON_Field_Site_Metadata_20220412.csv",
+#'         "type"= "text/plain",
+#'         "title"= 'NEON Sites Table',
+#'         "roles" = list('data'),
+#'         "description"= 'Table that includes information for all NEON sites'
+#'       ),
+#'       "thumbnail" = list(
+#'         "href"= thumbnail_link,
+#'         "type"= "image/png",
+#'         "title"= 'NEON Sites Image',
+#'         "description"= 'Image describing the NEON sites',
+#'         "roles" = list('thumbnail')
+#'       )
+#'     )
+#'   )
+#'
+#'
+#'   dest <- destination_path
+#'   json <- file.path(dest, "collection.json")
+#'
+#'   jsonlite::write_json(meta,
+#'                        json,
+#'                        pretty=TRUE,
+#'                        auto_unbox=TRUE)
+#'   stac4cast::stac_validate(json)
+#'
+#'   rm(meta)
+#' }
+
+#
+# build_site_metadata <- function(){
+#   site_test <- read_csv("https://raw.githubusercontent.com/eco4cast/neon4cast-targets/main/NEON_Field_Site_Metadata_20220412.csv", col_types = cols())
+#
+#   schema_info <- sapply(site_test, class)
+#
+#   description_create <- data.frame(field_domain_id = 'domain identifier',
+#                                    field_site_id = 'site identifier',
+#                                    field_site_name = 'site name',
+#                                    terrestrial = 'terrestrial theme indicator for site',
+#                                    aquatics = 'aquatics theme indicator for site',
+#                                    phenology = 'phenology theme indicator for site',
+#                                    ticks = 'ticks theme indicator for site',
+#                                    beetles = 'beetles theme indicator for site',
+#                                    phenocam_code = 'code for phenocam',
+#                                    phenocam_roi = 'phenocam region of interest',
+#                                    phenocam_vegetation = 'phenocam vegetation identifier',
+#                                    field_site_type = 'site theme type',
+#                                    field_site_subtype = 'site theme subtype',
+#                                    field_colocated_site = 'colocated field site',
+#                                    field_site_host = 'site host organization',
+#                                    field_site_url = 'site host organization URL',
+#                                    field_nonneon_research_allowed = 'indicate whether non-NEON research is allowed at this site',
+#                                    field_access_details = 'details for accessing the field site',
+#                                    field_neon_field_operations_office = 'NEON field operations office',
+#                                    field_latitude = 'field site latitude',
+#                                    field_longitude = 'field site longitude',
+#                                    field_geodetic_datum = 'geodetic datum for the field site',
+#                                    field_utm_northing = 'northing UTM coordinates',
+#                                    field_utm_easting = 'easting UTM coordinates',
+#                                    field_utm_zone = 'UTM zone for field site',
+#                                    field_site_county = 'county where field site is located',
+#                                    field_site_state = 'state where field site is located',
+#                                    field_site_country = 'country where field site is located',
+#                                    field_mean_elevation_m = 'mean elevation of field site in meters',
+#                                    field_minimum_elevation_m = 'minimum elevation of field site in meters',
+#                                    field_maximum_elevation_m = 'maximum elevation of field site in meters',
+#                                    field_mean_annual_temperature_C = 'mean annual temperaure of field site in degC',
+#                                    field_mean_annual_precipitation_mm= 'mean annual precipitation of field site in mm',
+#                                    field_dominant_wind_direction = 'the dominant wind direction at the field site',
+#                                    field_mean_canopy_height_m = 'mean canpoy height at the field site in meters',
+#                                    field_dominant_nlcd_classes = 'National Land Cover Database Class for field site',
+#                                    field_dominant_plant_species = 'dominant plant species at field site',
+#                                    field_usgs_huc = 'USGS Hydrologic Unit Code for the field site',
+#                                    field_watershed_name = 'watershed name for the field site',
+#                                    field_watershed_size_km2 = 'watershed size of field site in square kilometers',
+#                                    field_lake_depth_mean_m = 'mean lake depth of field site in meters',
+#                                    field_lake_depth_max_m = 'max lake depth of field site in meters',
+#                                    field_tower_height_m = 'height of tower at field site in meters',
+#                                    field_usgs_geology_unit = 'USGS geology unit for field site',
+#                                    field_megapit_soil_family = 'megapit soil family for field site',
+#                                    field_soil_subgroup = 'soild subgroup of field site',
+#                                    field_avg_number_of_green_days = 'average number of green days at field site',
+#                                    field_avg_green_increase_doy = 'day of year for average green increase at field site',
+#                                    field_avg_green_max_doy = 'average day of year with maximum green at field site',
+#                                    field_avg_green_decrease_doy = 'avergae day of year of green decrease at field site',
+#                                    field_avg_green_min_doy = 'average day of year with minimum green at field site',
+#                                    field_phenocams = 'details about phenocams located at each field site',
+#                                    field_number_tower_levels = 'number of tower levels at field site',
+#                                    neon_url = 'NEON URL for field site')
+#
+#
+#
+#
+#
+#   x <- purrr::map(seq.int(1:ncol(site_test)), function(i)
+#     list(
+#       "name" = names(site_test)[i],
+#       'description'= description_create[,i],
+#       'type' = schema_info[[i]]
+#     )
+#   )
+#
+#   return(x)
+# }