diff --git a/.dockerignore b/.dockerignore index 6803498..c725284 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,5 @@ cli .git -.github \ No newline at end of file +.github +.vscode +terraform diff --git a/.github/workflows/Generate-subset-app.yaml b/.github/workflows/Generate-subset-app.yaml index e706535..2d852e2 100644 --- a/.github/workflows/Generate-subset-app.yaml +++ b/.github/workflows/Generate-subset-app.yaml @@ -1,5 +1,7 @@ on: push: + branches: + - main paths: - cli/**.go - '.github/workflows/Generate-subset-app.yaml' @@ -25,21 +27,21 @@ jobs: - name: Package MacOS executable working-directory: ./cli run: | - GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 go build + GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 go build -o hfsubset tar -czvf hfsubset-macos_amd64.tar.gz hfsubset rm hfsubset - name: Package Linux executable working-directory: ./cli run: | - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o hfsubset tar -czvf hfsubset-linux_amd64.tar.gz hfsubset rm hfsubset - name: Package Windows executable working-directory: ./cli run: | - GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build + GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o hfsubset zip hfsubset-windows_amd64.zip hfsubset.exe rm hfsubset.exe diff --git a/.gitignore b/.gitignore index db01932..751c93e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ .Ruserdata hfsubset *.gpkg + +terraform/.terraform +terraform/.infracost diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..4a73e8f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "hydrofabric"] + path = hydrofabric + url = https://github.com/NOAA-OWP/hydrofabric.git diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..24e086b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "go.toolsEnvVars": { + "GO111MODULE": "on" + } +} diff --git a/Dockerfile b/Dockerfile.lambda similarity index 85% rename from Dockerfile rename to Dockerfile.lambda index ebe3d04..c40e3e3 100644 --- a/Dockerfile +++ b/Dockerfile.lambda @@ -25,9 +25,7 @@ RUN apt update \ RUN mkdir -p /hydrofabric/subset # Install CRAN Packages -# && echo "options(Ncpus = $(nproc --all), repos=c(CRAN = 'https://packagemanager.rstudio.com/cran/__linux__/${DISTRIB_CODENAME}/latest'))" >> .Rprofile \ -ENV HF_CRAN_R_PKGS="pak box zlib cli arrow crayon dplyr DBI RSQLite sf \ - terra lambdr glue rstudioapi purrr magrittr nhdplusTools aws.s3" +ENV HF_CRAN_R_PKGS="arrow aws.s3 base64enc box DBI dplyr glue lambdr logger nhdplusTools pak readr RSQLite sf" RUN cd /hydrofabric \ && . /etc/lsb-release \ && echo "options(ncpus = $(nproc --all))" >> .Rprofile \ @@ -38,6 +36,7 @@ RUN cd /hydrofabric \ ${HF_CRAN_R_PKGS} COPY . /hydrofabric/subset +COPY hydrofabric/inst/qml /hydrofabric/qml RUN cd /hydrofabric \ && chmod 755 subset/runtime.R \ diff --git a/cli/go.mod b/cli/go.mod index be176fc..709b48f 100644 --- a/cli/go.mod +++ b/cli/go.mod @@ -1,13 +1,3 @@ module hfsubset go 1.20 - -require github.com/schollz/progressbar/v3 v3.13.1 - -require ( - github.com/mattn/go-runewidth v0.0.14 // indirect - github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect - github.com/rivo/uniseg v0.4.4 // indirect - golang.org/x/sys v0.8.0 // indirect - golang.org/x/term v0.8.0 // indirect -) diff --git a/cli/go.sum b/cli/go.sum index 43ff125..e69de29 100644 --- a/cli/go.sum +++ b/cli/go.sum @@ -1,26 +0,0 @@ -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= -github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= -github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= -github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= -github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= diff --git a/cli/main.go b/cli/main.go index 688284f..d7149cb 100644 --- a/cli/main.go +++ b/cli/main.go @@ -26,11 +26,11 @@ import ( "flag" "fmt" "io" + "log" "net/http" "os" + "strconv" "strings" - - "github.com/schollz/progressbar/v3" ) const usage string = `hfsubset - Hydrofabric Subsetter @@ -48,6 +48,15 @@ Examples: hfsubset -o ./poudre.gpkg -t hl_uri "Gages-06752260" + # Using network-linked data index identifiers + hfsubset -o ./poudre.gpkg -t nldi_feature "nwis:USGS-08279500" + + # Specifying layers and hydrofabric version + hfsubset -l divides,nexus -o ./divides_nexus.gpkg -r "v20" -t hl_uri "Gages-06752260" + + # Finding data around a POI + hfsubset -l flowpaths,reference_flowpaths -o ./sacramento_flowpaths.gpkg -t xy -121.494400,38.581573 + Options: ` @@ -63,6 +72,7 @@ type SubsetResponse struct { data []byte } +// Parse comma-delimited layers string func (opts *SubsetRequest) Layers() []string { split := strings.Split(*opts.layers, ",") for i, v := range split { @@ -71,10 +81,53 @@ func (opts *SubsetRequest) Layers() []string { return split } -func (opts *SubsetRequest) IDs() []string { +// Parse IDs format, i.e. trim spaces +func (opts *SubsetRequest) IDs(key string) []string { for i, v := range opts.id { opts.id[i] = strings.TrimSpace(v) } + + if key == "nldi_feature" { + var feat struct { + FeatureSource string `json:"featureSource"` + FeatureId string `json:"featureId"` + } + + feat.FeatureSource = "" + feat.FeatureId = "" + for i, v := range opts.id { + f := strings.Split(v, ":") + + feat.FeatureSource = f[0] + feat.FeatureId = f[1] + fstr, _ := json.Marshal(feat) + opts.id[i] = string(fstr) + feat.FeatureSource = "" + feat.FeatureId = "" + } + } + + if key == "xy" { + var xy struct { + X float64 + Y float64 + } + + xy.X = -1 + xy.Y = -1 + for i, v := range opts.id { + f := strings.Split(v, ",") + + xy.X, _ = strconv.ParseFloat(f[0], 64) + xy.Y, _ = strconv.ParseFloat(f[1], 64) + + fstr, _ := json.Marshal(xy) + opts.id[i] = string(fstr) + xy.X = -1 + xy.Y = -1 + } + } + return opts.id } @@ -85,33 +138,25 @@ func (opts *SubsetRequest) MarshalJSON() ([]byte, error) { switch *opts.id_type { case "id": key = "id" - break case "hl_uri": key = "hl_uri" - break case "comid": key = "comid" - break case "nldi_feature": - // key = "nldi" - // break - fallthrough + key = "nldi_feature" case "xy": - // key = "loc" - // break - panic("-nldi_feature and -xy support are not implemented currently") + key = "xy" default: panic("type " + *opts.id_type + " not supported; only one of: id, hl_uri, comid, nldi_feature, xy") } jsonmap["layers"] = opts.Layers() - jsonmap[key] = opts.IDs() - // TODO: use opts.version - jsonmap["version"] = "v20" // v20 is v2.0 + jsonmap[key] = opts.IDs(key) + jsonmap["version"] = *opts.version return json.Marshal(jsonmap) } -func makeRequest(lambda_endpoint string, opts *SubsetRequest, bar *progressbar.ProgressBar) *SubsetResponse { +func makeRequest(lambda_endpoint string, opts *SubsetRequest, logger *log.Logger) *SubsetResponse { var uri string = lambda_endpoint + "/2015-03-31/functions/function/invocations" payload, err := opts.MarshalJSON() if err != nil { @@ -120,14 +165,14 @@ func makeRequest(lambda_endpoint string, opts *SubsetRequest, bar *progressbar.P reader := bytes.NewReader(payload) - bar.Describe("[1/4] waiting for response") + logger.Println("[1/4] waiting for response") req, err := http.Post(uri, "application/json", reader) if err != nil { panic(err) } defer req.Body.Close() - bar.Describe("[2/4] reading hydrofabric subset") + logger.Println("[2/4] reading hydrofabric subset") resp := new(SubsetResponse) b := new(bytes.Buffer) buffer := bufio.NewWriter(b) @@ -137,15 +182,15 @@ func makeRequest(lambda_endpoint string, opts *SubsetRequest, bar *progressbar.P } r := b.Bytes() + + // Trim quotes if returned if r[0] == '"' && r[len(r)-1] == '"' { r = r[1 : len(r)-1] } - bar.Describe("[3/4] decoding gzip") + logger.Println("[3/4] parsing base64 response") rr := bytes.NewReader(r) gpkg := base64.NewDecoder(base64.StdEncoding, rr) - // gpkg, _ := gzip.NewReader(rr) - // defer gpkg.Close() resp.data, err = io.ReadAll(gpkg) if err != nil { panic(err) @@ -154,16 +199,15 @@ func makeRequest(lambda_endpoint string, opts *SubsetRequest, bar *progressbar.P return resp } -func writeToFile(request *SubsetRequest, response *SubsetResponse, bar *progressbar.ProgressBar) int { +func writeToFile(request *SubsetRequest, response *SubsetResponse, logger *log.Logger) int { f, err := os.Create(*request.output) if err != nil { panic(err) } - bar.Describe(fmt.Sprintf("[4/4] writing to %s", *request.output)) + logger.Printf("[4/4] writing to %s", *request.output) w := bufio.NewWriter(f) - mw := io.MultiWriter(w, bar) - n, err := mw.Write(response.data) + n, err := w.Write(response.data) if err != nil { panic(err) } @@ -186,7 +230,7 @@ Either "all" or "core", or one or more of: opts := new(SubsetRequest) opts.id_type = flag.String("t", "id", `One of: "id", "hl_uri", "comid", "xy", or "nldi_feature"`) opts.layers = flag.String("l", "core", layers_help) - opts.version = flag.String("r", "pre-release", "Hydrofabric version") + opts.version = flag.String("r", "v20", "Hydrofabric version") opts.output = flag.String("o", "hydrofabric.gpkg", "Output file name") quiet := flag.Bool("quiet", false, "Disable progress bar") flag.Parse() @@ -205,12 +249,11 @@ Either "all" or "core", or one or more of: *opts.layers = "divides,nexus,flowpaths,network,hydrolocations" } - bar := progressbar.NewOptions(3, - progressbar.OptionSetWidth(15), - progressbar.OptionSetDescription("[0/4] sending http request"), - progressbar.OptionShowBytes(false), - progressbar.OptionSetVisibility(!*quiet), - ) + logger := log.New(os.Stdout, "hfsubset ==> ", log.Ltime) + if *quiet { + logger.SetOutput(io.Discard) + } + logger.Println("[0/4] sending http request") var endpoint string if v, ok := os.LookupEnv("HFSUBSET_ENDPOINT"); ok { @@ -220,11 +263,9 @@ Either "all" or "core", or one or more of: endpoint = "https://hfsubset-e9kvx.ondigitalocean.app" } - resp := makeRequest(endpoint, opts, bar) + resp := makeRequest(endpoint, opts, logger) response_size := len(resp.data) - bytes_written := writeToFile(opts, resp, bar) - bar.Finish() - println() // so progress bar doesn't show up + bytes_written := writeToFile(opts, resp, logger) if bytes_written != response_size { panic(fmt.Sprintf("wrote %d bytes out of %d bytes to %s", bytes_written, response_size, *opts.output)) diff --git a/hydrofabric b/hydrofabric new file mode 160000 index 0000000..b07c109 --- /dev/null +++ b/hydrofabric @@ -0,0 +1 @@ +Subproject commit b07c1097608cca386cb29085e0976eabfc91d42c diff --git a/runtime.R b/runtime.R index 00929e9..01a9bb5 100644 --- a/runtime.R +++ b/runtime.R @@ -23,17 +23,43 @@ cache_dir <- tempdir() na_if_null <- function(x) if (is.null(x)) "NULL" else x subset <- function( - id = NULL, - comid = NULL, - hl_uri = NULL, - nldi = NULL, - loc = NULL, - layers = c("divides", "nexus", "flowpaths", "network", "hydrolocations"), - version = c("v1.0", "v1.1", "v1.2", "v20") + id = NULL, + comid = NULL, + hl_uri = NULL, + nldi_feature = NULL, + xy = NULL, + layers = c("divides", + "nexus", + "flowpaths", + "network", + "hydrolocations", + "reference_flowline", + "reference_catchment", + "refactored_flowpaths", + "refactored_divides"), + version = c("v20", + "00_reference", + "01_reference", + "02_refactored", + "03_uniform") ) { version <- match.arg(version) s3_uri <- paste0("s3://lynker-spatial/", version, "/") + missing_all <- is.null(id) && + is.null(comid) && + is.null(hl_uri) && + is.null(nldi_feature) && + is.null(xy) + + if (missing_all) { + return(list( + "response" = "Error", + "status" = 400, + "message" = "No ID parameters were given." + )) + } + logger::log_info(glue::glue( "[subset] Received request:", "{{", @@ -41,9 +67,9 @@ subset <- function( " id: {na_if_null(id)}, ", " comid: {na_if_null(comid)}, ", " hl_uri: {na_if_null(hl_uri)}, ", - " nldi: {na_if_null(nldi)}, ", - " loc: {na_if_null(loc)}, ", - " layers: {layers}, ", + " nldi_feature: {na_if_null(nldi_feature)}, ", + " xy: {na_if_null(xy)}, ", + " layers: {paste0(layers, collapse = ',')}, ", " version: {version}", "}}", .sep = "\n" @@ -56,18 +82,18 @@ subset <- function( id = id, comid = comid, hl_uri = hl_uri, - nldi_feature = nldi, - loc = loc, + nldi_feature = nldi_feature, + xy = xy, base_s3 = s3_uri, lyrs = layers, outfile = hf_tmp, cache_dir = cache_dir, - qml_dir = "/hydrofabric/inst/qml" + qml_dir = "/hydrofabric/qml" ) base64enc::base64encode(readr::read_file_raw(hf_tmp)) } lambdr::start_lambda(config = lambdr::lambda_config( - environ = parent.frame() + environ = parent.frame() )) diff --git a/subset_network.R b/subset_network.R deleted file mode 100644 index 2c491d9..0000000 --- a/subset_network.R +++ /dev/null @@ -1,339 +0,0 @@ -# MIT License -# -# Copyright (c) 2021 Mike Johnson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -read_qml = function (qml_file) { - paste(readLines(qml_file), collapse = "\n") -} - -create_style_row = function (gpkg_path, - layer_name, - style_name, - style_qml) { - geom_col <- - sf::st_read( - gpkg_path, - query = paste0( - "SELECT column_name from gpkg_geometry_columns ", - "WHERE table_name = '", - layer_name, - "'" - ), - quiet = TRUE - )[1, - 1] - data.frame( - f_table_catalog = "", - f_table_schema = "", - f_table_name = layer_name, - f_geometry_column = geom_col, - styleName = style_name, - styleQML = style_qml, - styleSLD = "", - useAsDefault = TRUE, - description = "Generated for hydrofabric", - owner = "", - ui = NA, - update_time = Sys.time() - ) -} - -append_style = function (gpkg_path, - qml_dir = system.file("qml", package = "hydrofabric"), - layer_names) { - f = list.files(qml_dir, full.names = TRUE) - - good_layers = gsub(".qml", "", basename(f)) - - layer_names = layer_names[layer_names %in% good_layers] - - files = grep(paste(layer_names, collapse = "|"), f, value = TRUE) - - styles <- sapply(files, read_qml) - style_names <- - sapply(layer_names, paste0, "__hydrofabric_style") - style_rows <- - do.call( - rbind, - mapply( - create_style_row, - layer_names, - style_names, - styles, - MoreArgs = list(gpkg_path = gpkg_path), - SIMPLIFY = FALSE - ) - ) - if ("layer_styles" %in% sf::st_layers(gpkg_path)$name) { - try(sf::st_delete(gpkg_path, "layer_styles"), silent = TRUE) - } - sf::st_write( - style_rows, - gpkg_path, - layer = "layer_styles", - append = FALSE, - quiet = TRUE - ) - return(gpkg_path) -} - -#' Access Hydrofabric Network -#' @param VPU Vector Processing Unit -#' @param base_s3 the base hydrofabric directory to access in Lynker's s3 -#' @param cache_dir should data be cached to a local directory? Will speed up multiple subsets in the same region -#' @param cache_overwrite description. Should a cached file be overwritten -#' @return file path -#' @export - -get_fabric = function(VPU, - base_s3 = 's3://lynker-spatial/pre-release/', - cache_dir = NULL, - cache_overwrite = FALSE) { - Key <- NULL - - xx = aws.s3::get_bucket_df( - bucket = dirname(base_s3), - prefix = basename(base_s3), - region = 'us-west-2' - ) |> - dplyr::filter(grepl(basename(base_s3), Key) & - grepl(paste0(VPU, ".gpkg$"), Key)) |> - dplyr::filter(!grepl("[.]_", Key)) |> - dplyr::filter(!grepl("/", dirname(Key))) - - if (!is.null(cache_dir)) { - dir.create(cache_dir, - recursive = TRUE, - showWarnings = FALSE) - gpkg = glue::glue("{cache_dir}/{basename(xx$Key)}") - if (cache_overwrite) { - unlink(gpkg) - } - temp = FALSE - } else { - gpkg = tempfile(fileext = ".gpkg") - temp = TRUE - } - - if (!file.exists(gpkg)) { - aws.s3::save_object( - bucket = xx$Bucket, - object = xx$Key, - file = gpkg, - region = 'us-west-2' - ) - } - - return(gpkg) - -} - -#' Subset Hydrofabric Network -#' @param id hydrofabric id (relevant only to nextgen fabrics) -#' @param comid NHDPlusV2 COMID -#' @param hl_uri hydrolocation URI (relevant only to nextgen fabrics) -#' @param nldi_feature list with names 'featureSource' and 'featureID' where 'featureSource' is derived from the "source" column of the response of dataRetrieval::get_nldi_sources() and the 'featureID' is a known identifier from the specified 'featureSource'. -#' @param loc Location given as vector of XY in CRS 4326 (long, lat) -#' @param base_s3 the base hydrofabric directory to access in Lynker's s3 -#' @param lyrs layers to extract. Default is all possible in the hydrofabric GPKG data model -#' @param outfile file path to write to. Must have ".gpkg" extension -#' @param cache_dir should data be cached to a local directory? Will speed up multiple subsets in the same region -#' @param cache_overwrite description. Should a cached file be overwritten -#' @return file path (outfile) or list of features -#' @export - -subset_network = function(id = NULL, - comid = NULL, - hl_uri = NULL, - nldi_feature = NULL, - loc = NULL, - base_s3 = 's3://lynker-spatial/pre-release/', - lyrs = c( - "divides", - "nexus", - "flowpaths", - "network", - "hydrolocations", - "reference_flowline", - "reference_catchment", - "refactored_flowpaths", - "refactored_divides" - ), - outfile = NULL, - cache_dir = NULL, - qml_dir = system.file("qml", package = "hydrofabric"), - cache_overwrite = FALSE) { - Key <- - hf_hydroseq <- - hf_id <- hydroseq <- member_COMID <- toid <- vpu <- NULL - - net = arrow::open_dataset(glue::glue(base_s3, "conus_net.parquet")) |> - dplyr::select(id, toid, hf_id, hl_uri, hf_hydroseq, hydroseq, vpu) |> - dplyr::collect() |> - dplyr::distinct() - - if (!is.null(id) & !is.null(net)) { - comid = dplyr::filter(net, id == !!id | toid == !!id) |> - dplyr::slice_max(hf_hydroseq) |> - dplyr::pull(hf_id) - } - - if (!is.null(nldi_feature)) { - comid = nhdplusTools::get_nldi_feature(nldi_feature)$comid - } - - if (!is.null(loc)) { - comid = nhdplusTools::discover_nhdplus_id(point = sf::st_sfc(sf::st_point(c(loc[1], loc[2])), crs = 4326)) - } - - if (!is.null(hl_uri) & !is.null(net)) { - origin = dplyr::filter(net, hl_uri == !!hl_uri) |> - dplyr::slice_max(hf_hydroseq) |> - dplyr::pull(toid) |> - unique() - } - - if (!is.null(comid) & !is.null(net)) { - origin = dplyr::filter(net, hf_id == comid) |> - dplyr::slice_max(hf_hydroseq) |> - dplyr::pull(id) |> - unique() - } else if (is.null(net)) { - origin = comid - } - - if (is.null(origin)) { - stop("origin not found") - } - - - if (is.null(net)) { - xx = suppressMessages({ - nhdplusTools::get_nhdplus(comid = comid) - }) - - v = nhdplusTools::vpu_boundaries - - vpuid = v$VPUID[which(lengths(sf::st_intersects(sf::st_transform( - v, sf::st_crs(xx) - ), xx)) > 0)] - } else { - vpuid = unique(dplyr::pull(dplyr::filter(net, id == origin | - toid == origin), vpu)) - } - - - gpkg = get_fabric( - VPU = vpuid, - base_s3 = base_s3, - cache_dir = cache_dir, - cache_overwrite = cache_overwrite - ) - lyrs = lyrs[lyrs %in% sf::st_layers(gpkg)$name] - - db <- DBI::dbConnect(RSQLite::SQLite(), gpkg) - on.exit(DBI::dbDisconnect(db)) - - if (!is.null(net)) { - sub_net = dplyr::distinct(dplyr::select(dplyr::filter(net, vpu == vpuid), id, toid)) - } else { - lookup <- - c( - id = "ID", - id = "COMID", - toid = "toID", - toid = "toCOMID" - ) - dplyr::sub_net = dplyr::tbl(db, lyrs[grepl("flowline|flowpath", lyrs)]) |> - dplyr::select(dplyr::any_of( - c( - "id", - "toid", - 'COMID', - 'toCOMID', - "ID", - "toID", - "member_COMID" - ) - )) |> - dplyr::collect() |> - dplyr::rename(dplyr::any_of(lookup)) - - if ("member_COMID" %in% names(sub_net)) { - origin = dplyr::filter(sub_net, grepl(origin, member_COMID)) |> - dplyr::pull(id) - - sub_net = dplyr::select(sub_net, -member_COMID) - } - } - - message("Starting from: `", origin, "`") - - tmap = suppressWarnings({ - nhdplusTools::get_sorted(dplyr::distinct(sub_net), outlets = origin) - }) - - if (grepl("nex", utils::tail(tmap$id, 1))) { - tmap = utils::head(tmap, -1) - } - - ids = unique(c(unlist(tmap))) - - hydrofabric = list() - - for (j in 1:length(lyrs)) { - message(glue::glue("Subsetting: {lyrs[j]} ({j}/{length(lyrs)})")) - - crs = sf::st_layers(gpkg)$crs - - t = dplyr::tbl(db, lyrs[j]) |> - dplyr::filter(dplyr::if_any(dplyr::any_of( - c('COMID', 'FEATUREID', 'divide_id', 'id', 'ds_id', "ID") - ), ~ . %in% !!ids)) |> - dplyr::collect() - - if (all(!any(is.na(as.character(crs[[j]]))), nrow(t) > 0)) { - if (any(c("geometry", "geom") %in% names(t))) { - t = sf::st_as_sf(t, crs = crs[[j]]) - } else { - t = t - } - } - - if (!is.null(outfile)) { - sf::write_sf(t, outfile, lyrs[j]) - } else { - hydrofabric[[lyrs[j]]] = t - } - } - - if (is.null(cache_dir)) { - unlink(gpkg) - } - - if (!is.null(outfile)) { - outfile = append_style(outfile, qml_dir = qml_dir, layer_names = lyrs) - return(outfile) - } else { - hydrofabric - } - -} diff --git a/subset_network.R b/subset_network.R new file mode 120000 index 0000000..92865cd --- /dev/null +++ b/subset_network.R @@ -0,0 +1 @@ +hydrofabric/R/subset_network.R \ No newline at end of file diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000..78715c7 --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.23.1" + constraints = "~> 5.0" + hashes = [ + "h1:s23thJVPJHUdS7ESZHoeMkxNcTeaqWvg2usv8ylFVL4=", + "zh:024a188ad3c979a9ec0d7d898aaa90a3867a8839edc8d3543ea6155e6e010064", + "zh:05b73a04c58534a7527718ef55040577d5c573ea704e16a813e7d1b18a7f4c26", + "zh:13932cdee2fa90f40ebaa783f033752864eb6899129e055511359f8d1ada3710", + "zh:3500f5febc7878b4426ef89a16c0096eefd4dd0c5b0d9ba00f9ed54387df5d09", + "zh:394a48dea7dfb0ae40e506ccdeb5387829dbb8ab00fb64f41c347a1de092aa00", + "zh:51a57f258b3bce2c167b39b6ecf486f72f523da05d4c92adc6b697abe1c5ff1f", + "zh:7290488a96d8d10119b431eb08a37407c0812283042a21b69bcc2454eabc08ad", + "zh:7545389dbbba624c0ffa72fa376b359b27f484aba02139d37ee5323b589e0939", + "zh:92266ac6070809e0c874511ae93097c8b1eddce4c0213e487c5439e89b6ad64d", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9c3841bd650d6ba471c7159bcdfa35200e5e49c2ea11032c481a33cf7875879d", + "zh:bd103c46a16e7f9357e08d6427c316ccc56d203452130eed8e36ede3afa3322c", + "zh:cab0a16e320c6ca285a3a51f40c8f46dbaa0712856594819b415b4d8b3e63910", + "zh:e8adedcda4d6ff47dcae9c9bb884da26ca448fb6f7436be95ad6a341e4d8094a", + "zh:fc23701a3723f50878f440dcdf8768ea96d60a0d7c351aa6dfb912ad832c8384", + ] +} diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..4691508 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,300 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = "us-west-2" +} + +data "aws_subnets" "lynker_spatial_service_subnets" { + filter { + name = "vpc-id" + values = [ + aws_subnet.lynker_spatial_public_subnet_a.cidr_block, + aws_subnet.lynker_spatial_public_subnet_b.cidr_block, + aws_subnet.lynker_spatial_public_subnet_c.cidr_block + ] + } +} + +//! IAM: provides permissions to assume ECS and ECR roles +data "aws_iam_policy_document" "hfsubset_ecs_task_policy_document" { + statement { + actions = ["sts:AssumeRole"] + effect = "Allow" + + principals { + type = "Service" + identifiers = ["ecs-tasks.amazonaws.com", "ecs.amazonaws.com", "ecr.amazonaws.com"] + } + } +} + +//! IAM: provides permissions for read-only access to S3 bucket +data "aws_iam_policy_document" "hfsubset_s3_access_policy_document" { + statement { + effect = "Allow" + actions = [ + "s3:ListBucket", + "s3:ListObjects", + "s3:GetObject" + ] + resources = [ + "arn:aws:s3:::lynker-spatial", + "arn:aws:s3:::lynker-spatial/*" + ] + } +} + +//! IAM: provides permissions to create ECS cluster nodes, pull from ECR, and logging +data "aws_iam_policy_document" "hfsubset_ecs_exec_policy_document" { + statement { + effect = "Allow" + actions = [ + "ec2:AttachNetworkInterface", + "ec2:CreateNetworkInterface", + "ec2:CreateNetworkInterfacePermission", + "ec2:DeleteNetworkInterface", + "ec2:DeleteNetworkInterfacePermission", + "ec2:Describe*", + "ec2:DetachNetworkInterface", + "elasticloadbalancing:DeregisterInstancesFromLoadBalancer", + "elasticloadbalancing:DeregisterTargets", + "elasticloadbalancing:Describe*", + "elasticloadbalancing:RegisterInstancesWithLoadBalancer", + "elasticloadbalancing:RegisterTargets", + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents", + "logs:DescribeLogStreams" + ] + resources = ["*"] // FIXME: least privilege + } +} + +# Resources =================================================================== + +//! Lynker-Spatial VPC +resource "aws_vpc" "lynker_spatial_vpc" { + cidr_block = "172.25.0.0/16" + enable_network_address_usage_metrics = true +} + +resource "aws_subnet" "lynker_spatial_public_subnet_a" { + vpc_id = aws_vpc.lynker_spatial_vpc.id + cidr_block = "172.25.1.0/24" + availability_zone = "us-west-2a" + + tags = { + Name = "Lynker Spatial Public Subnet A" + } +} + +resource "aws_subnet" "lynker_spatial_public_subnet_b" { + vpc_id = aws_vpc.lynker_spatial_vpc.id + cidr_block = "172.25.2.0/24" + availability_zone = "us-west-2b" + + tags = { + Name = "Lynker Spatial Public Subnet B" + } +} + +resource "aws_subnet" "lynker_spatial_public_subnet_c" { + vpc_id = aws_vpc.lynker_spatial_vpc.id + cidr_block = "172.25.3.0/24" + availability_zone = "us-west-2c" + + tags = { + Name = "Lynker Spatial Public Subnet C" + } +} + +//! ECR Repository +resource "aws_ecr_repository" "hfsubset_ecr" { + name = "hydrofabric-hfsubset" + image_tag_mutability = "IMMUTABLE" + image_scanning_configuration { + scan_on_push = true + } +} + +//! ECS Cluster +resource "aws_ecs_cluster" "hfsubset_ecs" { + name = "hydrofabric-hfsubset-ecs-cluster" +} + +//! IAM Resource for ECS Tasks +resource "aws_iam_role" "hfsubset_ecs_task_role" { + name = "hydrofabric-hfsubset-ecs-task-role" + description = "Allows attached service to assume ECS and ECR roles" + assume_role_policy = data.aws_iam_policy_document.hfsubset_ecs_task_policy_document.json +} + +//! IAM Resource for ECS Execution +resource "aws_iam_role" "hfsubset_ecs_exec_role" { + name = "hydrofabric-hfsubset-ecs-exec-role" + description = "Allows attached service to execute ECS tasks, pull from ECR, and output logs" + assume_role_policy = data.aws_iam_policy_document.hfsubset_ecs_exec_policy_document.json +} + +//! IAM Resource for Read-only S3 Access +resource "aws_iam_role" "hfsubset_s3_access_role" { + name = "hydrofabric-hfsubset-s3-access-role" + description = "Allows attached service read-only access to the lynker-spatial S3 bucket" + assume_role_policy = data.aws_iam_policy_document.hfsubset_s3_access_policy_document.json +} + +//! Application Load Balancer Security Group +resource "aws_security_group" "hfsubset_alb_sg" { + name = "hydrofabric-hfsubset-alb-security-group" + description = "Security group for hfsubset ALB; allows TCP/80 access bidirectionally anywhere." + + ingress { + description = "TCP/80 incoming from anywhere" + protocol = "tcp" + from_port = 80 + to_port = 80 + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + description = "TCP/80 outgoing to anywhere" + protocol = "tcp" + from_port = 80 + to_port = 80 + cidr_blocks = ["0.0.0.0/0"] + } +} + +//! ECS Task Security Group +resource "aws_security_group" "hfsubset_ecs_task_sg" { + name = "hydrofabric-hfsubset-ecs-task-security-group" + vpc_id = aws_vpc.lynker_spatial_vpc.id + + description = "Security group for hfsubset ECS Task; allows TCP/8080 to hfsubset ALB only." + + ingress { + description = "TCP/8080 incoming from hfsubset ALB" + protocol = "tcp" + from_port = 8080 + to_port = 8080 + security_groups = [aws_security_group.hfsubset_alb_sg.id] + } + + egress { + description = "TCP/8080 outgoing to hfsubset ALB" + protocol = "tcp" + from_port = 8080 + to_port = 8080 + security_groups = [aws_security_group.hfsubset_alb_sg.id] + } +} + +//! Application Load Balancer +resource "aws_lb" "hfsubset_alb" { + name = "hydrofabric-hfsubset-alb" + internal = false + load_balancer_type = "application" + enable_deletion_protection = false + drop_invalid_header_fields = true + security_groups = [aws_security_group.hfsubset_alb_sg.id] + subnets = data.aws_subnets.lynker_spatial_service_subnets.ids +} + +//! ALB Target Group +resource "aws_lb_target_group" "hfsubset_alb_target_group" { + name = "hydrofabric-hfsubset-alb-target" + port = 80 + protocol = "HTTP" + target_type = "ip" + vpc_id = aws_vpc.lynker_spatial_vpc.id +} + +//! ALB Listener +resource "aws_lb_listener" "hfsubset_alb_listener" { + load_balancer_arn = aws_lb.hfsubset_alb.arn + port = "443" + protocol = "HTTPS" + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.hfsubset_alb_target_group.arn + } +} + +//! ECS Task Definition +resource "aws_ecs_task_definition" "hfsubset_ecs_task_def" { + family = "hydrofabric-hfsubset-task-definition" + container_definitions = jsonencode([{ + name = "hydrofabric-hfsubset-container" + image = aws_ecr_repository.hfsubset_ecr.repository_url + cpu = 1024 // 1024 CPU units ~ 1 vCPU + memory = 4096 + portMappings = [ + { + name = "hydrofabric-hfsubset-http" + containerPort = 8080 + hostPort = 8080 + protocol = "tcp" + appProtocol = "http" + } + ] + essential = true + environment = [] + mountPoints = [] + volumesFrom = [] + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-create-group" = "true" + "awslogs-group" = "/ecs/hydrofabric-hfsubset" + "awslogs-region" = "us-west-1" + } + } + }]) + + task_role_arn = aws_iam_role.hfsubset_ecs_task_role.arn + execution_role_arn = aws_iam_role.hfsubset_ecs_exec_role.arn + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + cpu = "1024" + memory = "4096" + + runtime_platform { + operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } +} + +//! ECS Service +resource "aws_ecs_service" "hfsubset_ecs_service" { + name = "hydrofabric-hfsubset-ecs-service" + cluster = aws_ecs_cluster.hfsubset_ecs.id + task_definition = aws_ecs_task_definition.hfsubset_ecs_task_def.arn + desired_count = 1 + depends_on = [aws_lb_listener.hfsubset_alb_listener] + launch_type = "FARGATE" + + load_balancer { + target_group_arn = aws_lb_target_group.hfsubset_alb_target_group.arn + container_name = "hydrofabric-hfsubset-container" + container_port = 8080 + } + + network_configuration { + subnets = data.aws_subnets.lynker_spatial_service_subnets.ids + assign_public_ip = true + security_groups = [aws_security_group.hfsubset_ecs_task_sg.id] + } +} +