From 8f34db64873195de7252bb617b0b8fa593d80572 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Sun, 10 Nov 2024 20:01:04 +0100 Subject: [PATCH] [Breaking] Make NativeLink configurable via K8s-compatible yaml --- Cargo.lock | 711 ++++++- Cargo.toml | 4 + .../docker-compose/local-storage-cas.json | 16 +- .../docker-compose/scheduler.json | 20 +- .../docker-compose/worker.json | 24 +- flake.nix | 48 + kubernetes/configmaps/cas.json | 27 +- kubernetes/configmaps/scheduler.json | 18 +- kubernetes/configmaps/worker.json | 21 +- nativelink-config/BUILD.bazel | 2 + nativelink-config/Cargo.toml | 5 + nativelink-config/examples/basic_cas.json | 30 +- .../examples/filesystem_cas.json | 50 +- nativelink-config/examples/redis.json | 37 +- .../s3_backend_with_local_fast_cas.json | 56 +- nativelink-config/src/cas_server.rs | 94 +- nativelink-config/src/lib.rs | 114 ++ nativelink-config/src/schedulers.rs | 93 +- nativelink-config/src/stores.rs | 243 ++- nativelink-controller/BUILD.bazel | 2 + nativelink-controller/Cargo.toml | 33 + nativelink-controller/cas.yaml | 92 + nativelink-controller/deploy.yaml | 69 + nativelink-controller/nativelink-crd.yaml | 1738 +++++++++++++++++ nativelink-controller/scheduler.yaml | 77 + nativelink-controller/src/bin/generate-crd.rs | 30 + .../src/bin/nativelink-controller.rs | 291 +++ nativelink-controller/src/controller.rs | 83 + nativelink-controller/src/lib.rs | 15 + nativelink-controller/worker.yaml | 75 + .../src/default_scheduler_factory.rs | 50 +- nativelink-scheduler/src/grpc_scheduler.rs | 4 +- .../src/property_modifier_scheduler.rs | 2 +- nativelink-scheduler/src/simple_scheduler.rs | 4 +- .../tests/cache_lookup_scheduler_test.rs | 2 +- .../tests/property_modifier_scheduler_test.rs | 10 +- .../tests/simple_scheduler_test.rs | 42 +- nativelink-service/tests/ac_server_test.rs | 9 +- nativelink-service/tests/bep_server_test.rs | 5 +- .../tests/bytestream_server_test.rs | 5 +- nativelink-service/tests/cas_server_test.rs | 5 +- nativelink-store/src/compression_store.rs | 2 +- nativelink-store/src/dedup_store.rs | 2 +- nativelink-store/src/default_store_factory.rs | 78 +- nativelink-store/src/existence_cache_store.rs | 2 +- nativelink-store/src/fast_slow_store.rs | 2 +- nativelink-store/src/filesystem_store.rs | 4 +- nativelink-store/src/grpc_store.rs | 4 +- nativelink-store/src/memory_store.rs | 2 +- nativelink-store/src/redis_store.rs | 2 +- nativelink-store/src/ref_store.rs | 2 +- nativelink-store/src/s3_store.rs | 6 +- nativelink-store/src/shard_store.rs | 2 +- .../src/size_partitioning_store.rs | 2 +- nativelink-store/src/verify_store.rs | 2 +- nativelink-store/tests/ac_utils_test.rs | 2 +- .../tests/completeness_checking_store_test.rs | 6 +- .../tests/compression_store_test.rs | 77 +- nativelink-store/tests/dedup_store_test.rs | 73 +- .../tests/existence_store_test.rs | 28 +- .../tests/fast_slow_store_test.rs | 54 +- .../tests/filesystem_store_test.rs | 143 +- nativelink-store/tests/memory_store_test.rs | 16 +- nativelink-store/tests/ref_store_test.rs | 10 +- nativelink-store/tests/s3_store_test.rs | 24 +- nativelink-store/tests/shard_store_test.rs | 12 +- .../tests/size_partitioning_store_test.rs | 15 +- nativelink-store/tests/verify_store_test.rs | 81 +- .../src/running_actions_manager.rs | 6 +- nativelink-worker/tests/local_worker_test.rs | 33 +- .../tests/running_actions_manager_test.rs | 21 +- src/bin/nativelink.rs | 20 +- 72 files changed, 4160 insertions(+), 829 deletions(-) create mode 100644 nativelink-controller/BUILD.bazel create mode 100644 
nativelink-controller/Cargo.toml create mode 100644 nativelink-controller/cas.yaml create mode 100644 nativelink-controller/deploy.yaml create mode 100644 nativelink-controller/nativelink-crd.yaml create mode 100644 nativelink-controller/scheduler.yaml create mode 100644 nativelink-controller/src/bin/generate-crd.rs create mode 100644 nativelink-controller/src/bin/nativelink-controller.rs create mode 100644 nativelink-controller/src/controller.rs create mode 100644 nativelink-controller/src/lib.rs create mode 100644 nativelink-controller/worker.yaml diff --git a/Cargo.lock b/Cargo.lock index c9609016a..377a54f21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "getrandom", "once_cell", "version_check", "zerocopy", @@ -44,6 +45,21 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.15" @@ -95,9 +111,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arc-swap" @@ -128,6 +144,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "async-broadcast" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cd0e2e25ea8e5f7e9df04578dc6cf5c83577fd09b1a46aaf5c85e1c33f2a7e" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-lock" version = "3.4.0" @@ -158,7 +186,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -169,7 +197,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -414,7 +442,7 @@ dependencies = [ "regex-lite", "roxmltree", "serde_json", - "thiserror", + "thiserror 1.0.64", ] [[package]] @@ -446,7 +474,7 @@ dependencies = [ "http-body 1.0.1", "httparse", "hyper 0.14.30", - "hyper-rustls", + "hyper-rustls 0.24.2", "indexmap 2.6.0", "once_cell", "pin-project-lite", @@ -572,6 +600,17 @@ dependencies = [ "tower-service", ] +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "getrandom", + "instant", + "rand", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -618,6 +657,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" @@ -700,6 +745,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets", +] + [[package]] name = "clap" version = "4.5.20" @@ -731,7 +791,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -907,6 +967,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.87", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.87", +] + [[package]] name = "deranged" version = "0.3.11" @@ -933,12 +1028,50 @@ dependencies = [ "subtle", ] +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "enum-ordinalize" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -1031,6 +1164,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fluent-uri" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c704e9dbe1ddd863da1e6ff3567795087b1eb201ce80d8fa81162e1516500d" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1097,7 +1239,7 @@ checksum = "1458c6e22d36d61507034d5afecc64f105c1d39712b7ac6ec3b352c423f715cc" dependencies 
= [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -1156,7 +1298,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -1266,6 +1408,16 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.0" @@ -1290,6 +1442,30 @@ dependencies = [ "num-traits", ] +[[package]] +name = "headers" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322106e6bd0cba2d5ead589ddb8150a13d7c4217cf80d7c4f682ca994ccc6aa9" +dependencies = [ + "base64 0.21.7", + "bytes", + "headers-core", + "http 1.1.0", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" +dependencies = [ + "http 1.1.0", +] + [[package]] name = "heck" version = "0.5.0" @@ -1317,6 +1493,15 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "http" version = "0.2.12" @@ -1436,6 +1621,26 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-http-proxy" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d06dbdfbacf34d996c6fb540a71a684a7aae9056c71951163af8a8a4c07b9a4" +dependencies = [ + "bytes", + "futures-util", + "headers", + "http 1.1.0", + "hyper 1.4.1", + "hyper-rustls 0.27.3", + "hyper-util", + "pin-project-lite", + "rustls-native-certs 0.7.3", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -1453,6 +1658,25 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.4.1", + "hyper-util", + "log", + "rustls 0.23.14", + "rustls-native-certs 0.8.0", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", +] + [[package]] name = "hyper-timeout" version = "0.5.1" @@ -1485,6 +1709,35 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.5.0" @@ -1516,6 +1769,15 @@ dependencies = [ "serde", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1546,6 +1808,168 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-patch" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b1fb8864823fad91877e6caea0baca82e49e8db50f8e5c9f9a453e27d3330fc" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.64", +] + +[[package]] +name = "jsonpath-rust" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d8fe85bd70ff715f31ce8c739194b423d79811a19602115d611a3ec85d6200" +dependencies = [ + "lazy_static", + "once_cell", + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 1.0.64", +] + +[[package]] +name = "jsonptr" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c6e529149475ca0b2820835d3dce8fcc41c6b943ca608d32f35b449255e4627" +dependencies = [ + "fluent-uri", + "serde", + "serde_json", +] + +[[package]] +name = "k8s-openapi" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8847402328d8301354c94d605481f25a6bdc1ed65471fd96af8eca71141b13" +dependencies = [ + "base64 0.22.1", + "chrono", + "schemars", + "serde", + "serde-value", + "serde_json", +] + +[[package]] +name = "kube" +version = "0.96.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efffeb3df0bd4ef3e5d65044573499c0e4889b988070b08c50b25b1329289a1f" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "0.96.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf471ece8ff8d24735ce78dac4d091e9fcb8d74811aeb6b75de4d1c3f5de0f1" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "either", + "futures", + "home", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.4.1", + "hyper-http-proxy", + "hyper-rustls 0.27.3", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls 0.23.14", + "rustls-pemfile 2.2.0", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 1.0.64", + "tokio", + "tokio-util", + "tower 0.5.1", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "0.96.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f42346d30bb34d1d7adc5c549b691bce7aa3a1e60254e68fab7e2d7b26fe3d77" +dependencies = [ + "chrono", + "form_urlencoded", + "http 1.1.0", + "json-patch", + "k8s-openapi", + "schemars", + "serde", + "serde-value", + "serde_json", + "thiserror 1.0.64", +] + +[[package]] +name = "kube-derive" +version = "0.96.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9364e04cc5e0482136c6ee8b7fb7551812da25802249f35b3def7aaa31e82ad" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.87", +] + +[[package]] +name = "kube-runtime" +version = "0.96.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fbf1f6ffa98e65f1d2a9a69338bb60605d46be7edf00237784b89e62c9bd44" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "async-trait", + "backoff", + "educe", + "futures", + "hashbrown 0.14.5", + "json-patch", + "jsonptr", + "k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 1.0.64", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1574,11 +1998,21 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags", + "bitflags 2.6.0", "libc", "redox_syscall", ] +[[package]] +name = "libyml" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" +dependencies = [ + "anyhow", + "version_check", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -1738,6 +2172,7 @@ dependencies = [ "hyper-util", "mimalloc", "nativelink-config", + "nativelink-controller", "nativelink-error", "nativelink-metric", "nativelink-metric-collector", @@ -1770,11 +2205,33 @@ dependencies = [ "byte-unit", "humantime", "pretty_assertions", + "schemars", "serde", + "serde_json", "serde_json5", "shellexpand", ] +[[package]] +name = "nativelink-controller" +version = "0.5.3" +dependencies = [ + "anyhow", + "chrono", + "futures", + "k8s-openapi", + "kube", + "nativelink-config", + "schemars", + "serde", + "serde_json", + "serde_yml", + "thiserror 2.0.0", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "nativelink-error" version = "0.5.3" @@ -1796,7 +2253,7 @@ version = "0.5.3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -1833,7 +2290,7 @@ version = "0.5.3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -1939,7 +2396,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "hyper 0.14.30", - "hyper-rustls", + "hyper-rustls 0.24.2", "lz4_flex", "memory-stats", "mock_instant", @@ -1974,7 +2431,7 @@ version = "0.5.3" dependencies = [ "async-lock", "async-trait", - "bitflags", + "bitflags 2.6.0", "blake3", "bytes", "console-subscriber", @@ -2120,7 +2577,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.64", ] [[package]] @@ -2149,7 +2606,16 @@ dependencies = [ "glob", "once_cell", "opentelemetry", - "thiserror", + "thiserror 1.0.64", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", ] [[package]] @@ -2199,7 +2665,17 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31f2f4539bffe53fc4b4da301df49d114b845b077bd5727b7fe2bd9d8df2ae68" dependencies = [ - "bitflags", + "bitflags 2.6.0", +] + +[[package]] +name = "pem" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" +dependencies = [ + "base64 0.22.1", + "serde", ] [[package]] @@ -2215,7 +2691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdbef9d1d47087a895abd220ed25eb4ad973a5e26f6a4367b038c25e28dfc2d9" dependencies = [ 
"memchr", - "thiserror", + "thiserror 1.0.64", "ucd-trie", ] @@ -2239,7 +2715,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -2280,7 +2756,7 @@ checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -2327,7 +2803,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -2375,7 +2851,7 @@ dependencies = [ "memchr", "parking_lot", "protobuf", - "thiserror", + "thiserror 1.0.64", ] [[package]] @@ -2405,7 +2881,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.79", + "syn 2.0.87", "tempfile", ] @@ -2419,7 +2895,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -2496,7 +2972,7 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags", + "bitflags 2.6.0", ] [[package]] @@ -2610,7 +3086,7 @@ version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -2669,6 +3145,19 @@ dependencies = [ "security-framework", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2744,6 +3233,30 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "schemars" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.87", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2766,13 +3279,22 @@ version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a7b59a5d9b0099720b417b6325d91a52cbf5b3dcb5041d864be53eefa58abc" +[[package]] +name = "secrecy" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" +dependencies = [ + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -2804,6 +3326,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.210" @@ -2812,14 +3344,25 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "indexmap 2.6.0", "itoa", @@ -2839,6 +3382,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.6.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "serde_yml" +version = "0.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" +dependencies = [ + "indexmap 2.6.0", + "itoa", + "libyml", + "memchr", + "ryu", + "serde", + "version_check", +] + [[package]] name = "serial_test" version = "3.1.1" @@ -2860,7 +3431,7 @@ checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -2988,9 +3559,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -3028,7 +3599,16 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.64", +] + +[[package]] +name = "thiserror" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15291287e9bff1bc6f9ff3409ed9af665bec7a5fc8ac079ea96be07bca0e2668" +dependencies = [ + "thiserror-impl 2.0.0", ] [[package]] @@ -3039,7 +3619,18 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22efd00f33f93fa62848a7cab956c3d38c8d43095efda1decfc2b3a5dc0b8972" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] @@ -3123,7 +3714,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -3168,6 +3759,7 @@ dependencies = [ "futures-core", "futures-sink", "pin-project-lite", + "slab", "tokio", ] @@ -3215,7 +3807,7 @@ dependencies = [ 
"prost-build", "prost-types", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -3248,8 +3840,29 @@ dependencies = [ "futures-util", "pin-project-lite", "sync_wrapper 0.1.2", + "tokio", + "tokio-util", "tower-layer", "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8437150ab6bbc8c5f0f519e3d5ed4aa883a83dd4cdd3d1b21f9482936046cb97" +dependencies = [ + "base64 0.22.1", + "bitflags 2.6.0", + "bytes", + "http 1.1.0", + "http-body 1.0.1", + "mime", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -3270,6 +3883,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3283,7 +3897,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] @@ -3383,6 +3997,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -3483,7 +4103,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -3505,7 +4125,7 @@ checksum = "4c74f6e152a76a2ad448e223b0fc0b6b5747649c3d769cc6bf45737bf97d0ed6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3544,6 +4164,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -3656,7 +4285,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.87", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 33fe54582..b41ab4759 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,10 +35,14 @@ enable_tokio_console = [ nix = [ "nativelink-worker/nix" ] +crd = [ + "nativelink-config/crd" +] [dependencies] nativelink-error = { path = "nativelink-error" } nativelink-config = { path = "nativelink-config" } +nativelink-controller = { path = "nativelink-controller" } nativelink-scheduler = { path = "nativelink-scheduler" } nativelink-service = { path = "nativelink-service" } nativelink-store = { path = "nativelink-store" } diff --git a/deployment-examples/docker-compose/local-storage-cas.json b/deployment-examples/docker-compose/local-storage-cas.json index e7b622043..fc08dab03 100644 --- a/deployment-examples/docker-compose/local-storage-cas.json +++ b/deployment-examples/docker-compose/local-storage-cas.json @@ -4,31 +4,31 @@ // so objects are compressed, deduplicated and uses some in-memory // optimizations for certain hot paths. 
{ - "stores": { - "CAS_MAIN_STORE": { + "stores": [ + { + "name": "CAS_MAIN_STORE", "compression": { "compression_algorithm": { "lz4": {} }, "backend": { + "name": "fs-cas", "filesystem": { "content_path": "~/.cache/nativelink/content_path-cas", "temp_path": "~/.cache/nativelink/tmp_path-cas", "eviction_policy": { - // 10gb. - "max_bytes": 10000000000, + "max_bytes": "10Gb", } } } } - }, - "AC_MAIN_STORE": { + }, { + "name": "AC_MAIN_STORE", "filesystem": { "content_path": "~/.cache/nativelink/content_path-ac", "temp_path": "~/.cache/nativelink/tmp_path-ac", "eviction_policy": { - // 500mb. - "max_bytes": 500000000, + "max_bytes": "500Mb" } } } diff --git a/deployment-examples/docker-compose/scheduler.json b/deployment-examples/docker-compose/scheduler.json index 4af051738..39778e805 100644 --- a/deployment-examples/docker-compose/scheduler.json +++ b/deployment-examples/docker-compose/scheduler.json @@ -1,7 +1,8 @@ { - "stores": { - "GRPC_LOCAL_STORE": { + "stores": [ + { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -9,9 +10,9 @@ ], "store_type": "cas" } - }, - "GRPC_LOCAL_AC_STORE": { + }, { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_AC_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -20,10 +21,11 @@ "store_type": "ac" } } - }, - "schedulers": { - "MAIN_SCHEDULER": { - "simple": { + ], + "schedulers": [ + { + "name": "MAIN_SCHEDULER", + "siple": { "supported_platform_properties": { "cpu_count": "minimum", "OSFamily": "priority", @@ -31,7 +33,7 @@ } } } - }, + ], "servers": [{ "listener": { "http": { diff --git a/deployment-examples/docker-compose/worker.json b/deployment-examples/docker-compose/worker.json index 186722e01..a1e8631be 100644 --- a/deployment-examples/docker-compose/worker.json +++ b/deployment-examples/docker-compose/worker.json @@ -1,7 +1,8 @@ { - "stores": { - "GRPC_LOCAL_STORE": { + "stores": [ + { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -9,9 +10,9 @@ ], "store_type": "cas" } - }, - "GRPC_LOCAL_AC_STORE": { + }, { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_AC_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -19,27 +20,28 @@ ], "store_type": "ac" } - }, - "WORKER_FAST_SLOW_STORE": { - "fast_slow": { + }, { + "name": "WORKER_FAST_SLOW_STORE", + "fastSlow": { "fast": { + "name": "fs-worker", "filesystem": { "content_path": "/root/.cache/nativelink/data-worker-test/content_path-cas", "temp_path": "/root/.cache/nativelink/data-worker-test/tmp_path-cas", "eviction_policy": { - // 10gb. - "max_bytes": 10000000000, + "max_bytes": "10Gb", } } }, "slow": { - "ref_store": { + "name": "ref-worker", + "ref": { "name": "GRPC_LOCAL_STORE", } } } } - }, + ], "workers": [{ "local": { "worker_api_endpoint": { diff --git a/flake.nix b/flake.nix index f9f70a972..491f8bb86 100644 --- a/flake.nix +++ b/flake.nix @@ -203,6 +203,14 @@ cargoArtifacts = cargoArtifactsFor p; }); + nativelinkControllerFor = p: + (craneLibFor p).buildPackage ((commonArgsFor p) + // { + pname = "nativelink-controller"; + cargoExtraArgs = "--package=nativelink-controller"; + cargoArtifacts = cargoArtifactsFor p; + }); + nativeTargetPkgs = if pkgs.system == "x86_64-linux" then pkgs.pkgsCross.musl64 @@ -225,6 +233,14 @@ cargoExtraArgs = "--features enable_tokio_console"; }); + # TODO(aaronmondal): Enable. 
+ # nativelink-controller = nativelinkControllerFor nativeTargetPkgs; + + # These two can be built by all build platforms. This is not true for + # darwin targets which are only buildable via native compilation. + nativelink-controller-aarch64-linux = nativelinkControllerFor pkgs.pkgsCross.aarch64-multiplatform-musl; + nativelink-controller-x86_64-linux = nativelinkControllerFor pkgs.pkgsCross.musl64; + publish-ghcr = pkgs.callPackage ./tools/publish-ghcr.nix {}; local-image-test = pkgs.callPackage ./tools/local-image-test.nix {}; @@ -279,6 +295,35 @@ }; }; + nativelink-controller-image = let + nativelinkControllerForImage = + if pkgs.stdenv.isx86_64 + then nativelink-controller-x86_64-linux + else nativelink-controller-aarch64-linux; + in + buildImage { + name = "nativelink-controller"; + copyToRoot = [ + (pkgs.buildEnv { + name = "nativelink-buildEnv"; + paths = [nativelinkControllerForImage]; + pathsToLink = ["/bin"]; + }) + ]; + config = { + Entrypoint = [(pkgs.lib.getExe' nativelinkControllerForImage "nativelink-controller")]; + Labels = { + "org.opencontainers.image.description" = "Controller for NativeLink CRDs."; + "org.opencontainers.image.documentation" = "https://github.com/TraceMachina/nativelink"; + "org.opencontainers.image.licenses" = "Apache-2.0"; + "org.opencontainers.image.revision" = "${self.rev or self.dirtyRev or "dirty"}"; + "org.opencontainers.image.source" = "https://github.com/TraceMachina/nativelink"; + "org.opencontainers.image.title" = "NativeLink Controller"; + "org.opencontainers.image.vendor" = "Trace Machina, Inc."; + }; + }; + }; + nativelink-worker-init = pkgs.callPackage ./tools/nativelink-worker-init.nix {inherit buildImage self nativelink-image;}; rbe-autogen = pkgs.callPackage ./local-remote-execution/rbe-autogen.nix { @@ -415,6 +460,7 @@ nativelink-aarch64-linux nativelink-debug nativelink-image + nativelink-controller-image nativelink-is-executable-test nativelink-worker-init nativelink-x86_64-linux @@ -513,6 +559,8 @@ pkgs.fluxcd pkgs.go pkgs.kustomize + pkgs.grpcurl + pkgs.kubectx ## Web pkgs.bun # got patched to the newest version (v.1.1.25) diff --git a/kubernetes/configmaps/cas.json b/kubernetes/configmaps/cas.json index e26b9c1cb..e86e757f1 100644 --- a/kubernetes/configmaps/cas.json +++ b/kubernetes/configmaps/cas.json @@ -2,48 +2,51 @@ // `~/.cache/nativelink`. When this location is mounted as a PersistentVolume // it persists the cache across restarts. { - "stores": { - "CAS_MAIN_STORE": { - "existence_cache": { + "stores": [ + { + "name": "CAS_MAIN_STORE", + "existenceCache": { "backend": { + "name": "compression-store", "compression": { "compression_algorithm": { "lz4": {} }, "backend": { + "name": "fs-cas", "filesystem": { "content_path": "~/.cache/nativelink/content_path-cas", "temp_path": "~/.cache/nativelink/tmp_path-cas", "eviction_policy": { - // 10gb. - "max_bytes": 10000000000, + "max_bytes": "10Gb" } } } } } } - }, - "AC_MAIN_STORE": { - "completeness_checking": { + }, { + "name": "AC_MAIN_STORE", + "completenessChecking": { "backend": { + "name": "fs-ac", "filesystem": { "content_path": "~/.cache/nativelink/content_path-ac", "temp_path": "~/.cache/nativelink/tmp_path-ac", "eviction_policy": { - // 500mb. 
- "max_bytes": 500000000, + "max_bytes": "500Mb", } } }, "cas_store": { - "ref_store": { + "name": "ref-cas", + "ref": { "name": "CAS_MAIN_STORE" } } } } - }, + ], "servers": [{ "listener": { "http": { diff --git a/kubernetes/configmaps/scheduler.json b/kubernetes/configmaps/scheduler.json index e41060209..941f700ab 100644 --- a/kubernetes/configmaps/scheduler.json +++ b/kubernetes/configmaps/scheduler.json @@ -1,7 +1,8 @@ { - "stores": { - "GRPC_LOCAL_STORE": { + "stores": [ + { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -9,9 +10,9 @@ ], "store_type": "cas" } - }, - "GRPC_LOCAL_AC_STORE": { + }, { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_AC_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -20,11 +21,12 @@ "store_type": "ac" } } - }, - "schedulers": { - "MAIN_SCHEDULER": { + ], + "schedulers": [ + { // TODO(adams): use the right scheduler because reclient doesn't use the cached results? // TODO(adams): max_bytes_per_stream + "name": "MAIN_SCHEDULER", "simple": { "supported_platform_properties": { "cpu_count": "priority", @@ -46,7 +48,7 @@ } } } - }, + ], "servers": [{ "listener": { "http": { diff --git a/kubernetes/configmaps/worker.json b/kubernetes/configmaps/worker.json index 2a3d2911d..1f166891e 100644 --- a/kubernetes/configmaps/worker.json +++ b/kubernetes/configmaps/worker.json @@ -1,7 +1,8 @@ { - "stores": { - "GRPC_LOCAL_STORE": { + "stores": [ + { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -9,9 +10,9 @@ ], "store_type": "cas" } - }, - "GRPC_LOCAL_AC_STORE": { + }, { // Note: This file is used to test GRPC store. + "name": "GRPC_LOCAL_AC_STORE", "grpc": { "instance_name": "main", "endpoints": [ @@ -19,10 +20,11 @@ ], "store_type": "ac" } - }, - "WORKER_FAST_SLOW_STORE": { - "fast_slow": { + }, { + "name": "WORKER_FAST_SLOW_STORE", + "fastSlow": { "fast": { + "name": "fs-worker", "filesystem": { "content_path": "~/.cache/nativelink/data-worker-test/content_path-cas", "temp_path": "~/.cache/nativelink/data-worker-test/tmp_path-cas", @@ -33,13 +35,14 @@ } }, "slow": { - "ref_store": { + "name": "ref-worker", + "ref": { "name": "GRPC_LOCAL_STORE", } } } } - }, + ], "workers": [{ "local": { "worker_api_endpoint": { diff --git a/nativelink-config/BUILD.bazel b/nativelink-config/BUILD.bazel index 3d36b2998..601bc8885 100644 --- a/nativelink-config/BUILD.bazel +++ b/nativelink-config/BUILD.bazel @@ -22,7 +22,9 @@ rust_library( deps = [ "@crates//:byte-unit", "@crates//:humantime", + "@crates//:schemars", "@crates//:serde", + "@crates//:serde_json", "@crates//:shellexpand", ], ) diff --git a/nativelink-config/Cargo.toml b/nativelink-config/Cargo.toml index c4881854b..85db96930 100644 --- a/nativelink-config/Cargo.toml +++ b/nativelink-config/Cargo.toml @@ -3,12 +3,17 @@ name = "nativelink-config" version = "0.5.3" edition = "2021" +[features] +crd = [] + [dependencies] byte-unit = { version = "5.1.4", default-features = false, features = ["byte"] } humantime = "2.1.0" serde = { version = "1.0.210", default-features = false, features = ["derive"] } +serde_json = "1.0.132" serde_json5 = "0.1.0" shellexpand = { version = "3.1.0", default-features = false, features = ["base-0"] } +schemars = "0.8.21" [dev-dependencies] pretty_assertions = { version = "1.4.1", features = ["std"] } diff --git a/nativelink-config/examples/basic_cas.json b/nativelink-config/examples/basic_cas.json index 
173951deb..49b3b05bd 100644 --- a/nativelink-config/examples/basic_cas.json +++ b/nativelink-config/examples/basic_cas.json @@ -1,26 +1,26 @@ { - "stores": { - "AC_MAIN_STORE": { + "stores": [ + { + "name": "AC_MAIN_STORE", "filesystem": { "content_path": "/tmp/nativelink/data-worker-test/content_path-ac", "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-ac", "eviction_policy": { - // 1gb. - "max_bytes": 1000000000, + "max_bytes": "1Gb", } } - }, - "WORKER_FAST_SLOW_STORE": { - "fast_slow": { + }, { + "name": "WORKER_FAST_SLOW_STORE", + "fastSlow": { // "fast" must be a "filesystem" store because the worker uses it to make // hardlinks on disk to a directory where the jobs are running. "fast": { + "name": "fs-worker", "filesystem": { "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", "eviction_policy": { - // 10gb. - "max_bytes": 10000000000, + "max_bytes": "10Gb", } } }, @@ -30,13 +30,15 @@ /// so they share the same underlying CAS. Since workers require a fast_slow /// store, we use the fast store as our primary data store, and the slow store /// is just a noop, since there's no shared storage in this config. - "noop": {} + "name": "discard", + "noop": {}, } } } - }, - "schedulers": { - "MAIN_SCHEDULER": { + ], + "schedulers": [ + { + "name": "MAIN_SCHEDULER", "simple": { "supported_platform_properties": { "cpu_count": "minimum", @@ -61,7 +63,7 @@ } } } - }, + ], "workers": [{ "local": { "worker_api_endpoint": { diff --git a/nativelink-config/examples/filesystem_cas.json b/nativelink-config/examples/filesystem_cas.json index 6f43bf6be..0cac1b8e3 100644 --- a/nativelink-config/examples/filesystem_cas.json +++ b/nativelink-config/examples/filesystem_cas.json @@ -4,25 +4,26 @@ // so objects are compressed, deduplicated and uses some in-memory // optimizations for certain hot paths. { - "stores": { - "FS_CONTENT_STORE": { + "stores": [ + { + "name": "FS_CONTENT_STORE", "compression": { "compression_algorithm": { "lz4": {} }, "backend": { + "name": "fs-content", "filesystem": { "content_path": "/tmp/nativelink/data/content_path-cas", "temp_path": "/tmp/nativelink/data/tmp_path-cas", "eviction_policy": { - // 2gb. - "max_bytes": 2000000000, + "max_bytes": "2Gb", } } } } - }, - "CAS_MAIN_STORE": { + }, { + "name": "CAS_MAIN_STORE", "verify": { "backend": { // Because we are using a dedup store, we can bypass small objects @@ -31,43 +32,48 @@ // general build content, since many objects are quite small and by // putting this size distinguish store in place will prevent 1+ index // read/write per small object request. - "size_partitioning": { + "name": "cas-parts", + "sizePartitioning": { "size": 262144, // 256k. "lower_store": { - "ref_store": { + "name": "cas-lower", + "ref": { "name": "FS_CONTENT_STORE" } }, "upper_store": { + "name": "cas-upper", "dedup": { "index_store": { // Since our index store is queried so much, we use a fast_slow // store so it will keep in memory objects that are accessed // frequently before going to disk. // Note: indexes are generally quite small, but accessed frequently. - "fast_slow": { + "name": "cas-index", + "fastSlow": { "fast": { + "name": "cas-index-fast", "memory": { "eviction_policy": { - // 10mb. - "max_bytes": 10000000, + "max_bytes": "10Mb", } } }, "slow": { + "name": "cas-index-slow", "filesystem": { "content_path": "/tmp/nativelink/data/content_path-index", "temp_path": "/tmp/nativelink/data/tmp_path-index", "eviction_policy": { - // 500mb. 
- "max_bytes": 500000000, + "max_bytes": "500Mb", } } } } }, "content_store": { - "ref_store": { + "name": "cas-content", + "ref": { "name": "FS_CONTENT_STORE" } } @@ -78,20 +84,20 @@ "verify_size": true, "verify_hash": true } - }, - "AC_MAIN_STORE": { + }, { + "name": "AC_MAIN_STORE", "filesystem": { "content_path": "/tmp/nativelink/data/content_path-ac", "temp_path": "/tmp/nativelink/data/tmp_path-ac", "eviction_policy": { - // 500mb. - "max_bytes": 500000000, + "max_bytes": "500Mb", } } } - }, - "schedulers": { - "MAIN_SCHEDULER": { + ], + "schedulers": [ + { + "name": "MAIN_SCHEDULER", "simple": { "supported_platform_properties": { "cpu_count": "minimum", @@ -112,7 +118,7 @@ } } } - }, + ], "servers": [{ "listener": { "http": { diff --git a/nativelink-config/examples/redis.json b/nativelink-config/examples/redis.json index 1f06fe560..c9a9fb68f 100644 --- a/nativelink-config/examples/redis.json +++ b/nativelink-config/examples/redis.json @@ -1,40 +1,45 @@ { - "stores": { - "CAS_FAST_SLOW_STORE": { - "redis_store": { + "stores": [ + { + "name": "CAS_FAST_SLOW_STORE", + "redis": { "addresses": ["redis://127.0.0.1:6379/"], "mode": "cluster" } - }, - "AC_FAST_SLOW_STORE": { - "redis_store": { + }, { + "name": "AC_FAST_SLOW_STORE", + "redis": { "addresses": ["redis://127.0.0.1:6379/"], "mode": "cluster" } - }, - "AC_MAIN_STORE": { - "completeness_checking": { + }, { + "name": "AC_MAIN_STORE", + "completenessChecking": { "backend": { - "ref_store": { + "name": "ac-ref", + "ref": { "name": "AC_FAST_SLOW_STORE" } }, "cas_store": { - "ref_store": { + "name": "cas-ref", + "ref": { "name": "CAS_MAIN_STORE" } } } - }, - "CAS_MAIN_STORE": { - "existence_cache": { + }, { + "name": "CAS_MAIN_STORE", + "existenceCache": { "backend": { + "name": "cas-compression", "compression": { "compression_algorithm": { "lz4": {} }, "backend": { - "ref_store": { + "name": "cas-compression-ref", + "ref": { "name": "CAS_FAST_SLOW_STORE" } } @@ -42,7 +47,7 @@ } } } - }, + ], "servers": [ { "listener": { diff --git a/nativelink-config/examples/s3_backend_with_local_fast_cas.json b/nativelink-config/examples/s3_backend_with_local_fast_cas.json index d74dd910d..3ab212bc5 100644 --- a/nativelink-config/examples/s3_backend_with_local_fast_cas.json +++ b/nativelink-config/examples/s3_backend_with_local_fast_cas.json @@ -1,23 +1,27 @@ { - "stores": { - "CAS_MAIN_STORE": { + "stores": [ + { + "name": "CAS_MAIN_STORE", "verify": { "backend": { + "name": "cas-dedup", "dedup": { "index_store": { - "fast_slow": { + "name": "cas-index", + "fastSlow": { "fast": { + "name": "cas-index-fast", "filesystem": { "content_path": "/tmp/nativelink/data/content_path-index", "temp_path": "/tmp/nativelink/data/tmp_path-index", "eviction_policy": { - // 500mb. - "max_bytes": 500000000, + "max_bytes": "500Mb", } } }, "slow": { - "experimental_s3_store": { + "name": "cas-index-slow", + "s3": { "region": "eu-north-1", "bucket": "crossplane-bucket-af79aeca9", "key_prefix": "test-prefix-index/", @@ -26,30 +30,32 @@ "delay": 0.3, "jitter": 0.5, }, - "additional_max_concurrent_requests": 10 } } } }, "content_store": { + "name": "cas-content", "compression": { "compression_algorithm": { "lz4": {} }, "backend": { - "fast_slow": { + "name": "cas-content-backend", + "fastSlow": { "fast": { + "name": "cas-content-backend-fast", "filesystem": { "content_path": "/tmp/nativelink/data/content_path-content", "temp_path": "/tmp/nativelink/data/tmp_path-content", "eviction_policy": { - // 2gb. 
- "max_bytes": 2000000000, + "max_bytes": "2Gb", } } }, "slow": { - "experimental_s3_store": { + "name": "cas-content-backend-slow", + "s3": { "region": "eu-north-1", "bucket": "crossplane-bucket-af79aeca9", "key_prefix": "test-prefix-dedup-cas/", @@ -58,7 +64,6 @@ "delay": 0.3, "jitter": 0.5, }, - "additional_max_concurrent_requests": 10 } } } @@ -68,18 +73,18 @@ } }, "verify_size": true, - "hash_verification_function": "sha256" + "verify_hash": true } - }, - "AC_MAIN_STORE": { - "fast_slow": { + }, { + "name": "AC_MAIN_STORE", + "fastSlow": { "fast": { + "name": "ac-fast", "memory": { "eviction_policy": { - // 100mb. - "max_bytes": 100000000, + "max_bytes": "100Mb", } - }, + }, // TODO(aaronmondal): Broken?? "filesystem": { "content_path": "/tmp/nativelink/data/content_path-ac", "temp_path": "/tmp/nativelink/data/tmp_path-ac", @@ -90,7 +95,8 @@ } }, "slow": { - "experimental_s3_store": { + "name": "ac-slow", + "s3": { "region": "eu-north-1", "bucket": "crossplane-bucket-af79aeca9", "key_prefix": "test-prefix-ac/", @@ -99,14 +105,14 @@ "delay": 0.3, "jitter": 0.5, }, - "additional_max_concurrent_requests": 10 } } } } - }, - "schedulers": { - "MAIN_SCHEDULER": { + ], + "schedulers": [ + { + "name": "MAIN_SCHEDULER", "simple": { "supported_platform_properties": { "cpu_count": "minimum", @@ -127,7 +133,7 @@ } } } - }, + ], "servers": [{ "listener": { "http": { diff --git a/nativelink-config/src/cas_server.rs b/nativelink-config/src/cas_server.rs index 3061c4f44..3d55c40ea 100644 --- a/nativelink-config/src/cas_server.rs +++ b/nativelink-config/src/cas_server.rs @@ -14,7 +14,8 @@ use std::collections::HashMap; -use serde::Deserialize; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use crate::schedulers::SchedulerConfig; use crate::serde_utils::{ @@ -33,7 +34,7 @@ pub type SchedulerRefName = String; pub type InstanceName = String; #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug, Default, Clone, Copy)] +#[derive(Deserialize, Debug, Default, Clone, Copy, Serialize, JsonSchema)] pub enum HttpCompressionAlgorithm { /// No compression. #[default] @@ -52,8 +53,7 @@ pub enum HttpCompressionAlgorithm { /// services with different compression settings that are served on /// different ports. Then configure the non-cloud clients to use one port /// and cloud-clients to use another. -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] pub struct HttpCompressionConfig { /// The compression algorithm that the server will use when sending /// responses to clients. Enabling this will likely save a lot of @@ -62,6 +62,7 @@ pub struct HttpCompressionConfig { /// see: /// /// Default: `HttpCompressionAlgorithm::none` + #[serde(skip_serializing_if = "Option::is_none")] pub send_compression_algorithm: Option, /// The compression algorithm that the server will accept from clients. @@ -75,8 +76,7 @@ pub struct HttpCompressionConfig { pub accepted_compression_algorithms: Vec, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] pub struct AcStoreConfig { /// The store name referenced in the `stores` map in the main config. /// This store name referenced here may be reused multiple times. 
@@ -89,8 +89,7 @@ pub struct AcStoreConfig { pub read_only: bool, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] pub struct CasStoreConfig { /// The store name referenced in the `stores` map in the main config. /// This store name referenced here may be reused multiple times. @@ -98,16 +97,14 @@ pub struct CasStoreConfig { pub cas_store: StoreRefName, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] pub struct CapabilitiesRemoteExecutionConfig { /// Scheduler used to configure the capabilities of remote execution. #[serde(deserialize_with = "convert_string_with_shellexpand")] pub scheduler: SchedulerRefName, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] pub struct CapabilitiesConfig { /// Configuration for remote execution capabilities. /// If not set the capabilities service will inform the client that remote /// execution is not supported. pub remote_execution: Option<CapabilitiesRemoteExecutionConfig>, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] pub struct ExecutionConfig { /// The store name referenced in the `stores` map in the main config. /// This store name referenced here may be reused multiple times. @@ -129,8 +125,7 @@ pub struct ExecutionConfig { pub scheduler: SchedulerRefName, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] pub struct ByteStreamConfig { /// Name of the store in the "stores" configuration. pub cas_stores: HashMap<InstanceName, StoreRefName>, @@ -159,16 +154,14 @@ pub struct ByteStreamConfig { pub persist_stream_on_disconnect_timeout: usize, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] pub struct WorkerApiConfig { /// The scheduler name referenced in the `schedulers` map in the main config. #[serde(deserialize_with = "convert_string_with_shellexpand")] pub scheduler: SchedulerRefName, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct PrometheusConfig { /// Path to register prometheus metrics. If path is "/metrics", and your /// domain is "example.com", you can reach the endpoint with: @@ -179,8 +172,7 @@ pub struct PrometheusConfig { pub path: String, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct AdminConfig { /// Path to register the admin API. If path is "/admin", and your /// domain is "example.com", you can reach the endpoint with: @@ -191,8 +183,7 @@ pub struct AdminConfig { pub path: String, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct HealthConfig { /// Path to register the health status check. If path is "/status", and your /// domain is "example.com", you can reach the endpoint with: @@ -203,7 +194,7 @@ pub struct HealthConfig { pub path: String, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct BepConfig { /// The store to publish build events to. 
/// The store name referenced in the `stores` map in the main config. @@ -211,8 +202,7 @@ pub struct BepConfig { pub store: StoreRefName, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct ServicesConfig { /// The Content Addressable Storage (CAS) backend config. /// The key is the instance_name used in the protocol and the @@ -265,8 +255,7 @@ pub struct ServicesConfig { pub health: Option<HealthConfig>, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct TlsConfig { /// Path to the certificate file. #[serde(deserialize_with = "convert_string_with_shellexpand")] @@ -293,8 +282,7 @@ /// /// Note: All of these default to hyper's default values unless otherwise /// specified. -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct HttpServerConfig { /// Interval to send keep-alive pings via HTTP2. /// Note: This is in seconds. @@ -361,14 +349,13 @@ pub struct HttpServerConfig { } #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub enum ListenerConfig { /// Listener for HTTP/HTTPS/HTTP2 sockets. http(HttpListener), } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct HttpListener { /// Address to listen on. Example: `127.0.0.1:8080` or `:8080` to listen /// to all IPs. @@ -391,8 +378,7 @@ pub struct HttpListener { pub tls: Option<TlsConfig>, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct ServerConfig { /// Name of the server. This is used to help identify the service /// for telemetry and logs. @@ -409,7 +395,7 @@ pub struct ServerConfig { } #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub enum WorkerProperty { /// List of static values. /// Note: Generally there should only ever be 1 value, but if the platform @@ -423,8 +409,7 @@ pub enum WorkerProperty { } /// Generic config for an endpoint and associated configs. -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct EndpointConfig { /// URI of the endpoint. #[serde(deserialize_with = "convert_string_with_shellexpand")] @@ -439,7 +424,7 @@ pub struct EndpointConfig { } #[allow(non_camel_case_types)] -#[derive(Copy, Clone, Deserialize, Debug, Default)] +#[derive(Copy, Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub enum UploadCacheResultsStrategy { /// Only upload action results with an exit code of 0. #[default] @@ -455,8 +440,10 @@ pub enum UploadCacheResultsStrategy { failures_only, } +// TODO(aaronmondal): Rework this to be compatible with schema generation while +// providing sensible UX. #[allow(non_camel_case_types)] -#[derive(Clone, Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub enum EnvironmentSource { /// The name of the platform property in the action to get the value from. property(String), @@ -466,7 +453,7 @@ pub enum EnvironmentSource { /// The max amount of time in milliseconds the command is allowed to run /// (requested by the client). 
+// TODO(aaronmondal): Rework this to be compatible with schema generation while +// providing sensible UX. #[allow(non_camel_case_types)] -#[derive(Clone, Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub enum EnvironmentSource { /// The name of the platform property in the action to get the value from. property(String), /// The raw value to set. value(String), /// The max amount of time in milliseconds the command is allowed to run /// (requested by the client). - timeout_millis, + timeout_millis(String), /// A special file path will be provided that can be used to communicate /// with the parent process about out-of-band information. This file /// @@ -484,7 +471,7 @@ /// /// All fields are optional, file does not need to be created and may be /// empty. - side_channel_file, + side_channel_file(String), /// A "root" directory for the action. This directory can be used to /// store temporary files that are not needed after the action has /// @@ -499,11 +486,10 @@ /// variable, `mkdir $ENV_VAR_NAME/tmp` and `export TMPDIR=$ENV_VAR_NAME/tmp`. /// Another example might be to bind-mount the `/tmp` path in a container to /// this path in `entrypoint`. - action_directory, + action_directory(String), } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct UploadActionResultConfig { /// Underlying AC store that the worker will use to publish execution results /// into. Objects placed in this store should be reachable from the @@ -531,6 +517,7 @@ /// /// Default: UploadCacheResultsStrategy::FailuresOnly #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] pub upload_historical_results_strategy: Option<UploadCacheResultsStrategy>, /// Template to use for the `ExecuteResponse.message` property. This message @@ -562,8 +549,7 @@ pub failure_message_template: String, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, Default, JsonSchema, Serialize)] pub struct LocalWorkerConfig { /// Name of the worker. This gives a more friendly name to a worker for logging /// and metric publishing. @@ -654,14 +640,13 @@ } #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub enum WorkerConfig { /// A worker type that executes jobs locally on this machine. local(LocalWorkerConfig), } -#[derive(Deserialize, Debug, Clone, Copy)] -#[serde(deny_unknown_fields)] +#[derive(Deserialize, Debug, Clone, Copy, JsonSchema, Serialize)] pub struct GlobalConfig { /// Maximum number of open files that can be opened at one time. /// This value is not strictly enforced, it is a best effort. Some internal libraries @@ -720,12 +705,11 @@ pub struct GlobalConfig { pub default_digest_size_health_check: usize, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] +#[derive(Clone, Deserialize, Debug, JsonSchema, Serialize)] pub struct CasConfig { /// List of stores available to use in this config. /// The keys can be used in other configs when needing to reference a store. - pub stores: HashMap<String, StoreConfig>, + pub stores: Vec<StoreConfig>, /// Worker configurations used to execute jobs. pub workers: Option<Vec<WorkerConfig>>, /// List of schedulers available to use in this config. /// The keys can be used in other configs when needing to reference a /// scheduler. - pub schedulers: Option<HashMap<String, SchedulerConfig>>, + pub schedulers: Option<Vec<SchedulerConfig>>, /// Servers to setup for this process.
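    /// [Editor's sketch] With the map-to-list change above, a minimal config
    /// now names each store inline instead of keying it (hypothetical values):
    /// ```json
    /// {
    ///   "stores": [{"name": "CAS_STORE", "memory": {}}],
    ///   "servers": []
    /// }
    /// ```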
pub servers: Vec<ServerConfig>, diff --git a/nativelink-config/src/lib.rs b/nativelink-config/src/lib.rs index 0607e28c5..ac59469de 100644 --- a/nativelink-config/src/lib.rs +++ b/nativelink-config/src/lib.rs @@ -16,3 +16,117 @@ pub mod cas_server; pub mod schedulers; pub mod serde_utils; pub mod stores; + +use schemars::JsonSchema; +use serde::de::IntoDeserializer; +use serde::{Deserialize, Deserializer, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct NamedConfig<Spec> { + pub name: String, + #[serde(flatten)] + pub spec: Spec, +} + +#[derive(Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum NamedRef<Spec> { + Name(String), + Spec(Box<NamedConfig<Spec>>), +} + +impl<Spec> NamedRef<Spec> { + pub fn new<T>(name: impl Into<String>, spec: T) -> Self + where + T: Into<Spec>, + { + Self::Spec(Box::new(NamedConfig::<Spec> { + name: name.into(), + spec: spec.into(), + })) + } +} + +impl<Spec> Serialize for NamedRef<Spec> +where + Spec: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + match self { + NamedRef::Name(name) => name.serialize(serializer), + NamedRef::Spec(config) => config.serialize(serializer), + } + } +} + +impl<Spec> From<NamedConfig<Spec>> for NamedRef<Spec> { + fn from(config: NamedConfig<Spec>) -> Self { + NamedRef::Spec(Box::new(config)) + } } + +impl<'de, Spec> Deserialize<'de> for NamedRef<Spec> +where + Spec: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + // First, try to deserialize as a string + let value = serde_json::Value::deserialize(deserializer)?; + + match value { + // If it's a string, convert to Name variant + serde_json::Value::String(s) => Ok(NamedRef::<Spec>::Name(s)), + + // If it's an object, try to deserialize as NamedConfig + serde_json::Value::Object(_) => { + let store_config = NamedConfig::<Spec>::deserialize(value.into_deserializer()) + .map_err(serde::de::Error::custom)?; + Ok(NamedRef::<Spec>::Spec(Box::new(store_config))) + } + + // Otherwise, return an error + _ => Err(serde::de::Error::custom( + "Expected either a string or an object for StoreRef enum", + )), + } + } +}
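+
+// [Editor's sketch] The deserializer above accepts either a bare name or an
+// inline named spec. A hypothetical round trip, assuming some `Spec` type
+// that implements `Deserialize` (e.g. `MemorySpec` from the stores config;
+// remaining fields come from the flattened spec):
+//
+//     let by_name: NamedRef<MemorySpec> =
+//         serde_json::from_str(r#""CAS_MAIN_STORE""#).unwrap();
+//     let inline: NamedRef<MemorySpec> =
+//         serde_json::from_str(r#"{"name": "CAS_MAIN_STORE"}"#).unwrap();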
+/// This macro (and the invocation below) implements the "From" trait for all +/// variations of the "Spec". For instance, this allows patterns like this: +/// +/// ```txt +/// crate::impl_from_spec!( +/// SchedulerSpec, +/// (Simple, SimpleSpec), +/// ) +/// ``` +/// +/// resolves to: +/// +/// ```txt +/// impl From<SimpleSpec> for SchedulerSpec { +/// fn from(spec: SimpleSpec) -> Self { +/// SchedulerSpec::Simple(spec) +/// } +/// } +/// ``` +/// +#[macro_export] +macro_rules! impl_from_spec { + ($target:ident, $(($variant:ident, $spec:ident)),* $(,)?) => { + $( + impl From<$spec> for $target { + fn from(spec: $spec) -> Self { + $target::$variant(spec) + } + } + )* + } } diff --git a/nativelink-config/src/schedulers.rs b/nativelink-config/src/schedulers.rs index 588b6021d..e1170864f 100644 --- a/nativelink-config/src/schedulers.rs +++ b/nativelink-config/src/schedulers.rs @@ -14,24 +14,49 @@ use std::collections::HashMap; -use serde::Deserialize; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use crate::serde_utils::{convert_duration_with_shellexpand, convert_numeric_with_shellexpand}; use crate::stores::{GrpcEndpoint, Retry, StoreRefName}; -#[allow(non_camel_case_types)] -#[derive(Deserialize, Debug)] -pub enum SchedulerConfig { - simple(SimpleScheduler), - grpc(GrpcScheduler), - cache_lookup(CacheLookupScheduler), - property_modifier(PropertyModifierScheduler), +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum SchedulerSpec { + Simple(SimpleSpec), + Grpc(GrpcSpec), + CacheLookup(CacheLookupSpec), + PropertyModifier(PropertyModifierSpec), +} + +pub type SchedulerRefName = String; +pub type SchedulerConfig = crate::NamedConfig<SchedulerSpec>; +pub type SchedulerRef = crate::NamedRef<SchedulerSpec>; + +impl From<SchedulerRef> for SchedulerConfig { + fn from(scheduler_ref: SchedulerRef) -> Self { + match scheduler_ref { + SchedulerRef::Name(name) => SchedulerConfig { + name: name.clone(), + spec: todo!("TODO(aaronmondal): Implement RefScheduler"), + }, + SchedulerRef::Spec(spec) => *spec, + } + } } +crate::impl_from_spec!( + SchedulerSpec, + (Simple, SimpleSpec), + (Grpc, GrpcSpec), + (CacheLookup, CacheLookupSpec), + (PropertyModifier, PropertyModifierSpec), +); + /// When the scheduler matches tasks to workers that are capable of running /// the task, this value will be used to determine how the property is treated. #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug, Clone, Copy, Hash, Eq, PartialEq)] +#[derive(Deserialize, Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, JsonSchema)] pub enum PropertyType { /// Requires the platform property to be a u64 and when the scheduler looks /// for appropriate worker nodes that are capable of executing the task, @@ -56,7 +81,7 @@ pub enum PropertyType { /// on how to choose which worker should run the job when multiple /// workers are able to run the task. #[allow(non_camel_case_types)] -#[derive(Copy, Clone, Deserialize, Debug, Default)] +#[derive(Copy, Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] pub enum WorkerAllocationStrategy { /// Prefer workers that have been least recently used to run a job. #[default] least_recently_used, /// Prefer workers that have been most recently used to run a job. most_recently_used, } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] -pub struct SimpleScheduler { +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct SimpleSpec { /// A list of supported platform properties mapped to how these properties /// are used when the scheduler looks for worker nodes capable of running /// the task. @@ -132,16 +157,20 @@ }
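// [Editor's sketch] Under the renamed camelCase variants and the named-list
// layout, a scheduler entry in the top-level `schedulers` list would look
// like (hypothetical name and value):
//
//     "schedulers": [
//         {"name": "MAIN_SCHEDULER", "simple": {"worker_timeout_s": 30}}
//     ]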
#[allow(non_camel_case_types)] -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] pub enum ExperimentalSimpleSchedulerBackend { /// Use an in-memory store for the scheduler. - memory, + Memory(MemoryBackend), + /// Use a redis store for the scheduler. redis(ExperimentalRedisSchedulerBackend), } -#[derive(Deserialize, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] +pub struct MemoryBackend {} + +#[derive(Clone, Deserialize, Debug, Default, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] pub struct ExperimentalRedisSchedulerBackend { /// A reference to the redis store to use for the scheduler. /// Note: This MUST resolve to a RedisStore. @@ -152,9 +181,9 @@ /// is useful to use when doing some kind of local action cache or CAS away from /// the main cluster of workers. In general, it's more efficient to point the /// build at the main scheduler directly though. -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] -pub struct GrpcScheduler { +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct GrpcSpec { /// The upstream scheduler to forward requests to. pub endpoint: GrpcEndpoint, @@ -174,20 +203,21 @@ pub connections_per_endpoint: usize, } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] -pub struct CacheLookupScheduler { +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct CacheLookupSpec { /// The reference to the action cache store used to return cached /// actions from rather than running them again. /// To prevent unintended issues, this store should probably be a CompletenessCheckingStore. pub ac_store: StoreRefName, /// The nested scheduler to use if cache lookup fails. - pub scheduler: Box<SchedulerConfig>, + #[schemars(with = "SchedulerRefName")] + pub scheduler: SchedulerRef, } -#[derive(Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +#[derive(Deserialize, Debug, Clone, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] pub struct PlatformPropertyAddition { /// The name of the property to add. pub name: String, @@ -196,7 +226,7 @@ } #[allow(non_camel_case_types)] -#[derive(Deserialize, Debug, Clone)] +#[derive(Deserialize, Debug, Clone, Serialize, JsonSchema)] pub enum PropertyModification { /// Add a property to the action properties. add(PlatformPropertyAddition), /// Remove a named property from the action. remove(String), } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] -pub struct PropertyModifierScheduler { +#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct PropertyModifierSpec { /// A list of modifications to perform to incoming actions for the nested /// scheduler. These are performed in order and blindly, so removing a /// property that doesn't exist is fine and overwriting an existing property /// @@ -215,5 +245,6 @@ pub modifications: Vec<PropertyModification>, /// The nested scheduler to use after modifying the properties. - pub scheduler: Box<SchedulerConfig>, + #[schemars(with = "SchedulerRefName")] + pub scheduler: SchedulerRef, } diff --git a/nativelink-config/src/stores.rs b/nativelink-config/src/stores.rs index 9c4e21a87..760bf7707 100644 --- a/nativelink-config/src/stores.rs +++ b/nativelink-config/src/stores.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License.
+use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use crate::serde_utils::{ @@ -23,9 +24,55 @@ use crate::serde_utils::{ /// Name of the store. This type will be used when referencing a store /// in the `CasConfig::stores`'s map key. pub type StoreRefName = String; +pub type StoreRef = crate::NamedRef<StoreSpec>; +pub type StoreConfig = crate::NamedConfig<StoreSpec>; + +impl From<StoreRef> for StoreConfig { + fn from(store_ref: StoreRef) -> Self { + match store_ref { + StoreRef::Name(name) => StoreConfig { + name: name.clone(), + spec: StoreSpec::Ref(RefSpec { name }), + }, + StoreRef::Spec(config) => *config, + } + } +} + +crate::impl_from_spec!( + StoreSpec, + (Memory, MemorySpec), + (S3, S3Spec), + (Verify, VerifySpec), + (CompletenessChecking, CompletenessCheckingSpec), + (Compression, CompressionSpec), + (Dedup, DedupSpec), + (ExistenceCache, ExistenceCacheSpec), + (FastSlow, FastSlowSpec), + (Shard, ShardSpec), + (Filesystem, FilesystemSpec), + (Ref, RefSpec), + (SizePartitioning, SizePartitioningSpec), + (Grpc, GrpcSpec), + (Redis, RedisSpec), + (Noop, NoopSpec), +); + +#[derive(JsonSchema)] +#[schemars(rename = "EvictionPolicy")] +pub struct EvictionPolicySchema { + /// Maximum number of bytes before eviction takes place + pub max_bytes: usize, + /// Low watermark for eviction + pub evict_bytes: usize, + /// Maximum age in seconds + pub max_seconds: u32, + /// Maximum number of items + pub max_count: u64, +} #[allow(non_camel_case_types)] -#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +#[derive(Serialize, Deserialize, Debug, Clone, Copy, JsonSchema)] pub enum ConfigDigestHashFunction { /// Use the sha256 hash function. /// @@ -36,9 +83,9 @@ blake3, } -#[allow(non_camel_case_types)] -#[derive(Serialize, Deserialize, Debug, Clone)] -pub enum StoreConfig { +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum StoreSpec { /// Memory store will store all data in a hashmap in memory. /// /// **Example JSON Config:** /// ```json /// "memory": { /// "eviction_policy": { /// // 10mb. /// "max_bytes": 10000000, /// } /// } /// ``` /// - memory(MemoryStore), + Memory(MemorySpec), /// S3 store will use Amazon's S3 service as a backend to store /// the files. This configuration can be used to share files /// @@ -76,7 +123,7 @@ /// } /// ``` /// - experimental_s3_store(S3Store), + S3(S3Spec), /// Verify store is used to apply verifications to an underlying /// store implementation. It is strongly encouraged to validate /// @@ -100,7 +147,7 @@ /// } /// ``` /// - verify(Box<VerifyStore>), + Verify(VerifySpec), /// Completeness checking store verifies if the /// output files & folders exist in the CAS before forwarding /// @@ -128,7 +175,7 @@ /// } /// ``` /// - completeness_checking(Box<CompletenessCheckingStore>), + CompletenessChecking(CompletenessCheckingSpec), /// A compression store that will compress the data inbound and
There will be a non-trivial cost to compress and @@ -156,7 +203,7 @@ /// } /// ``` /// - compression(Box<CompressionStore>), + Compression(CompressionSpec), /// A dedup store will take the inputs and run a rolling hash /// algorithm on them to slice the input into smaller parts then /// @@ -221,7 +268,7 @@ /// } /// ``` /// - dedup(Box<DedupStore>), + Dedup(DedupSpec), /// Existence store will wrap around another store and cache calls /// to has so that subsequent has_with_results calls will be /// @@ -248,7 +295,7 @@ /// } /// ``` /// - existence_cache(Box<ExistenceCacheStore>), + ExistenceCache(ExistenceCacheSpec), /// FastSlow store will first try to fetch the data from the `fast` /// store and then if it does not exist try the `slow` store. /// @@ -291,7 +338,7 @@ /// } /// ``` /// - fast_slow(Box<FastSlowStore>), + FastSlow(FastSlowSpec), /// Shards the data to multiple stores. This is useful for cases /// when you want to distribute the load across multiple stores. /// @@ -313,7 +360,8 @@ /// } /// ``` /// - shard(ShardStore), + //Shard { name: String, spec: ShardSpec }, + Shard(ShardSpec), /// Stores the data on the filesystem. This store is designed for /// local persistent storage. Restarts of this program should restore /// @@ -334,7 +382,7 @@ /// } /// ``` /// - filesystem(FilesystemStore), + Filesystem(FilesystemSpec), /// Store used to reference a store in the root store manager. /// This is useful for cases when you want to share a store in different /// @@ -349,7 +397,7 @@ /// } /// ``` /// - ref_store(RefStore), + Ref(RefSpec), /// Uses the size field of the digest to separate which store to send the /// data. This is useful for cases when you'd like to put small objects /// @@ -377,7 +425,7 @@ /// } /// ``` /// - size_partitioning(Box<SizePartitioningStore>), + SizePartitioning(SizePartitioningSpec), /// This store will pass-through calls to another GRPC store. This store /// is not designed to be used as a sub-store of another store, but it /// @@ -400,7 +448,7 @@ /// } /// ``` /// - grpc(GrpcStore), + Grpc(GrpcSpec), /// Stores data in any stores compatible with Redis APIs. /// /// @@ -417,7 +465,7 @@ /// } /// ``` /// - redis_store(RedisStore), + Redis(RedisSpec), /// Noop store is a store that sends streams into the void and all data /// retrieval will return 404 (NotFound). This can be useful for cases /// /// "noop": {} /// ``` /// - noop, + Noop(NoopSpec), } /// Configuration for an individual shard of the store. -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] pub struct ShardConfig { + // Note: A shard entry takes a store reference (or an inline named store) + // rather than a nested anonymous store config. /// Store to shard the data to. - pub store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub store: StoreRef, /// The weight of the store. This is used to determine how much data /// should be sent to the store. The actual percentage is the sum of /// @@ -447,38 +497,40 @@ pub weight: Option<u32>, }
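// [Editor's note] Weights are relative shares of the sum: with two shards
// weighted 1 and 3, roughly 25% of the keyspace maps to the first and 75% to
// the second. A hypothetical sketch:
//
//     "shard": {
//         "stores": [
//             {"store": "fast-store", "weight": 1},
//             {"store": "big-store", "weight": 3}
//         ]
//     }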
-#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct ShardStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct ShardSpec { /// Stores to shard the data to. pub stores: Vec<ShardConfig>, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct SizePartitioningStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct SizePartitioningSpec { /// Size to partition the data on. #[serde(deserialize_with = "convert_data_size_with_shellexpand")] pub size: u64, /// Store to send data when object is < (less than) size. - pub lower_store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub lower_store: StoreRef, /// Store to send data when object is >= (less than eq) size. - pub upper_store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub upper_store: StoreRef, } -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -#[serde(deny_unknown_fields)] -pub struct RefStore { +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct RefSpec { /// Name of the store under the root "stores" config object. #[serde(deserialize_with = "convert_string_with_shellexpand")] - pub name: String, + pub name: StoreRefName, } -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -#[serde(deny_unknown_fields)] -pub struct FilesystemStore { +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct FilesystemSpec { /// Path on the system where to store the actual content. This is where /// the bulk of the data will be placed. /// On service bootup this folder will be scanned and all files will be /// @@ -504,6 +556,7 @@ /// Policy used to evict items out of the store. Failure to set this /// value will cause items to never be removed from the store causing /// infinite memory usage. + //#[schemars(with = "EvictionPolicySchema")] pub eviction_policy: Option<EvictionPolicy>, /// The block size of the filesystem for the running machine /// @@ -514,37 +567,40 @@ pub block_size: u64, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct FastSlowStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +pub struct FastSlowSpec { /// Fast store that will be attempted to be contacted before reaching /// out to the `slow` store. - pub fast: StoreConfig, + #[schemars(with = "StoreRefName")] + pub fast: StoreRef, /// If the object does not exist in the `fast` store it will try to /// get it from this store. - pub slow: StoreConfig, + #[schemars(with = "StoreRefName")] + pub slow: StoreRef, } -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -#[serde(deny_unknown_fields)] -pub struct MemoryStore { +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct MemorySpec { /// Policy used to evict items out of the store. Failure to set this /// value will cause items to never be removed from the store causing /// infinite memory usage. pub eviction_policy: Option<EvictionPolicy>, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct DedupStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct DedupSpec { /// Store used to store the index of each dedup slice. This store /// should generally be fast and small.
- pub index_store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub index_store: StoreRef, /// The store where the individual chunks will be uploaded. This /// store should generally be the slower & larger store. - pub content_store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub content_store: StoreRef, /// Minimum size that a chunk will be when slicing up the content. /// Note: This setting can be increased to improve performance /// @@ -590,31 +646,34 @@ pub max_concurrent_fetch_per_get: u32, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct ExistenceCacheStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct ExistenceCacheSpec { /// The underlying store to wrap around. All content will first flow /// through self before forwarding to backend. In the event there /// is an error detected in self, the connection to the backend /// will be terminated, and early termination should always cause /// updates to fail on the backend. - pub backend: StoreConfig, + #[schemars(with = "StoreRefName")] + pub backend: StoreRef, /// Policy used to evict items out of the store. Failure to set this /// value will cause items to never be removed from the store causing /// infinite memory usage. + //#[schemars(with = "EvictionPolicySchema")] pub eviction_policy: Option<EvictionPolicy>, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct VerifyStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct VerifySpec { /// The underlying store to wrap around. All content will first flow /// through self before forwarding to backend. In the event there /// is an error detected in self, the connection to the backend /// will be terminated, and early termination should always cause /// updates to fail on the backend. - pub backend: StoreConfig, + #[schemars(with = "StoreRefName")] + pub backend: StoreRef, /// If set the store will verify the size of the data before accepting /// an upload of data. /// @@ -632,19 +691,24 @@ pub verify_hash: bool, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct CompletenessCheckingStore { - /// The underlying store that will have it's results validated before sending to client. - pub backend: StoreConfig, +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct CompletenessCheckingSpec { + /// The underlying store to wrap around. All content will first flow + /// through self before forwarding to backend. In the event there + /// is an error detected in self, the connection to the backend + /// will be terminated, and early termination should always cause + /// updates to fail on the backend. + #[schemars(with = "StoreRefName")] + pub backend: StoreRef, /// When a request is made, the results are decoded and all output digests/files are verified /// to exist in this CAS store before returning success. - pub cas_store: StoreConfig, + #[schemars(with = "StoreRefName")] + pub cas_store: StoreRef, } -#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone, Copy)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone, Copy, JsonSchema)] pub struct Lz4Config { /// Size of the blocks to compress.
/// Higher values require more ram, but might yield slightly better @@ -667,7 +731,7 @@ } #[allow(non_camel_case_types)] -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, JsonSchema)] pub enum CompressionAlgorithm { /// LZ4 compression algorithm is extremely fast for compression and /// decompression, however does not perform very well in compression /// ratio. In most cases build artifacts are highly compressible, however /// lz4 is quite good at aborting early if the data is not deemed very /// compressible. /// /// see: <https://lz4.github.io/lz4/> lz4(Lz4Config), } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct CompressionStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct CompressionSpec { /// The underlying store to wrap around. All content will first flow /// through self before forwarding to backend. In the event there /// is an error detected in self, the connection to the backend /// will be terminated, and early termination should always cause /// updates to fail on the backend. - pub backend: StoreConfig, + #[schemars(with = "StoreRefName")] + pub backend: StoreRef, /// The compression algorithm to use. pub compression_algorithm: CompressionAlgorithm, @@ -697,8 +762,7 @@ /// is touched it updates the timestamp. Inserts and updates will execute the /// eviction policy removing any expired entries and/or the oldest entries /// until the store size becomes smaller than max_bytes. -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] pub struct EvictionPolicy { /// Maximum number of bytes before eviction takes place. /// Default: 0. Zero means never evict based on size. @@ -724,9 +788,9 @@ pub max_count: u64, } -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -#[serde(deny_unknown_fields)] -pub struct S3Store { +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct S3Spec { /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc... #[serde(default, deserialize_with = "convert_string_with_shellexpand")] pub region: String, @@ -788,7 +852,7 @@ } #[allow(non_camel_case_types)] -#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +#[derive(Serialize, Deserialize, Debug, Clone, Copy, JsonSchema)] pub enum StoreType { /// The store is content addressable storage. cas, /// The store is an action cache. ac, } -#[derive(Serialize, Deserialize, Debug, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] pub struct ClientTlsConfig { /// Path to the certificate authority to use to validate the remote. #[serde(deserialize_with = "convert_string_with_shellexpand")] pub ca_file: String, /// Path to the certificate file for client authentication. #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] pub cert_file: Option<String>, /// Path to the private key file for client authentication. #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] pub key_file: Option<String>, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] pub struct GrpcEndpoint { /// The endpoint address (i.e. grpc(s)://example.com:443).
#[serde(deserialize_with = "convert_string_with_shellexpand")] @@ -823,9 +886,9 @@ pub concurrency_limit: Option<usize>, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct GrpcStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[cfg_attr(not(feature = "crd"), serde(deny_unknown_fields))] +pub struct GrpcSpec { /// Instance name for GRPC calls. Proxy calls will have the instance_name changed to this. #[serde(default, deserialize_with = "convert_string_with_shellexpand")] pub instance_name: String, @@ -853,7 +916,7 @@ } /// The possible error codes that might occur on an upstream request. -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, JsonSchema)] pub enum ErrorCode { Cancelled = 1, Unknown = 2, @@ -874,8 +937,8 @@ // Note: This list is duplicated from nativelink-error/lib.rs. } -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct RedisStore { +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +pub struct RedisSpec { /// The hostname or IP address of the Redis server. /// Ex: ["redis://username:password@redis-server-url:6380/99"] /// 99 represents the database ID, 6380 represents the port. @@ -991,7 +1054,7 @@ pub retry: Retry, } -#[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq)] +#[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum RedisMode { Cluster, @@ -1000,6 +1063,9 @@ Standard, } +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] +pub struct NoopSpec {} + /// Retry configuration. This configuration is exponential and each iteration /// a jitter as a percentage is applied of the calculated delay. For example: /// ```haskell /// /// 8 4.8s - 8s /// Remember that the delays are additive, so the above results mean a single /// request would have a total delay of 9.525s - 15.875s. -#[derive(Serialize, Deserialize, Clone, Debug, Default)] -#[serde(deny_unknown_fields)] +#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)] pub struct Retry { /// Maximum number of retries until retrying stops. /// Setting this to zero will always attempt 1 time, but not retry. diff --git a/nativelink-controller/BUILD.bazel b/nativelink-controller/BUILD.bazel new file mode 100644 index 000000000..ee963187a --- /dev/null +++ b/nativelink-controller/BUILD.bazel @@ -0,0 +1,2 @@ +# TODO(aaronmondal): At the moment the implicit dependency kube-core can't be +# built with rules_rust. Figure out why and fix it.
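[Editor's sketch] For orientation, a minimal version of what the `generate-crd` binary declared in the Cargo.toml below might look like; `nativelink_controller::NativeLink` is assumed to be the `kube::CustomResource`-derived type, so treat this as illustrative rather than the patch's actual implementation:

```rust
use kube::CustomResourceExt;

fn main() -> anyhow::Result<()> {
    // Emit the generated CustomResourceDefinition as YAML, i.e. the content
    // of the checked-in nativelink-crd.yaml.
    let crd = nativelink_controller::NativeLink::crd();
    println!("{}", serde_yml::to_string(&crd)?);
    Ok(())
}
```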
diff --git a/nativelink-controller/Cargo.toml b/nativelink-controller/Cargo.toml new file mode 100644 index 000000000..a406de80f --- /dev/null +++ b/nativelink-controller/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "nativelink-controller" +version = "0.5.3" +edition = "2021" + +[[bin]] +name = "nativelink-controller" + +[[bin]] +name = "generate-crd" + +[features] +default = ["crd"] +crd = [ + "nativelink-config/crd" +] + +[dependencies] +nativelink-config = { path = "../nativelink-config" } + +serde = { version = "1.0.210", default-features = false } +kube = { version = "0.96.0", features = ["runtime", "derive", "client"] } +k8s-openapi = { version = "0.23.0", features = ["v1_31", "schemars"] } +schemars = "0.8.21" +serde_json = "1.0.128" +serde_yml = "0.0.12" +futures = { version = "0.3.30", default-features = false } +tokio = { version = "1.40.0", features = ["fs", "rt-multi-thread", "signal", "io-util", "process", "macros"], default-features = false } +tracing = { version = "0.1.40", default-features = false } +anyhow = "1.0.93" +thiserror = "2.0.0" +tracing-subscriber = { version = "0.3.18", default-features = false, features = ["fmt"] } +chrono = "0.4.38" diff --git a/nativelink-controller/cas.yaml b/nativelink-controller/cas.yaml new file mode 100644 index 000000000..4857c2424 --- /dev/null +++ b/nativelink-controller/cas.yaml @@ -0,0 +1,92 @@ +--- +apiVersion: kube.rs/v1alpha1 +kind: NativeLink +metadata: + name: nativelink-cas + namespace: default +spec: + image: localhost:5001/nativelink:c567im4v5y7c25g0xaicmpfapgbjfgii + replicas: 1 + runtime: + args: [] + env: + RUST_LOG: "info" + # This configuration places objects in various directories in + # `~/.cache/nativelink`. When this location is mounted as a PersistentVolume + # it persists the cache across restarts. + config: + stores: + - name: CAS_MAIN_STORE + existenceCache: + backend: "compression-store" + + - name: compression-store + compression: + compression_algorithm: + lz4: {} + backend: fs-cas + + - name: fs-cas + filesystem: + content_path: ~/.cache/nativelink/content_path-cas + temp_path: ~/.cache/nativelink/tmp_path-cas + eviction_policy: + # 10gb + max_bytes: 10000000000 + + - name: AC_MAIN_STORE + completenessChecking: + backend: fs-ac + cas_store: ref-cas + + - name: fs-ac + filesystem: + content_path: ~/.cache/nativelink/content_path-ac + temp_path: ~/.cache/nativelink/tmp_path-ac + eviction_policy: + # 500mb + max_bytes: 500000000 + + - name: ref-cas + ref: + name: CAS_MAIN_STORE + servers: + - listener: + http: + socket_address: 0.0.0.0:50051 + services: + cas: + main: + cas_store: CAS_MAIN_STORE + ac: + main: + ac_store: AC_MAIN_STORE + capabilities: {} + bytestream: + cas_stores: + main: CAS_MAIN_STORE + # Only publish metrics on a private port.
+ - listener: + http: + socket_address: 0.0.0.0:50061 + services: + experimental_prometheus: + path: /metrics + - listener: + http: + socket_address: 0.0.0.0:50071 + tls: + cert_file: /root/example-do-not-use-in-prod-rootca.crt + key_file: /root/example-do-not-use-in-prod-key.pem + services: + cas: + main: + cas_store: CAS_MAIN_STORE + ac: + main: + ac_store: AC_MAIN_STORE + capabilities: {} + bytestream: + cas_stores: + main: CAS_MAIN_STORE + health: {} diff --git a/nativelink-controller/deploy.yaml b/nativelink-controller/deploy.yaml new file mode 100644 index 000000000..0232c3d03 --- /dev/null +++ b/nativelink-controller/deploy.yaml @@ -0,0 +1,69 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nativelink-controller + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nativelink-controller +rules: +- apiGroups: ["kube.rs"] + resources: ["nativelinks"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["kube.rs"] + resources: ["nativelinks/status"] + verbs: ["get", "update", "patch"] +- apiGroups: [""] # Core API group + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["apps"] # Added permissions for Deployments + resources: ["deployments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nativelink-controller +subjects: +- kind: ServiceAccount + name: nativelink-controller + namespace: default +roleRef: + kind: ClusterRole + name: nativelink-controller + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nativelink-controller +spec: + selector: + matchLabels: + app: nativelink-controller + template: + metadata: + labels: + app: nativelink-controller + spec: + serviceAccountName: nativelink-controller + containers: + - name: controller + image: localhost:5001/nativelink-controller:1fk6yr22dwjcj3kb1lxjhgk914i5a295 + env: + - name: NATIVELINK_PATH + value: "/usr/local/bin/nativelink" + volumeMounts: + - name: nativelink-executable + mountPath: /usr/local/bin/nativelink + - name: temp + mountPath: /tmp + volumes: + - name: nativelink-executable + hostPath: + path: /path/to/nativelink # Update this with actual path + - name: temp + emptyDir: {} diff --git a/nativelink-controller/nativelink-crd.yaml b/nativelink-controller/nativelink-crd.yaml new file mode 100644 index 000000000..2c27dfe84 --- /dev/null +++ b/nativelink-controller/nativelink-crd.yaml @@ -0,0 +1,1738 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: nativelinks.kube.rs +spec: + group: kube.rs + names: + categories: [] + kind: NativeLink + plural: nativelinks + shortNames: [] + singular: nativelink + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Whether the process is running + jsonPath: '.status.running' + name: Running + type: boolean + name: v1alpha1 + schema: + openAPIV3Schema: + description: Auto-generated derived type for NativeLinkSpec via `CustomResource` + properties: + spec: + properties: + config: + description: The NativeLink server configuration + properties: + global: + description: Any global configurations that apply to all modules live here. + nullable: true + properties: + default_digest_hash_function: + description: |- + Default hash function to use while uploading blobs to the CAS when not set by client. 
+ + Default: ConfigDigestHashFunction::sha256 + enum: + - sha256 + - blake3 + nullable: true + type: string + default_digest_size_health_check: + default: 0 + description: |- + Default digest size to use for health check when running diagnostics checks. Health checks are expected to use this size for filling a buffer that is used for creation of digest. + + Default: 1024*1024 (1MiB) + format: uint + minimum: 0.0 + type: integer + disable_metrics: + default: false + description: |- + This flag can be used to prevent metrics from being collected at runtime. Metrics are still able to be collected, but this flag prevents metrics that are collected at runtime (performance metrics) from being tallied. The overhead of collecting metrics is very low, so this flag should only be used if there is a very good reason to disable metrics. This flag can be forcibly set using the `NATIVELINK_DISABLE_METRICS` variable. If the variable is set it will always disable metrics regardless of what this flag is set to. + + Default: + type: boolean + idle_file_descriptor_timeout_millis: + default: 0 + description: |- + If a file descriptor is idle for this many milliseconds, it will be closed. In the event a client or store takes a long time to send or receive data the file descriptor will be closed, and since `max_open_files` blocks new open_file requests until a slot opens up, it will allow new requests to be processed. If a read or write is attempted on a closed file descriptor, the file will be reopened and the operation will continue. + + On services where worker(s) and scheduler(s) live in the same process, this also prevents deadlocks if a file->file copy is happening, but cannot open a new file descriptor because the limit has been reached. + + Default: 1000 (1 second) + format: uint64 + minimum: 0.0 + type: integer + max_open_files: + description: |- + Maximum number of open files that can be opened at one time. This value is not strictly enforced, it is a best effort. Some internal libraries open files or read metadata from a files which do not obey this limit, however the vast majority of cases will have this limit be honored. As a rule of thumb this value should be less than half the value of `ulimit -n`. Any network open file descriptors is not counted in this limit, but is counted in the kernel limit. It is a good idea to set a very large `ulimit -n`. Note: This value must be greater than 10. + + Default: 512 + format: uint + minimum: 0.0 + type: integer + required: + - max_open_files + type: object + schedulers: + description: List of schedulers available to use in this config. The keys can be used in other configs when needing to reference a scheduler. + items: + oneOf: + - required: + - simple + - required: + - grpc + - required: + - cacheLookup + - required: + - propertyModifier + properties: + cacheLookup: + properties: + ac_store: + description: The reference to the action cache store used to return cached actions from rather than running them again. To prevent unintended issues, this store should probably be a CompletenessCheckingStore. + type: string + scheduler: + description: The nested scheduler to use if cache lookup fails. + type: string + required: + - ac_store + - scheduler + type: object + grpc: + description: A scheduler that simply forwards requests to an upstream scheduler. This is useful to use when doing some kind of local action cache or CAS away from the main cluster of workers. In general, it's more efficient to point the build at the main scheduler directly though. 
+ properties: + connections_per_endpoint: + default: 0 + description: The number of connections to make to each specified endpoint to balance the load over multiple TCP connections. Default 1. + format: uint + minimum: 0.0 + type: integer + endpoint: + description: The upstream scheduler to forward requests to. + properties: + address: + description: The endpoint address (i.e. grpc(s)://example.com:443). + type: string + concurrency_limit: + description: The maximum concurrency to allow on this endpoint. + format: uint + minimum: 0.0 + nullable: true + type: integer + tls_config: + description: The TLS configuration to use to connect to the endpoint (if grpcs). + nullable: true + properties: + ca_file: + description: Path to the certificate authority to use to validate the remote. + type: string + cert_file: + description: Path to the certificate file for client authentication. + nullable: true + type: string + key_file: + description: Path to the private key file for client authentication. + nullable: true + type: string + required: + - ca_file + type: object + required: + - address + type: object + max_concurrent_requests: + default: 0 + description: Limit the number of simultaneous upstream requests to this many. A value of zero is treated as unlimited. If the limit is reached the request is queued. + format: uint + minimum: 0.0 + type: integer + retry: + default: + delay: 0.0 + jitter: 0.0 + max_retries: 0 + retry_on_errors: null + description: Retry configuration to use when a network request fails. + properties: + delay: + default: 0.0 + description: Delay in seconds for exponential back off. + format: float + type: number + jitter: + default: 0.0 + description: 'Amount of jitter to add as a percentage in decimal form. This will change the formula like: ```haskell random( (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), ) ```' + format: float + type: number + max_retries: + default: 0 + description: Maximum number of retries until retrying stops. Setting this to zero will always attempt 1 time, but not retry. + format: uint + minimum: 0.0 + type: integer + retry_on_errors: + description: A list of error codes to retry on, if this is not set then the default error codes to retry on are used. These default codes are the most likely to be non-permanent. - Unknown - Cancelled - DeadlineExceeded - ResourceExhausted - Aborted - Internal - Unavailable - DataLoss + items: + description: The possible error codes that might occur on an upstream request. + enum: + - Cancelled + - Unknown + - InvalidArgument + - DeadlineExceeded + - NotFound + - AlreadyExists + - PermissionDenied + - ResourceExhausted + - FailedPrecondition + - Aborted + - OutOfRange + - Unimplemented + - Internal + - Unavailable + - DataLoss + - Unauthenticated + type: string + nullable: true + type: array + type: object + required: + - endpoint + type: object + name: + type: string + propertyModifier: + properties: + modifications: + description: A list of modifications to perform to incoming actions for the nested scheduler. These are performed in order and blindly, so removing a property that doesn't exist is fine and overwriting an existing property is also fine. If adding properties that do not exist in the nested scheduler is not supported and will likely cause unexpected behaviour. + items: + oneOf: + - required: + - add + - required: + - remove + properties: + add: + description: Add a property to the action properties. 
+ properties: + name: + description: The name of the property to add. + type: string + value: + description: The value to assign to the property. + type: string + required: + - name + - value + type: object + remove: + description: Remove a named property from the action. + type: string + type: object + type: array + scheduler: + description: The nested scheduler to use after modifying the properties. + type: string + required: + - modifications + - scheduler + type: object + simple: + properties: + allocation_strategy: + default: least_recently_used + description: The strategy used to assign workers jobs. + enum: + - least_recently_used + - most_recently_used + type: string + client_action_timeout_s: + default: 0 + description: 'Mark operations as completed with error if no client has updated them within this duration. Default: 60 (seconds)' + format: uint64 + minimum: 0.0 + type: integer + experimental_backend: + description: 'The storage backend to use for the scheduler. Default: memory' + nullable: true + oneOf: + - required: + - Memory + - required: + - redis + properties: + Memory: + description: Use an in-memory store for the scheduler. + type: object + redis: + description: Use a redis store for the scheduler. + properties: + redis_store: + description: 'A reference to the redis store to use for the scheduler. Note: This MUST resolve to a RedisStore.' + type: string + required: + - redis_store + type: object + type: object + max_job_retries: + default: 0 + description: 'If a job returns an internal error or times out this many times when attempting to run on a worker the scheduler will return the last error to the client. Jobs will be retried and this configuration is to help prevent one rogue job from infinitely retrying and taking up a lot of resources when the task itself is the one causing the server to go into a bad state. Default: 3' + format: uint + minimum: 0.0 + type: integer + retain_completed_for_s: + default: 0 + description: 'The amount of time to retain completed actions in memory for in case a WaitExecution is called after the action has completed. Default: 60 (seconds)' + format: uint32 + minimum: 0.0 + type: integer + supported_platform_properties: + additionalProperties: + description: When the scheduler matches tasks to workers that are capable of running the task, this value will be used to determine how the property is treated. + enum: + - minimum + - exact + - priority + type: string + description: |- + A list of supported platform properties mapped to how these properties are used when the scheduler looks for worker nodes capable of running the task. + + For example, a value of: ```json { "cpu_count": "minimum", "cpu_arch": "exact" } ``` With a job that contains: ```json { "cpu_count": "8", "cpu_arch": "arm" } ``` Will result in the scheduler filtering out any workers that do not have "cpu_arch" = "arm" and filter out any workers that have less than 8 cpu cores available. + + The property names here must match the property keys provided by the worker nodes when they join the pool. In other words, the workers will publish their capabilities to the scheduler when they join the worker pool. If the worker fails to notify the scheduler of its (for example) "cpu_arch", the scheduler will never send any jobs to it, if all jobs have the "cpu_arch" label. There is no special treatment of any platform property labels other and entirely driven by worker configs and this config. 
+ nullable: true + type: object + worker_timeout_s: + default: 0 + description: 'Remove workers from pool once the worker has not responded in this amount of time in seconds. Default: 5 (seconds)' + format: uint64 + minimum: 0.0 + type: integer + type: object + required: + - name + type: object + nullable: true + type: array + servers: + description: Servers to setup for this process. + items: + properties: + listener: + description: Configuration + oneOf: + - required: + - http + properties: + http: + description: Listener for HTTP/HTTPS/HTTP2 sockets. + properties: + advanced_http: + default: + experimental_http2_adaptive_window: null + experimental_http2_enable_connect_protocol: null + experimental_http2_initial_connection_window_size: null + experimental_http2_initial_stream_window_size: null + experimental_http2_keep_alive_timeout: null + experimental_http2_max_concurrent_streams: null + experimental_http2_max_frame_size: null + experimental_http2_max_header_list_size: null + experimental_http2_max_pending_accept_reset_streams: null + experimental_http2_max_send_buf_size: null + http2_keep_alive_interval: null + description: Advanced Http server configuration. + properties: + experimental_http2_adaptive_window: + nullable: true + type: boolean + experimental_http2_enable_connect_protocol: + nullable: true + type: boolean + experimental_http2_initial_connection_window_size: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_initial_stream_window_size: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_keep_alive_timeout: + description: 'Note: This is in seconds.' + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_max_concurrent_streams: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_max_frame_size: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_max_header_list_size: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_max_pending_accept_reset_streams: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + experimental_http2_max_send_buf_size: + format: uint32 + minimum: 0.0 + nullable: true + type: integer + http2_keep_alive_interval: + description: 'Interval to send keep-alive pings via HTTP2. Note: This is in seconds.' + format: uint32 + minimum: 0.0 + nullable: true + type: integer + type: object + compression: + default: + accepted_compression_algorithms: [] + description: Data transport compression configuration to use for this service. + properties: + accepted_compression_algorithms: + description: |- + The compression algorithm that the server will accept from clients. The server will broadcast the supported compression algorithms to clients and the client will choose which compression algorithm to use. Enabling this will likely save a lot of data transfer, but will consume a lot of CPU and add a lot of latency. see: + + Default: {no supported compression} + items: + enum: + - none + - gzip + type: string + type: array + send_compression_algorithm: + description: |- + The compression algorithm that the server will use when sending responses to clients. Enabling this will likely save a lot of data transfer, but will consume a lot of CPU and add a lot of latency. 
see: + + Default: `HttpCompressionAlgorithm::none` + enum: + - none + - gzip + nullable: true + type: string + required: + - accepted_compression_algorithms + type: object + socket_address: + description: 'Address to listen on. Example: `127.0.0.1:8080` or `:8080` to listen to all IPs.' + type: string + tls: + description: |- + Tls Configuration for this server. If not set, the server will not use TLS. + + Default: None + nullable: true + properties: + cert_file: + description: Path to the certificate file. + type: string + client_ca_file: + description: Path to the certificate authority for mTLS, if client authentication is required for this endpoint. + nullable: true + type: string + client_crl_file: + description: Path to the certificate revocation list for mTLS, if client authentication is required for this endpoint. + nullable: true + type: string + key_file: + description: Path to the private key file. + type: string + required: + - cert_file + - key_file + type: object + required: + - socket_address + type: object + type: object + name: + default: '' + description: |- + Name of the server. This is used to help identify the service for telemetry and logs. + + Default: {index of server in config} + type: string + services: + description: Services to attach to server. + nullable: true + properties: + ac: + additionalProperties: + properties: + ac_store: + description: The store name referenced in the `stores` map in the main config. This store name referenced here may be reused multiple times. + type: string + read_only: + default: false + description: Whether the Action Cache store may be written to, this if set to false it is only possible to read from the Action Cache. + type: boolean + required: + - ac_store + type: object + description: The Action Cache (AC) backend config. The key is the instance_name used in the protocol and the value is the underlying AC store config. + nullable: true + type: object + admin: + description: This is the service for any administrative tasks. It provides a REST API endpoint for administrative purposes. + nullable: true + properties: + path: + default: '' + description: |- + Path to register the admin API. If path is "/admin", and your domain is "example.com", you can reach the endpoint with: . + + Default: "/admin" + type: string + type: object + bytestream: + description: This is the service used to stream data to and from the CAS. Bazel's protocol strongly encourages users to use this streaming interface to interact with the CAS when the data is large. + nullable: true + properties: + cas_stores: + additionalProperties: + type: string + description: Name of the store in the "stores" configuration. + type: object + max_bytes_per_stream: + default: 0 + description: |- + Max number of bytes to send on each grpc stream chunk. According to 16KiB - 64KiB is optimal. + + Default: 64KiB + format: uint + minimum: 0.0 + type: integer + max_decoding_message_size: + default: 0 + description: 'Maximum number of bytes to decode on each grpc stream chunk. Default: 4 MiB' + format: uint + minimum: 0.0 + type: integer + persist_stream_on_disconnect_timeout: + default: 0 + description: |- + In the event a client disconnects while uploading a blob, we will hold the internal stream open for this many seconds before closing it. This allows clients that disconnect to reconnect and continue uploading the same blob. 
+ + Default: 10 (seconds) + format: uint + minimum: 0.0 + type: integer + required: + - cas_stores + type: object + capabilities: + additionalProperties: + properties: + remote_execution: + description: Configuration for remote execution capabilities. If not set the capabilities service will inform the client that remote execution is not supported. + nullable: true + properties: + scheduler: + description: Scheduler used to configure the capabilities of remote execution. + type: string + required: + - scheduler + type: object + type: object + description: Capabilities service is required in order to use most of the bazel protocol. This service is used to provide the supported features and versions of this bazel GRPC service. + nullable: true + type: object + cas: + additionalProperties: + properties: + cas_store: + description: The store name referenced in the `stores` map in the main config. This store name referenced here may be reused multiple times. + type: string + required: + - cas_store + type: object + description: The Content Addressable Storage (CAS) backend config. The key is the instance_name used in the protocol and the value is the underlying CAS store config. + nullable: true + type: object + execution: + additionalProperties: + properties: + cas_store: + description: The store name referenced in the `stores` map in the main config. This store name referenced here may be reused multiple times. This value must be a CAS store reference. + type: string + scheduler: + description: The scheduler name referenced in the `schedulers` map in the main config. + type: string + required: + - cas_store + - scheduler + type: object + description: 'The remote execution service configuration. NOTE: This service is under development and is currently just a place holder.' + nullable: true + type: object + experimental_bep: + description: Experimental - Build Event Protocol (BEP) configuration. This is the service that will consume build events from the client and publish them to a store for processing by an external service. + nullable: true + properties: + store: + description: The store to publish build events to. The store name referenced in the `stores` map in the main config. + type: string + required: + - store + type: object + experimental_prometheus: + description: Experimental - Prometheus metrics configuration. Metrics are gathered as a singleton but may be served on multiple endpoints. + nullable: true + properties: + path: + default: '' + description: |- + Path to register prometheus metrics. If path is "/metrics", and your domain is "example.com", you can reach the endpoint with: . + + Default: "/metrics" + type: string + type: object + health: + description: This is the service for health status check. + nullable: true + properties: + path: + default: '' + description: |- + Path to register the health status check. If path is "/status", and your domain is "example.com", you can reach the endpoint with: . + + Default: "/status" + type: string + type: object + worker_api: + description: 'This is the service used for workers to connect and communicate through. NOTE: This service should be served on a different, non-public port. In other words, `worker_api` configuration should not have any other services that are served on the same port. Doing so is a security risk, as workers have a different permission set than a client that makes the remote execution/cache requests.' 
+ nullable: true + properties: + scheduler: + description: The scheduler name referenced in the `schedulers` map in the main config. + type: string + required: + - scheduler + type: object + type: object + required: + - listener + type: object + type: array + stores: + description: List of stores available to use in this config. The keys can be used in other configs when needing to reference a store. + items: + oneOf: + - required: + - memory + - required: + - s3 + - required: + - verify + - required: + - completenessChecking + - required: + - compression + - required: + - dedup + - required: + - existenceCache + - required: + - fastSlow + - required: + - shard + - required: + - filesystem + - required: + - ref + - required: + - sizePartitioning + - required: + - grpc + - required: + - redis + - required: + - noop + properties: + completenessChecking: + description: |- + Completeness checking store verifies if the output files & folders exist in the CAS before forwarding the request to the underlying store. Note: This store should only be used on AC stores. + + **Example JSON Config:** ```json "completeness_checking": { "backend": { "filesystem": { "content_path": "~/.cache/nativelink/content_path-ac", "temp_path": "~/.cache/nativelink/tmp_path-ac", "eviction_policy": { // 500mb. "max_bytes": 500000000, } } }, "cas_store": { "ref_store": { "name": "CAS_MAIN_STORE" } } } ``` + properties: + backend: + description: The underlying store wrap around. All content will first flow through self before forwarding to backend. In the event there is an error detected in self, the connection to the backend will be terminated, and early termination should always cause updates to fail on the backend. + type: string + cas_store: + description: When a request is made, the results are decoded and all output digests/files are verified to exist in this CAS store before returning success. + type: string + required: + - backend + - cas_store + type: object + compression: + description: |- + A compression store that will compress the data inbound and outbound. There will be a non-trivial cost to compress and decompress the data, but in many cases if the final store is a store that requires network transport and/or storage space is a concern it is often faster and more efficient to use this store before those stores. + + **Example JSON Config:** ```json "compression": { "compression_algorithm": { "lz4": {} }, "backend": { "filesystem": { "content_path": "/tmp/nativelink/data/content_path-cas", "temp_path": "/tmp/nativelink/data/tmp_path-cas", "eviction_policy": { // 2gb. "max_bytes": 2000000000, } } } } ``` + properties: + backend: + description: The underlying store wrap around. All content will first flow through self before forwarding to backend. In the event there is an error detected in self, the connection to the backend will be terminated, and early termination should always cause updates to fail on the backend. + type: string + compression_algorithm: + description: The compression algorithm to use. + oneOf: + - required: + - lz4 + properties: + lz4: + description: |- + LZ4 compression algorithm is extremely fast for compression and decompression, however does not perform very well in compression ratio. In most cases build artifacts are highly compressible, however lz4 is quite good at aborting early if the data is not deemed very compressible. + + see: + properties: + block_size: + default: 0 + description: |- + Size of the blocks to compress. 
Higher values require more ram, but might yield slightly better compression ratios. + + Default: 65536 (64k). + format: uint32 + minimum: 0.0 + type: integer + max_decode_block_size: + default: 0 + description: |- + Maximum size allowed to attempt to deserialize data into. This is needed because the block_size is embedded into the data so if there was a bad actor, they could upload an extremely large block_size'ed entry and we'd allocate a large amount of memory when retrieving the data. To prevent this from happening, we allow you to specify the maximum that we'll attempt deserialize. + + Default: value in `block_size`. + format: uint32 + minimum: 0.0 + type: integer + type: object + type: object + required: + - backend + - compression_algorithm + type: object + dedup: + description: |- + A dedup store will take the inputs and run a rolling hash algorithm on them to slice the input into smaller parts then run a sha256 algorithm on the slice and if the object doesn't already exist, upload the slice to the `content_store` using a new digest of just the slice. Once all parts exist, an Action-Cache-like digest will be built and uploaded to the `index_store` which will contain a reference to each chunk/digest of the uploaded file. Downloading a request will first grab the index from the `index_store`, and forward the download content of each chunk as if it were one file. + + This store is exceptionally good when the following conditions are met: * Content is mostly the same (inserts, updates, deletes are ok) * Content is not compressed or encrypted * Uploading or downloading from `content_store` is the bottleneck. + + Note: This store pairs well when used with CompressionStore as the `content_store`, but never put DedupStore as the backend of CompressionStore as it will negate all the gains. + + Note: When running `.has()` on this store, it will only check to see if the entry exists in the `index_store` and not check if the individual chunks exist in the `content_store`. + + **Example JSON Config:** ```json "dedup": { "index_store": { "memory_store": { "max_size": 1000000000, // 1GB "eviction_policy": "LeastRecentlyUsed" } }, "content_store": { "compression": { "compression_algorithm": { "lz4": {} }, "backend": { "fast_slow": { "fast": { "memory_store": { "max_size": 500000000, // 500MB "eviction_policy": "LeastRecentlyUsed" } }, "slow": { "filesystem": { "content_path": "/tmp/nativelink/data/content_path-content", "temp_path": "/tmp/nativelink/data/tmp_path-content", "eviction_policy": { "max_bytes": 2000000000 // 2gb. } } } } } } } } ``` + properties: + content_store: + description: The store where the individual chunks will be uploaded. This store should generally be the slower & larger store. + type: string + index_store: + description: Store used to store the index of each dedup slice. This store should generally be fast and small. + type: string + max_concurrent_fetch_per_get: + default: 0 + description: |- + Due to implementation detail, we want to prefer to download the first chunks of the file so we can stream the content out and free up some of our buffers. This configuration will be used to to restrict the number of concurrent chunk downloads at a time per `get()` request. + + This setting will also affect how much memory might be used per `get()` request. Estimated worst case memory per `get()` request is: `max_concurrent_fetch_per_get * max_size`. 
+ + Default: 10 + format: uint32 + minimum: 0.0 + type: integer + max_size: + default: 0 + description: |- + Maximum size a chunk is allowed to be. + + Default: 524288 (512k) + format: uint32 + minimum: 0.0 + type: integer + min_size: + default: 0 + description: |- + Minimum size that a chunk will be when slicing up the content. Note: This setting can be increased to improve performance because it will actually not check this number of bytes when deciding where to partition the data. + + Default: 65536 (64k) + format: uint32 + minimum: 0.0 + type: integer + normal_size: + default: 0 + description: |- + A best-effort attempt will be made to keep the average size of the chunks to this number. It is not a guarantee, but a slight attempt will be made. + + This value will also be about the threshold used to determine if we should even attempt to dedup the entry or just forward it directly to the content_store without an index. The actual value will be about `normal_size * 1.3` due to implementation details. + + Default: 262144 (256k) + format: uint32 + minimum: 0.0 + type: integer + required: + - content_store + - index_store + type: object + existenceCache: + description: |- + Existence store will wrap around another store and cache calls to has so that subsequent has_with_results calls will be faster. This is useful for cases when you have a store that is slow to respond to has calls. Note: This store should only be used on CAS stores. + + **Example JSON Config:** ```json "existence_cache": { "backend": { "memory": { "eviction_policy": { // 500mb. "max_bytes": 500000000, } } }, "cas_store": { "ref_store": { "name": "CAS_MAIN_STORE" } } } ``` + properties: + backend: + description: The underlying store wrap around. All content will first flow through self before forwarding to backend. In the event there is an error detected in self, the connection to the backend will be terminated, and early termination should always cause updates to fail on the backend. + type: string + eviction_policy: + description: Policy used to evict items out of the store. Failure to set this value will cause items to never be removed from the store causing infinite memory usage. + nullable: true + properties: + evict_bytes: + default: 0 + description: 'When eviction starts based on hitting max_bytes, continue until max_bytes - evict_bytes is met to create a low watermark. This stops operations from thrashing when the store is close to the limit. Default: 0' + format: uint + minimum: 0.0 + type: integer + max_bytes: + default: 0 + description: 'Maximum number of bytes before eviction takes place. Default: 0. Zero means never evict based on size.' + format: uint + minimum: 0.0 + type: integer + max_count: + default: 0 + description: 'Maximum size of the store before an eviction takes place. Default: 0. Zero means never evict based on count.' + format: uint64 + minimum: 0.0 + type: integer + max_seconds: + default: 0 + description: 'Maximum number of seconds for an entry to live since it was last accessed before it is evicted. Default: 0. Zero means never evict based on time.' + format: uint32 + minimum: 0.0 + type: integer + type: object + required: + - backend + type: object + fastSlow: + description: |- + FastSlow store will first try to fetch the data from the `fast` store and then if it does not exist try the `slow` store. When the object does exist in the `slow` store, it will copy the data to the `fast` store while returning the data. 
This store should be thought of as a store that "buffers" the data to the `fast` store. On uploads it will mirror data to both `fast` and `slow` stores. + + WARNING: If you need data to always exist in the `slow` store for something like remote execution, be careful because this store will never check to see if the objects exist in the `slow` store if it exists in the `fast` store (ie: it assumes that if an object exists `fast` store it will exist in `slow` store). + + ***Example JSON Config:*** ```json "fast_slow": { "fast": { "filesystem": { "content_path": "/tmp/nativelink/data/content_path-index", "temp_path": "/tmp/nativelink/data/tmp_path-index", "eviction_policy": { // 500mb. "max_bytes": 500000000, } } }, "slow": { "filesystem": { "content_path": "/tmp/nativelink/data/content_path-index", "temp_path": "/tmp/nativelink/data/tmp_path-index", "eviction_policy": { // 500mb. "max_bytes": 500000000, } } } } ``` + properties: + fast: + description: Fast store that will be attempted to be contacted before reaching out to the `slow` store. + type: string + slow: + description: If the object does not exist in the `fast` store it will try to get it from this store. + type: string + required: + - fast + - slow + type: object + filesystem: + description: |- + Stores the data on the filesystem. This store is designed for local persistent storage. Restarts of this program should restore the previous state, meaning anything uploaded will be persistent as long as the filesystem integrity holds. This store uses the filesystem's `atime` (access time) to hold the last touched time of the file(s). + + **Example JSON Config:** ```json "filesystem": { "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", "eviction_policy": { // 10gb. "max_bytes": 10000000000, } } ``` + properties: + block_size: + default: 0 + description: 'The block size of the filesystem for the running machine value is used to determine an entry''s actual size on disk consumed For a 4KB block size filesystem, a 1B file actually consumes 4KB Default: 4096' + format: uint64 + minimum: 0.0 + type: integer + content_path: + description: Path on the system where to store the actual content. This is where the bulk of the data will be placed. On service bootup this folder will be scanned and all files will be added to the cache. In the event one of the files doesn't match the criteria, the file will be deleted. + type: string + eviction_policy: + description: Policy used to evict items out of the store. Failure to set this value will cause items to never be removed from the store causing infinite memory usage. + nullable: true + properties: + evict_bytes: + default: 0 + description: 'When eviction starts based on hitting max_bytes, continue until max_bytes - evict_bytes is met to create a low watermark. This stops operations from thrashing when the store is close to the limit. Default: 0' + format: uint + minimum: 0.0 + type: integer + max_bytes: + default: 0 + description: 'Maximum number of bytes before eviction takes place. Default: 0. Zero means never evict based on size.' + format: uint + minimum: 0.0 + type: integer + max_count: + default: 0 + description: 'Maximum size of the store before an eviction takes place. Default: 0. Zero means never evict based on count.' + format: uint64 + minimum: 0.0 + type: integer + max_seconds: + default: 0 + description: 'Maximum number of seconds for an entry to live since it was last accessed before it is evicted. Default: 0. 
Zero means never evict based on time.' + format: uint32 + minimum: 0.0 + type: integer + type: object + read_buffer_size: + default: 0 + description: 'Buffer size to use when reading files. Generally this should be left to the default value except for testing. Default: 32k.' + format: uint32 + minimum: 0.0 + type: integer + temp_path: + description: 'A temporary location of where files that are being uploaded or deleted will be placed while the content cannot be guaranteed to be accurate. This location must be on the same block device as `content_path` so atomic moves can happen (ie: move without copy). All files in this folder will be deleted on every startup.' + type: string + required: + - content_path + - temp_path + type: object + grpc: + description: |- + This store will pass-through calls to another GRPC store. This store is not designed to be used as a sub-store of another store, but it does satisfy the interface and will likely work. + + One major GOTCHA is that some stores use a special function on this store to get the size of the underlying object, which is only reliable when this store is serving the a CAS store, not an AC store. If using this store directly without being a child of any store there are no side effects and is the most efficient way to use it. + + **Example JSON Config:** ```json "grpc": { "instance_name": "main", "endpoints": [ {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"} ], "store_type": "ac" } ``` + properties: + connections_per_endpoint: + default: 0 + description: The number of connections to make to each specified endpoint to balance the load over multiple TCP connections. Default 1. + format: uint + minimum: 0.0 + type: integer + endpoints: + description: The endpoint of the grpc connection. + items: + properties: + address: + description: The endpoint address (i.e. grpc(s)://example.com:443). + type: string + concurrency_limit: + description: The maximum concurrency to allow on this endpoint. + format: uint + minimum: 0.0 + nullable: true + type: integer + tls_config: + description: The TLS configuration to use to connect to the endpoint (if grpcs). + nullable: true + properties: + ca_file: + description: Path to the certificate authority to use to validate the remote. + type: string + cert_file: + description: Path to the certificate file for client authentication. + nullable: true + type: string + key_file: + description: Path to the private key file for client authentication. + nullable: true + type: string + required: + - ca_file + type: object + required: + - address + type: object + type: array + instance_name: + default: '' + description: Instance name for GRPC calls. Proxy calls will have the instance_name changed to this. + type: string + max_concurrent_requests: + default: 0 + description: Limit the number of simultaneous upstream requests to this many. A value of zero is treated as unlimited. If the limit is reached the request is queued. + format: uint + minimum: 0.0 + type: integer + retry: + default: + delay: 0.0 + jitter: 0.0 + max_retries: 0 + retry_on_errors: null + description: Retry configuration to use when a network request fails. + properties: + delay: + default: 0.0 + description: Delay in seconds for exponential back off. + format: float + type: number + jitter: + default: 0.0 + description: 'Amount of jitter to add as a percentage in decimal form. 
This will change the formula like: ```haskell random( (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), ) ```' + format: float + type: number + max_retries: + default: 0 + description: Maximum number of retries until retrying stops. Setting this to zero will always attempt 1 time, but not retry. + format: uint + minimum: 0.0 + type: integer + retry_on_errors: + description: A list of error codes to retry on, if this is not set then the default error codes to retry on are used. These default codes are the most likely to be non-permanent. - Unknown - Cancelled - DeadlineExceeded - ResourceExhausted - Aborted - Internal - Unavailable - DataLoss + items: + description: The possible error codes that might occur on an upstream request. + enum: + - Cancelled + - Unknown + - InvalidArgument + - DeadlineExceeded + - NotFound + - AlreadyExists + - PermissionDenied + - ResourceExhausted + - FailedPrecondition + - Aborted + - OutOfRange + - Unimplemented + - Internal + - Unavailable + - DataLoss + - Unauthenticated + type: string + nullable: true + type: array + type: object + store_type: + description: The type of the upstream store, this ensures that the correct server calls are made. + enum: + - cas + - ac + type: string + required: + - endpoints + - store_type + type: object + memory: + description: |- + Memory store will store all data in a hashmap in memory. + + **Example JSON Config:** ```json "memory": { "eviction_policy": { // 10mb. "max_bytes": 10000000, } } } ``` + properties: + eviction_policy: + description: Policy used to evict items out of the store. Failure to set this value will cause items to never be removed from the store causing infinite memory usage. + nullable: true + properties: + evict_bytes: + default: 0 + description: 'When eviction starts based on hitting max_bytes, continue until max_bytes - evict_bytes is met to create a low watermark. This stops operations from thrashing when the store is close to the limit. Default: 0' + format: uint + minimum: 0.0 + type: integer + max_bytes: + default: 0 + description: 'Maximum number of bytes before eviction takes place. Default: 0. Zero means never evict based on size.' + format: uint + minimum: 0.0 + type: integer + max_count: + default: 0 + description: 'Maximum size of the store before an eviction takes place. Default: 0. Zero means never evict based on count.' + format: uint64 + minimum: 0.0 + type: integer + max_seconds: + default: 0 + description: 'Maximum number of seconds for an entry to live since it was last accessed before it is evicted. Default: 0. Zero means never evict based on time.' + format: uint32 + minimum: 0.0 + type: integer + type: object + type: object + name: + type: string + noop: + description: |- + Noop store is a store that sends streams into the void and all data retrieval will return 404 (NotFound). This can be useful for cases where you may need to partition your data and part of your data needs to be discarded. + + **Example JSON Config:** ```json "noop": {} ``` + type: object + redis: + description: |- + Stores data in any stores compatible with Redis APIs. + + Pairs well with SizePartitioning and/or FastSlow stores. Ideal for accepting small object sizes as most redis store services have a max file upload of between 256Mb-512Mb. + + **Example JSON Config:** ```json "redis_store": { "addresses": [ "redis://127.0.0.1:6379/", ] } ``` + properties: + addresses: + description: 'The hostname or IP address of the Redis server. 
Ex: ["redis://username:password@redis-server-url:6380/99"] 99 Represents database ID, 6380 represents the port.' + items: + type: string + type: array + broadcast_channel_capacity: + default: 0 + description: |- + When using pubsub interface, this is the maximum number of items to keep queued up before dropping old items. + + Default: 4096 + format: uint + minimum: 0.0 + type: integer + command_timeout_ms: + default: 0 + description: |- + The amount of time in milliseconds until the redis store considers the command to be timed out. This will trigger a retry of the command and potentially a reconnection to the redis server. + + Default: 10000 (10 seconds) + format: uint64 + minimum: 0.0 + type: integer + connection_pool_size: + default: 0 + description: |- + The number of connections to keep open to the redis server(s). + + Default: 3 + format: uint + minimum: 0.0 + type: integer + connection_timeout_ms: + default: 0 + description: |- + The amount of time in milliseconds until the redis store considers the connection to unresponsive. This will trigger a reconnection to the redis server. + + Default: 3000 (3 seconds) + format: uint64 + minimum: 0.0 + type: integer + connection_timeout_s: + default: 0 + description: |- + The connection timeout for the Redis connection in seconds. + + Default: 10 + format: uint64 + minimum: 0.0 + type: integer + experimental_pub_sub_channel: + description: |- + An optional and experimental Redis channel to publish write events to. + + If set, every time a write operation is made to a Redis node then an event will be published to a Redis channel with the given name. If unset, the writes will still be made, but the write events will not be published. + + Default: (Empty String / No Channel) + nullable: true + type: string + key_prefix: + default: '' + description: |- + An optional prefix to prepend to all keys in this store. + + Setting this value can make it convenient to query or organize your data according to the shared prefix. + + Default: (Empty String / No Prefix) + type: string + max_chunk_uploads_per_update: + default: 0 + description: |- + The maximum number of upload chunks to allow per update. This is used to limit the amount of memory used when uploading large objects to the redis server. A good rule of thumb is to think of the data as: AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THORETICAL_MAX_CONCURRENT_UPLOADS (note: it is a good idea to divide AVAIL_MAX_MEMORY by ~10 to account for other memory usage) + + Default: 10 + format: uint + minimum: 0.0 + type: integer + mode: + default: standard + description: |- + Set the mode Redis is operating in. + + Available options are "cluster" for [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/), "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/), or "standard" if Redis is operating in neither cluster nor sentinel mode. + + Default: standard, + enum: + - cluster + - sentinel + - standard + type: string + read_chunk_size: + default: 0 + description: |- + The amount of data to read from the redis server at a time. This is used to limit the amount of memory used when reading large objects from the redis server as well as limiting the amount of time a single read operation can take. + + IMPORTANT: If this value is too high, the `command_timeout_ms` might be triggered if the latency or throughput to the redis server is too low. 
+ + Default: 64KiB + format: uint + minimum: 0.0 + type: integer + response_timeout_s: + default: 0 + description: |- + The response timeout for the Redis connection in seconds. + + Default: 10 + format: uint64 + minimum: 0.0 + type: integer + retry: + default: + delay: 0.0 + jitter: 0.0 + max_retries: 0 + retry_on_errors: null + description: |- + Retry configuration to use when a network request fails. See the `Retry` struct for more information. + + Default: Retry { max_retries: 0, /* unlimited */ delay: 0.1, /* 100ms */ jitter: 0.5, /* 50% */ retry_on_errors: None, /* not used in redis store */ } + properties: + delay: + default: 0.0 + description: Delay in seconds for exponential back off. + format: float + type: number + jitter: + default: 0.0 + description: 'Amount of jitter to add as a percentage in decimal form. This will change the formula like: ```haskell random( (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), ) ```' + format: float + type: number + max_retries: + default: 0 + description: Maximum number of retries until retrying stops. Setting this to zero will always attempt 1 time, but not retry. + format: uint + minimum: 0.0 + type: integer + retry_on_errors: + description: A list of error codes to retry on, if this is not set then the default error codes to retry on are used. These default codes are the most likely to be non-permanent. - Unknown - Cancelled - DeadlineExceeded - ResourceExhausted - Aborted - Internal - Unavailable - DataLoss + items: + description: The possible error codes that might occur on an upstream request. + enum: + - Cancelled + - Unknown + - InvalidArgument + - DeadlineExceeded + - NotFound + - AlreadyExists + - PermissionDenied + - ResourceExhausted + - FailedPrecondition + - Aborted + - OutOfRange + - Unimplemented + - Internal + - Unavailable + - DataLoss + - Unauthenticated + type: string + nullable: true + type: array + type: object + required: + - addresses + type: object + ref: + description: |- + Store used to reference a store in the root store manager. This is useful for cases when you want to share a store in different nested stores. Example, you may want to share the same memory store used for the action cache, but use a FastSlowStore and have the fast store also share the memory store for efficiency. + + **Example JSON Config:** ```json "ref_store": { "name": "FS_CONTENT_STORE" } ``` + properties: + name: + description: Name of the store under the root "stores" config object. + type: string + required: + - name + type: object + s3: + description: |- + S3 store will use Amazon's S3 service as a backend to store the files. This configuration can be used to share files across multiple instances. + + This configuration will never delete files, so you are responsible for purging old files in other ways. + + **Example JSON Config:** ```json "experimental_s3_store": { "region": "eu-north-1", "bucket": "crossplane-bucket-af79aeca9", "key_prefix": "test-prefix-index/", "retry": { "max_retries": 6, "delay": 0.3, "jitter": 0.5 }, "multipart_max_concurrent_uploads": 10 } ``` + properties: + bucket: + default: '' + description: Bucket name to use as the backend. + type: string + consider_expired_after_s: + default: 0 + description: |- + If the number of seconds since the `last_modified` time of the object is greater than this value, the object will not be considered "existing". This allows for external tools to delete objects that have not been uploaded in a long time. 
If a client receives a NotFound the client should re-upload the object. + + There should be sufficient buffer time between how long the expiration configuration of the external tool is and this value. Keeping items around for a few days is generally a good idea. + + Default: 0. Zero means never consider an object expired. + format: uint32 + minimum: 0.0 + type: integer + disable_http2: + default: false + description: |- + Disable http/2 connections and only use http/1.1. Default client configuration will have http/1.1 and http/2 enabled for connection schemes. Http/2 should be disabled if environments have poor support or performance related to http/2. Safe to keep default unless underlying network environment or S3 API servers specify otherwise. + + Default: false + type: boolean + insecure_allow_http: + default: false + description: |- + Allow unencrypted HTTP connections. Only use this for local testing. + + Default: false + type: boolean + key_prefix: + description: If you wish to prefix the location on s3. If None, no prefix will be used. + nullable: true + type: string + max_retry_buffer_per_request: + description: |- + The maximum buffer size to retain in case of a retryable error during upload. Setting this to zero will disable upload buffering; this means that in the event of a failure during upload, the entire upload will be aborted and the client will likely receive an error. + + Default: 5MB. + format: uint + minimum: 0.0 + nullable: true + type: integer + multipart_max_concurrent_uploads: + description: |- + Maximum number of concurrent UploadPart requests per MultipartUpload. + + Default: 10. + format: uint + minimum: 0.0 + nullable: true + type: integer + region: + default: '' + description: S3 region. Usually us-east-1, us-west-2, af-south-1, exc... + type: string + retry: + default: + delay: 0.0 + jitter: 0.0 + max_retries: 0 + retry_on_errors: null + description: Retry configuration to use when a network request fails. + properties: + delay: + default: 0.0 + description: Delay in seconds for exponential back off. + format: float + type: number + jitter: + default: 0.0 + description: 'Amount of jitter to add as a percentage in decimal form. This will change the formula like: ```haskell random( (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), ) ```' + format: float + type: number + max_retries: + default: 0 + description: Maximum number of retries until retrying stops. Setting this to zero will always attempt 1 time, but not retry. + format: uint + minimum: 0.0 + type: integer + retry_on_errors: + description: A list of error codes to retry on, if this is not set then the default error codes to retry on are used. These default codes are the most likely to be non-permanent. - Unknown - Cancelled - DeadlineExceeded - ResourceExhausted - Aborted - Internal - Unavailable - DataLoss + items: + description: The possible error codes that might occur on an upstream request. + enum: + - Cancelled + - Unknown + - InvalidArgument + - DeadlineExceeded + - NotFound + - AlreadyExists + - PermissionDenied + - ResourceExhausted + - FailedPrecondition + - Aborted + - OutOfRange + - Unimplemented + - Internal + - Unavailable + - DataLoss + - Unauthenticated + type: string + nullable: true + type: array + type: object + type: object + shard: + description: |- + Shards the data to multiple stores. This is useful for cases when you want to distribute the load across multiple stores. 
The digest hash is used to determine which store to send the data to. + + **Example JSON Config:** ```json "shard": { "stores": [ "memory": { "eviction_policy": { // 10mb. "max_bytes": 10000000 }, "weight": 1 } ] } ``` + properties: + stores: + description: Stores to shard the data to. + items: + description: Configuration for an individual shard of the store. + properties: + store: + description: Store to shard the data to. + type: string + weight: + description: |- + The weight of the store. This is used to determine how much data should be sent to the store. The actual percentage is the sum of all the store's weights divided by the individual store's weight. + + Default: 1 + format: uint32 + minimum: 0.0 + nullable: true + type: integer + required: + - store + type: object + type: array + required: + - stores + type: object + sizePartitioning: + description: |- + Uses the size field of the digest to separate which store to send the data. This is useful for cases when you'd like to put small objects in one store and large objects in another store. This should only be used if the size field is the real size of the content, in other words, don't use on AC (Action Cache) stores. Any store where you can safely use VerifyStore.verify_size = true, this store should be safe to use (ie: CAS stores). + + **Example JSON Config:** ```json "size_partitioning": { "size": 134217728, // 128mib. "lower_store": { "memory": { "eviction_policy": { "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100000000}" } } }, "upper_store": { /// This store discards data larger than 128mib. "noop": {} } } ``` + properties: + lower_store: + description: Store to send data when object is < (less than) size. + type: string + size: + description: Size to partition the data on. + format: uint64 + minimum: 0.0 + type: integer + upper_store: + description: Store to send data when object is >= (less than eq) size. + type: string + required: + - lower_store + - size + - upper_store + type: object + verify: + description: |- + Verify store is used to apply verifications to an underlying store implementation. It is strongly encouraged to validate as much data as you can before accepting data from a client, failing to do so may cause the data in the store to be populated with invalid data causing all kinds of problems. + + The suggested configuration is to have the CAS validate the hash and size and the AC validate nothing. + + **Example JSON Config:** ```json "verify": { "memory": { "eviction_policy": { "max_bytes": 500000000 // 500mb. } }, "verify_size": true, "hash_verification_function": "sha256" } ``` + properties: + backend: + description: The underlying store wrap around. All content will first flow through self before forwarding to backend. In the event there is an error detected in self, the connection to the backend will be terminated, and early termination should always cause updates to fail on the backend. + type: string + verify_hash: + default: false + description: |- + If the data should be hashed and verify that the key matches the computed hash. The hash function is automatically determined based request and if not set will use the global default. + + This should be set to None for AC, but hashing function like `sha256` for CAS stores. + type: boolean + verify_size: + default: false + description: |- + If set the store will verify the size of the data before accepting an upload of data. + + This should be set to false for AC, but true for CAS stores. 
+ type: boolean + required: + - backend + type: object + required: + - name + type: object + type: array + workers: + description: Worker configurations used to execute jobs. + items: + oneOf: + - required: + - local + properties: + local: + description: A worker type that executes jobs locally on this machine. + properties: + additional_environment: + additionalProperties: + oneOf: + - required: + - property + - required: + - value + - required: + - timeout_millis + - required: + - side_channel_file + - required: + - action_directory + properties: + action_directory: + description: |- + A "root" directory for the action. This directory can be used to store temporary files that are not needed after the action has completed. This directory will be purged after the action has completed. + + For example: If an action writes temporary data to a path but nativelink should clean up this path after the job has executed, you may create any directory under the path provided in this variable. A common pattern would be to use `entrypoint` to set a shell script that reads this variable, `mkdir $ENV_VAR_NAME/tmp` and `export TMPDIR=$ENV_VAR_NAME/tmp`. Another example might be to bind-mount the `/tmp` path in a container to this path in `entrypoint`. + type: string + property: + description: The name of the platform property in the action to get the value from. + type: string + side_channel_file: + description: |- + A special file path will be provided that can be used to communicate with the parent process about out-of-band information. This file will be read after the command has finished executing. Based on the contents of the file, the behavior of the result may be modified. + + The format of the file contents should be json with the following schema: { // If set the command will be considered a failure. // May be one of the following static strings: // "timeout": Will Consider this task to be a timeout. "failure": "timeout", } + + All fields are optional, file does not need to be created and may be empty. + type: string + timeout_millis: + description: The max amount of time in milliseconds the command is allowed to run (requested by the client). + type: string + value: + description: The raw value to set. + type: string + type: object + description: An optional mapping of environment names to set for the execution as well as those specified in the action itself. If set, will set each key as an environment variable before executing the job with the value of the environment variable being the value of the property of the action being executed of that name or the fixed value. + nullable: true + type: object + cas_fast_slow_store: + description: Underlying CAS store that the worker will use to download CAS artifacts. This store must be a `FastSlowStore`. The `fast` store must be a `FileSystemStore` because it will use hardlinks when building out the files instead of copying the files. The slow store must eventually resolve to the same store the scheduler/client uses to send job requests. + type: string + entrypoint: + default: '' + description: 'The command to execute on every execution request. This will be parsed as a command + arguments (not shell). Example: "run.sh" and a job with command: "sleep 5" will result in a command like: "run.sh sleep 5". Default: {Use the command from the job request}.' + type: string + experimental_precondition_script: + description: An optional script to run before every action is processed on the worker. 
The value should be the full path to the script to execute and will pause all actions on the worker if it returns an exit code other than 0. If not set, then the worker will never pause and will continue to accept jobs according to the scheduler configuration. This is useful, for example, if the worker should not take any more actions until there is enough resource available on the machine to handle them. + nullable: true + type: string + max_action_timeout: + default: 0 + description: |- + The maximum time an action is allowed to run. If a task requests for a timeout longer than this time limit, the task will be rejected. Value in seconds. + + Default: 1200 (seconds / 20 mins) + format: uint + minimum: 0.0 + type: integer + name: + default: '' + description: 'Name of the worker. This is give a more friendly name to a worker for logging and metric publishing. Default: {Index position in the workers list}' + type: string + platform_properties: + additionalProperties: + oneOf: + - required: + - values + - required: + - query_cmd + properties: + query_cmd: + description: A dynamic configuration. The string will be executed as a command (not sell) and will be split by "\n" (new line character). + type: string + values: + description: 'List of static values. Note: Generally there should only ever be 1 value, but if the platform property key is PropertyType::Priority it may have more than one value.' + items: + type: string + type: array + type: object + description: Properties of this worker. This configuration will be sent to the scheduler and used to tell the scheduler to restrict what should be executed on this worker. + type: object + timeout_handled_externally: + default: false + description: |- + If timeout is handled in `entrypoint` or another wrapper script. If set to true NativeLink will not honor the timeout the action requested and instead will always force kill the action after max_action_timeout has been reached. If this is set to false, the smaller value of the action's timeout and max_action_timeout will be used to which NativeLink will kill the action. + + The real timeout can be received via an environment variable set in: `EnvironmentSource::TimeoutMillis`. + + Example on where this is useful: `entrypoint` launches the action inside a docker container, but the docker container may need to be downloaded. Thus the timer should not start until the docker container has started executing the action. In this case, action will likely be wrapped in another program, like `timeout` and propagate timeouts via `EnvironmentSource::SideChannelFile`. + + Default: false (NativeLink fully handles timeouts) + type: boolean + upload_action_result: + default: + ac_store: null + failure_message_template: '' + historical_results_store: null + success_message_template: '' + upload_ac_results_strategy: success_only + description: Configuration for uploading action results. + properties: + ac_store: + description: 'Underlying AC store that the worker will use to publish execution results into. Objects placed in this store should be reachable from the scheduler/client-cas after they have finished updating. Default: {No uploading is done}' + nullable: true + type: string + failure_message_template: + default: '' + description: |- + Same as `success_message_template` but for failure case. + + An example that is fully compatible with `bb_browser` is: + + Default: "" (no message) + type: string + historical_results_store: + description: |- + Store to upload historical results to. 
This should be a CAS store if set. + + Default: {CAS store of parent} + nullable: true + type: string + success_message_template: + default: '' + description: |- + Template to use for the `ExecuteResponse.message` property. This message is attached to the response before it is sent to the client. The following special variables are supported: - {digest_function} - Digest function used to calculate the action digest. - {action_digest_hash} - Action digest hash. - {action_digest_size} - Action digest size. - {historical_results_hash} - HistoricalExecuteResponse digest hash. - {historical_results_size} - HistoricalExecuteResponse digest size. + + A common use case of this is to provide a link to the web page that contains more useful information for the user. + + An example that is fully compatible with `bb_browser` is: + + Default: "" (no message) + type: string + upload_ac_results_strategy: + default: success_only + description: |- + In which situations should the results be published to the ac_store, if set to SuccessOnly then only results with an exit code of 0 will be uploaded, if set to Everything all completed results will be uploaded. + + Default: UploadCacheResultsStrategy::SuccessOnly + enum: + - success_only + - never + - everything + - failures_only + type: string + upload_historical_results_strategy: + description: |- + In which situations should the results be published to the historical CAS. The historical CAS is where failures are published. These messages conform to the CAS key-value lookup format and are always a `HistoricalExecuteResponse` serialized message. + + Default: UploadCacheResultsStrategy::FailuresOnly + enum: + - success_only + - never + - everything + - failures_only + nullable: true + type: string + type: object + work_directory: + description: The directory work jobs will be executed from. This directory will be fully managed by the worker service and will be purged on startup. This directory and the directory referenced in local_filesystem_store_ref's stores::FilesystemStore::content_path must be on the same filesystem. Hardlinks will be used when placing files that are accessible to the jobs that are sourced from local_filesystem_store_ref's content_path. + type: string + worker_api_endpoint: + description: Endpoint which the worker will connect to the scheduler's WorkerApiService. + properties: + timeout: + description: 'Timeout in seconds that a request should take. Default: 5 (seconds)' + format: float + nullable: true + type: number + tls_config: + description: The TLS configuration to use to connect to the endpoint. + nullable: true + properties: + ca_file: + description: Path to the certificate authority to use to validate the remote. + type: string + cert_file: + description: Path to the certificate file for client authentication. + nullable: true + type: string + key_file: + description: Path to the private key file for client authentication. + nullable: true + type: string + required: + - ca_file + type: object + uri: + description: URI of the endpoint. 
+ type: string + required: + - uri + type: object + required: + - cas_fast_slow_store + - platform_properties + - work_directory + - worker_api_endpoint + type: object + type: object + nullable: true + type: array + required: + - servers + - stores + type: object + image: + description: The container image to use for the NativeLink Pod + type: string + replicas: + default: 1 + description: Number of replicas + format: int32 + type: integer + runtime: + default: + args: [] + env: {} + working_dir: null + description: 'TODO(aaronmondal): Instead of these values, consider a "deployment" field that imports the K8s Deployment schema. Optional overrides for process management' + properties: + args: + default: [] + description: Arguments to pass to the NativeLink executable + items: + type: string + type: array + env: + additionalProperties: + type: string + default: {} + description: Environment variables to set for the process + type: object + working_dir: + description: Working directory for the process + nullable: true + type: string + type: object + required: + - config + - image + type: object + status: + nullable: true + properties: + active_ports: + additionalProperties: + format: uint16 + minimum: 0.0 + type: integer + description: Port mappings for active services + type: object + error: + description: Any error message if NativeLink failed to start/run + nullable: true + type: string + last_started: + description: Last time NativeLink was started + nullable: true + type: string + running: + description: Whether NativeLink is currently running + type: boolean + required: + - active_ports + - running + type: object + required: + - spec + title: NativeLink + type: object + served: true + storage: true + subresources: + status: {} diff --git a/nativelink-controller/scheduler.yaml b/nativelink-controller/scheduler.yaml new file mode 100644 index 000000000..af061de42 --- /dev/null +++ b/nativelink-controller/scheduler.yaml @@ -0,0 +1,77 @@ +--- +apiVersion: kube.rs/v1alpha1 +kind: NativeLink +metadata: + name: nativelink-sample + namespace: default +spec: + image: localhost:5001/nativelink:c567im4v5y7c25g0xaicmpfapgbjfgii + replicas: 1 + runtime: + args: [] + env: + RUST_LOG: "info" + config: + stores: + - name: GRPC_LOCAL_STORE + grpc: + instance_name: main + endpoints: + - address: grpc://${CAS_ENDPOINT:-127.0.0.1}:50051 + store_type: cas + - name: GRPC_LOCAL_AC_STORE + grpc: + instance_name: main + endpoints: + - address: grpc://${CAS_ENDPOINT:-127.0.0.1}:50051 + store_type: ac + schedulers: + # TODO(adams): use the right scheduler because reclient doesn't use the + # cached results?
+ # TODO(adams): max_bytes_per_stream + - name: MAIN_SCHEDULER + simple: + supported_platform_properties: + cpu_count: priority + memory_kb: priority + network_kbps: priority + disk_read_iops: priority + disk_read_bps: priority + disk_write_iops: priority + disk_write_bps: priority + shm_size: priority + gpu_count: priority + gpu_model: priority + cpu_vendor: priority + cpu_arch: priority + cpu_model: priority + kernel_version: priority + OSFamily: priority + container-image: priority + servers: + - listener: + http: + socket_address: 0.0.0.0:50052 + services: + ac: + main: + ac_store: GRPC_LOCAL_AC_STORE + execution: + main: + cas_store: GRPC_LOCAL_STORE + scheduler: MAIN_SCHEDULER + capabilities: + main: + remote_execution: + scheduler: MAIN_SCHEDULER + - listener: + http: + socket_address: 0.0.0.0:50061 + services: + # Note: This should be served on a different port, because it has + # a different permission set than the other services. + # In other words, this service is a backend api. The ones above + # are a frontend api. + worker_api: + scheduler: MAIN_SCHEDULER + health: {} diff --git a/nativelink-controller/src/bin/generate-crd.rs b/nativelink-controller/src/bin/generate-crd.rs new file mode 100644 index 000000000..14857f29f --- /dev/null +++ b/nativelink-controller/src/bin/generate-crd.rs @@ -0,0 +1,30 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use kube::CustomResourceExt; +use nativelink_controller::controller::NativeLink; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let crd = NativeLink::crd(); + let yaml = serde_yml::to_string(&crd)?; + + let output_file = std::env::args() + .nth(1) + .unwrap_or_else(|| "nativelink-crd.yaml".to_string()); + + std::fs::write(&output_file, yaml)?; + println!("Generated CRD schema written to: {output_file}"); + + Ok(()) +} diff --git a/nativelink-controller/src/bin/nativelink-controller.rs b/nativelink-controller/src/bin/nativelink-controller.rs new file mode 100644 index 000000000..0c228fd8e --- /dev/null +++ b/nativelink-controller/src/bin/nativelink-controller.rs @@ -0,0 +1,291 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
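+
+//! Controller binary that reconciles `NativeLink` custom resources. Each
+//! reconcile pass, sketched by the functions below, does three things:
+//!
+//! 1. Serializes `spec.config` into a `nativelink.json` entry of a ConfigMap
+//!    owned by the custom resource.
+//! 2. Creates or updates a Deployment that mounts that ConfigMap under
+//!    `/etc/nativelink/` and passes the file path as the first argument to
+//!    the NativeLink executable.
+//! 3. Mirrors the Deployment's availability back into the resource's status
+//!    subresource.
+//!
+//! Successful runs are requeued after 300 seconds, failed runs after 10.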
+ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; + +use chrono::Utc; +use futures::StreamExt; +use k8s_openapi::api::apps::v1::{Deployment, DeploymentSpec}; +use k8s_openapi::api::core::v1::{ + ConfigMap, ConfigMapVolumeSource, Container, ContainerPort, PodSpec, PodTemplateSpec, Volume, + VolumeMount, +}; +use kube::api::{Api, ObjectMeta, Patch, PatchParams, PostParams, ResourceExt}; +use kube::runtime::controller::Action; +use kube::runtime::{watcher, Controller}; +use kube::{Client, Resource}; +use nativelink_controller::controller::{NativeLink, NativeLinkStatus}; +use tracing::*; + +#[derive(Clone)] +struct Context { + client: Client, +} + +#[derive(Debug, thiserror::Error)] +enum Error { + #[error(transparent)] + KubeError(#[from] kube::Error), + #[error(transparent)] + SerializationError(#[from] serde_json::Error), +} + +/// Read the CRD and create or update the ConfigMap containing the JSON config +/// which we'll mount into pods. +async fn create_or_update_configmap(nativelink: &NativeLink, client: Client) -> Result<(), Error> { + let namespace = nativelink.namespace().unwrap_or_default(); + let configmaps: Api<ConfigMap> = Api::namespaced(client, &namespace); + let name = format!("nativelink-config-{}", nativelink.name_any()); + + let config_data = serde_json::to_string_pretty(&nativelink.spec.config)?; + let data = std::collections::BTreeMap::from([("nativelink.json".to_string(), config_data)]); + + let configmap = ConfigMap { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: Some(namespace.clone()), + owner_references: Some(vec![nativelink.controller_owner_ref(&()).unwrap()]), + ..Default::default() + }, + data: Some(data), + ..Default::default() + }; + + match configmaps.get(&name).await { + Ok(_) => { + configmaps + .replace(&name, &PostParams::default(), &configmap) + .await?; + } + Err(kube::Error::Api(api_err)) if api_err.code == 404 => { + configmaps + .create(&PostParams::default(), &configmap) + .await?; + } + Err(e) => return Err(Error::KubeError(e)), + } + + Ok(()) +} + +/// Read the CRD and create or update the Deployment.
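+/// Container ports are derived from the `socket_address` of each HTTP
+/// listener in the embedded config; the generated ConfigMap is mounted at
+/// `/etc/nativelink/` and `nativelink.json` is passed as the first process
+/// argument.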
+async fn create_or_update_deployment(nativelink: &NativeLink, client: Client) -> Result<(), Error> { + let namespace = nativelink.namespace().unwrap_or_default(); + let deployments: Api<Deployment> = Api::namespaced(client, &namespace); + let name = format!("nativelink-deployment-{}", nativelink.name_any()); + let labels = std::collections::BTreeMap::from([("app".to_string(), name.clone())]); + + let configmap_name = format!("nativelink-config-{}", nativelink.name_any()); + + let ports = nativelink + .spec + .config + .servers + .iter() + .filter_map(|s| { + if let nativelink_config::cas_server::ListenerConfig::http(listener) = &s.listener { + listener + .socket_address + .split(':') + .last() + .and_then(|p| p.parse().ok()) + .map(|port| ContainerPort { + container_port: port, + protocol: Some("TCP".to_string()), + ..Default::default() + }) + } else { + None + } + }) + .collect(); + + let config_file_path = "/etc/nativelink/nativelink.json".to_string(); + + let mut args = vec![config_file_path]; + args.extend(nativelink.spec.runtime.args.clone()); + + let container = Container { + name: "nativelink".to_string(), + image: Some(nativelink.spec.image.clone()), + args: Some(args), + env: Some( + nativelink + .spec + .runtime + .env + .iter() + .map(|(k, v)| k8s_openapi::api::core::v1::EnvVar { + name: k.clone(), + value: Some(v.clone()), + ..Default::default() + }) + .collect(), + ), + volume_mounts: Some(vec![VolumeMount { + name: "nativelink-config".to_string(), + mount_path: "/etc/nativelink/".to_string(), + ..Default::default() + }]), + working_dir: nativelink.spec.runtime.working_dir.clone(), + ports: Some(ports), + ..Default::default() + }; + + let pod_spec = PodSpec { + containers: vec![container], + volumes: Some(vec![Volume { + name: "nativelink-config".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: configmap_name.clone(), + ..Default::default() + }), + ..Default::default() + }]), + ..Default::default() + }; + + let deployment_spec = DeploymentSpec { + replicas: Some(nativelink.spec.replicas), + selector: k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector { + match_labels: Some(labels.clone()), + ..Default::default() + }, + template: PodTemplateSpec { + metadata: Some(ObjectMeta { + labels: Some(labels.clone()), + ..Default::default() + }), + spec: Some(pod_spec), + }, + ..Default::default() + }; + + let deployment = Deployment { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: Some(namespace.clone()), + owner_references: Some(vec![nativelink.controller_owner_ref(&()).unwrap()]), + ..Default::default() + }, + spec: Some(deployment_spec), + ..Default::default() + }; + + match deployments.get(&name).await { + Ok(_) => { + deployments + .replace(&name, &PostParams::default(), &deployment) + .await?; + } + Err(kube::Error::Api(api_err)) if api_err.code == 404 => { + deployments + .create(&PostParams::default(), &deployment) + .await?; + } + Err(e) => return Err(Error::KubeError(e)), + } + + Ok(()) +} + +/// Refresh the deployment status.
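+/// Applies a JSON merge patch to the resource's status subresource; the
+/// instance is reported as running when the Deployment has at least one
+/// available replica.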
+async fn update_status(nativelink: &NativeLink, client: Client) -> Result<(), Error> { + let namespace = nativelink.namespace().unwrap_or_default(); + let deployments: Api<Deployment> = Api::namespaced(client.clone(), &namespace); + let nativelinks: Api<NativeLink> = Api::namespaced(client, &namespace); + let name = format!("nativelink-deployment-{}", nativelink.name_any()); + + let deployment = deployments.get(&name).await?; + + let available_replicas = deployment + .status + .as_ref() + .and_then(|s| s.available_replicas) + .unwrap_or(0); + + let status = NativeLinkStatus { + running: available_replicas > 0, + last_started: Some(Utc::now().to_rfc3339()), + error: None, + active_ports: HashMap::new(), + }; + + let status_patch = serde_json::json!({ + "status": status + }); + + nativelinks + .patch_status( + &nativelink.name_any(), + &PatchParams::default(), + &Patch::Merge(&status_patch), + ) + .await?; + + Ok(()) +} + +/// Run a reconciliation loop. +/// TODO(aaronmondal): Consider a non-blocking implementation. +async fn reconcile(nativelink: Arc<NativeLink>, ctx: Arc<Context>) -> Result<Action, Error> { + let client = ctx.client.clone(); + + create_or_update_configmap(&nativelink, client.clone()).await?; + + create_or_update_deployment(&nativelink, client.clone()).await?; + + update_status(&nativelink, client.clone()).await?; + + Ok(Action::requeue(Duration::from_secs(300))) +} + +/// Error policy to requeue on error. +fn requeue_error_policy(_object: Arc<NativeLink>, error: &Error, _ctx: Arc<Context>) -> Action { + warn!("Reconciliation error: {:?}", error); + Action::requeue(Duration::from_secs(10)) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt::init(); + let client = Client::try_default().await?; + + let nativelinks: Api<NativeLink> = Api::all(client.clone()); + if let Ok(list) = nativelinks.list(&Default::default()).await { + for item in list { + info!("Raw item: {}", serde_json::to_string_pretty(&item)?); + } + } + let context = Context { + client: client.clone(), + }; + + info!("Starting NativeLink controller"); + Controller::new(nativelinks, watcher::Config::default()) + .shutdown_on_signal() + .run(reconcile, requeue_error_policy, Arc::new(context)) + .for_each(|res| async move { + match res { + Ok(_) => debug!("Reconciliation successful"), + Err(err) => warn!("Reconciliation error: {:?}", err), + } + }) + .await; + + Ok(()) +} diff --git a/nativelink-controller/src/controller.rs b/nativelink-controller/src/controller.rs new file mode 100644 index 000000000..4bc3781dd --- /dev/null +++ b/nativelink-controller/src/controller.rs @@ -0,0 +1,83 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
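+
+//! Type definitions for the `NativeLink` custom resource. `NativeLinkSpec`
+//! embeds `CasConfig` from `nativelink-config`, so the generated CRD schema
+//! mirrors the existing configuration format one-to-one. A minimal resource
+//! against this schema might look like the following sketch (the image name
+//! and store layout are illustrative placeholders, not defaults):
+//!
+//! ```yaml
+//! apiVersion: kube.rs/v1alpha1
+//! kind: NativeLink
+//! metadata:
+//!   name: example
+//! spec:
+//!   image: localhost:5001/nativelink:latest
+//!   replicas: 1
+//!   config:
+//!     stores:
+//!       - name: CAS_STORE
+//!         memory: {}
+//!     servers: []
+//! ```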
+
+use nativelink_config::cas_server::CasConfig;
+use std::collections::HashMap;
+
+use kube::CustomResource;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
+pub struct NativeLinkStatus {
+    /// Whether NativeLink is currently running
+    pub running: bool,
+
+    /// Last time NativeLink was started
+    pub last_started: Option<String>,
+
+    /// Any error message if NativeLink failed to start/run
+    pub error: Option<String>,
+
+    /// Port mappings for active services
+    pub active_ports: HashMap<String, u16>,
+}
+
+#[derive(CustomResource, Clone, Debug, Serialize, Deserialize, JsonSchema)]
+#[kube(
+    group = "kube.rs",
+    version = "v1alpha1",
+    kind = "NativeLink",
+    namespaced,
+    status = "NativeLinkStatus",
+    printcolumn = r#"{"name":"Running", "type":"boolean", "description":"Whether the process is running", "jsonPath":".status.running"}"#
+)]
+pub struct NativeLinkSpec {
+    /// The NativeLink server configuration
+    pub config: CasConfig,
+
+    // TODO(aaronmondal): Instead of these values, consider a "deployment"
+    // field that imports the K8s Deployment schema.
+
+    /// Optional overrides for process management
+    #[serde(default)]
+    pub runtime: RuntimeConfig,
+
+    /// The container image to use for the NativeLink Pod
+    pub image: String,
+
+    /// Number of replicas
+    #[serde(default = "default_replicas")]
+    pub replicas: i32,
+}
+
+fn default_replicas() -> i32 {
+    1
+}
+
+// TODO(aaronmondal): Probably unnecessary to map these out. Consider importing
+// from the k8s openapi schemas.
+#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Default)]
+pub struct RuntimeConfig {
+    /// Arguments to pass to the NativeLink executable
+    #[serde(default)]
+    pub args: Vec<String>,
+
+    /// Environment variables to set for the process
+    #[serde(default)]
+    pub env: HashMap<String, String>,
+
+    /// Working directory for the process
+    pub working_dir: Option<String>,
+}
diff --git a/nativelink-controller/src/lib.rs b/nativelink-controller/src/lib.rs
new file mode 100644
index 000000000..89638f8c9
--- /dev/null
+++ b/nativelink-controller/src/lib.rs
@@ -0,0 +1,15 @@
+// Copyright 2024 The NativeLink Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
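+
+// A minimal sketch of how the checked-in `nativelink-crd.yaml` can be
+// regenerated from the derived types via kube's `CustomResourceExt`
+// (presumably what `src/bin/generate-crd.rs` does; shown here only as an
+// illustration):
+//
+// ```rust,ignore
+// use kube::CustomResourceExt;
+// use nativelink_controller::controller::NativeLink;
+//
+// fn main() -> Result<(), serde_yaml::Error> {
+//     // Prints the CustomResourceDefinition for the NativeLink kind as yaml.
+//     println!("{}", serde_yaml::to_string(&NativeLink::crd())?);
+//     Ok(())
+// }
+// ```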
+
+pub mod controller;
diff --git a/nativelink-controller/worker.yaml b/nativelink-controller/worker.yaml
new file mode 100644
index 000000000..8295687aa
--- /dev/null
+++ b/nativelink-controller/worker.yaml
@@ -0,0 +1,75 @@
+---
+apiVersion: kube.rs/v1alpha1
+kind: NativeLink
+metadata:
+  name: nativelink-worker
+  namespace: default
+spec:
+  image: localhost:5001/nativelink:c567im4v5y7c25g0xaicmpfapgbjfgii
+  replicas: 1
+  runtime:
+    args: []
+    env:
+      RUST_LOG: "info"
+  config:
+    stores:
+      - name: GRPC_LOCAL_STORE
+        grpc:
+          instance_name: main
+          endpoints:
+            - address: grpc://${CAS_ENDPOINT:-127.0.0.1}:50051
+          store_type: cas
+
+      - name: GRPC_LOCAL_AC_STORE
+        grpc:
+          instance_name: main
+          endpoints:
+            - address: grpc://${CAS_ENDPOINT:-127.0.0.1}:50051
+          store_type: ac
+
+      - name: WORKER_FAST_SLOW_STORE
+        fastSlow:
+          fast: fs-worker
+          slow: ref-worker
+
+      - name: fs-worker
+        filesystem:
+          content_path: ~/.cache/nativelink/data-worker-test/content_path-cas
+          temp_path: ~/.cache/nativelink/data-worker-test/tmp_path-cas
+          eviction_policy:
+            # 10gb.
+            max_bytes: 10000000000
+
+      - name: ref-worker
+        ref:
+          name: GRPC_LOCAL_STORE
+
+    workers:
+      - local:
+          worker_api_endpoint:
+            uri: grpc://${SCHEDULER_ENDPOINT:-127.0.0.1}:50061
+          cas_fast_slow_store: WORKER_FAST_SLOW_STORE
+          upload_action_result:
+            ac_store: GRPC_LOCAL_AC_STORE
+          work_directory: ~/.cache/nativelink/work
+          platform_properties:
+            cpu_count:
+              query_cmd: nproc
+            memory_kb:
+              values: ["500000"]
+            network_kbps:
+              values: ["100000"]
+            cpu_arch:
+              values: ["x86_64"]
+            OSFamily:
+              values: ["Linux"]
+            container-image:
+              values:
+                # WARNING: Treat the string below as nothing more than a raw string
+                # that is matched by the scheduler against the value specified in
+                # the `exec_properties` of the corresponding platform at
+                # `local-remote-execution/generated-cc/config/BUILD`.
+                - "${NATIVELINK_WORKER_PLATFORM:-undefined_platform}"
+    servers: []
+    global:
+      max_open_files: 524288
diff --git a/nativelink-scheduler/src/default_scheduler_factory.rs b/nativelink-scheduler/src/default_scheduler_factory.rs
index daa110469..358e07dfe 100644
--- a/nativelink-scheduler/src/default_scheduler_factory.rs
+++ b/nativelink-scheduler/src/default_scheduler_factory.rs
@@ -15,7 +15,10 @@
 use std::sync::Arc;
 use std::time::SystemTime;
 
-use nativelink_config::schedulers::{ExperimentalSimpleSchedulerBackend, SchedulerConfig};
+use nativelink_config::schedulers::{
+    ExperimentalSimpleSchedulerBackend, MemoryBackend, SchedulerConfig, SchedulerRef,
+    SchedulerSpec, SimpleSpec,
+};
 use nativelink_config::stores::EvictionPolicy;
 use nativelink_error::{make_input_err, Error, ResultExt};
 use nativelink_store::redis_store::RedisStore;
@@ -42,27 +45,29 @@ pub type SchedulerFactoryResults = (
 );
 
 pub fn scheduler_factory(
-    scheduler_type_cfg: &SchedulerConfig,
+    scheduler_type_cfg: &SchedulerRef,
     store_manager: &StoreManager,
 ) -> Result<SchedulerFactoryResults, Error> {
     inner_scheduler_factory(scheduler_type_cfg, store_manager)
 }
 
 fn inner_scheduler_factory(
-    scheduler_type_cfg: &SchedulerConfig,
+    scheduler_config: &SchedulerRef,
     store_manager: &StoreManager,
 ) -> Result<SchedulerFactoryResults, Error> {
-    let scheduler: SchedulerFactoryResults = match scheduler_type_cfg {
-        SchedulerConfig::simple(config) => {
-            simple_scheduler_factory(config, store_manager, SystemTime::now)?
+    let scheduler_cfg: SchedulerConfig = scheduler_config.clone().into();
+
+    let scheduler: SchedulerFactoryResults = match &scheduler_cfg.spec {
+        SchedulerSpec::Simple(spec) => {
+            simple_scheduler_factory(spec, store_manager, SystemTime::now)?
         }
-        SchedulerConfig::grpc(config) => (Some(Arc::new(GrpcScheduler::new(config)?)), None),
-        SchedulerConfig::cache_lookup(config) => {
+        SchedulerSpec::Grpc(spec) => (Some(Arc::new(GrpcScheduler::new(spec)?)), None),
+        SchedulerSpec::CacheLookup(spec) => {
             let ac_store = store_manager
-                .get_store(&config.ac_store)
-                .err_tip(|| format!("'ac_store': '{}' does not exist", config.ac_store))?;
+                .get_store(&spec.ac_store)
+                .err_tip(|| format!("'ac_store': '{}' does not exist", spec.ac_store))?;
             let (action_scheduler, worker_scheduler) =
-                inner_scheduler_factory(&config.scheduler, store_manager)
+                inner_scheduler_factory(&spec.scheduler, store_manager)
                     .err_tip(|| "In nested CacheLookupScheduler construction")?;
             let cache_lookup_scheduler = Arc::new(CacheLookupScheduler::new(
                 ac_store,
@@ -70,12 +75,12 @@ fn inner_scheduler_factory(
             )?);
             (Some(cache_lookup_scheduler), worker_scheduler)
         }
-        SchedulerConfig::property_modifier(config) => {
+        SchedulerSpec::PropertyModifier(spec) => {
             let (action_scheduler, worker_scheduler) =
-                inner_scheduler_factory(&config.scheduler, store_manager)
+                inner_scheduler_factory(&spec.scheduler, store_manager)
                     .err_tip(|| "In nested PropertyModifierScheduler construction")?;
             let property_modifier_scheduler = Arc::new(PropertyModifierScheduler::new(
-                config,
+                spec,
                 action_scheduler.err_tip(|| "Nested scheduler is not an action scheduler")?,
             ));
             (Some(property_modifier_scheduler), worker_scheduler)
@@ -86,24 +91,25 @@
 }
 
 fn simple_scheduler_factory(
-    config: &nativelink_config::schedulers::SimpleScheduler,
+    spec: &SimpleSpec,
     store_manager: &StoreManager,
     now_fn: fn() -> SystemTime,
 ) -> Result<SchedulerFactoryResults, Error> {
-    match config
+    match spec
         .experimental_backend
         .as_ref()
-        .unwrap_or(&ExperimentalSimpleSchedulerBackend::memory)
-    {
-        ExperimentalSimpleSchedulerBackend::memory => {
+        .unwrap_or(&ExperimentalSimpleSchedulerBackend::Memory(
+            MemoryBackend {},
+        )) {
+        ExperimentalSimpleSchedulerBackend::Memory(_) => {
             let task_change_notify = Arc::new(Notify::new());
             let awaited_action_db = memory_awaited_action_db_factory(
-                config.retain_completed_for_s,
+                spec.retain_completed_for_s,
                 &task_change_notify.clone(),
                 SystemTime::now,
             );
             let (action_scheduler, worker_scheduler) =
-                SimpleScheduler::new(config, awaited_action_db, task_change_notify);
+                SimpleScheduler::new(spec, awaited_action_db, task_change_notify);
             Ok((Some(action_scheduler), Some(worker_scheduler)))
         }
         ExperimentalSimpleSchedulerBackend::redis(redis_config) => {
@@ -133,7 +139,7 @@ fn simple_scheduler_factory(
             )
             .err_tip(|| "In state_manager_factory::redis_state_manager")?;
             let (action_scheduler, worker_scheduler) =
-                SimpleScheduler::new(config, awaited_action_db, task_change_notify);
+                SimpleScheduler::new(spec, awaited_action_db, task_change_notify);
             Ok((Some(action_scheduler), Some(worker_scheduler)))
         }
     }
diff --git a/nativelink-scheduler/src/grpc_scheduler.rs b/nativelink-scheduler/src/grpc_scheduler.rs
index 38b0e71b1..1cafb88df 100644
--- a/nativelink-scheduler/src/grpc_scheduler.rs
+++ b/nativelink-scheduler/src/grpc_scheduler.rs
@@ -91,7 +91,7 @@ pub struct GrpcScheduler {
 }
 
 impl GrpcScheduler {
-    pub fn new(config: &nativelink_config::schedulers::GrpcScheduler) -> Result<Self, Error> {
+    pub fn new(config: &nativelink_config::schedulers::GrpcSpec) -> Result<Self, Error> {
         let jitter_amt = config.retry.jitter;
         Self::new_with_jitter(
             config,
@@ -107,7 +107,7 @@ impl GrpcScheduler {
     }
 
     pub fn new_with_jitter(
-        config: &nativelink_config::schedulers::GrpcScheduler,
+        config: &nativelink_config::schedulers::GrpcSpec,
         jitter_fn: Box<dyn Fn(u64) -> Duration + Send + Sync>,
     ) -> Result<Self, Error> {
         let endpoint = tls_utils::endpoint(&config.endpoint)?;
diff --git a/nativelink-scheduler/src/property_modifier_scheduler.rs b/nativelink-scheduler/src/property_modifier_scheduler.rs
index 49044cb58..3699bb516 100644
--- a/nativelink-scheduler/src/property_modifier_scheduler.rs
+++ b/nativelink-scheduler/src/property_modifier_scheduler.rs
@@ -37,7 +37,7 @@ pub struct PropertyModifierScheduler {
 
 impl PropertyModifierScheduler {
     pub fn new(
-        config: &nativelink_config::schedulers::PropertyModifierScheduler,
+        config: &nativelink_config::schedulers::PropertyModifierSpec,
         scheduler: Arc<dyn ClientStateManager>,
     ) -> Self {
         Self {
diff --git a/nativelink-scheduler/src/simple_scheduler.rs b/nativelink-scheduler/src/simple_scheduler.rs
index ee4ce0491..32e2ced2f 100644
--- a/nativelink-scheduler/src/simple_scheduler.rs
+++ b/nativelink-scheduler/src/simple_scheduler.rs
@@ -281,7 +281,7 @@ impl SimpleScheduler {
 
 impl SimpleScheduler {
     pub fn new<A: AwaitedActionDb>(
-        scheduler_cfg: &nativelink_config::schedulers::SimpleScheduler,
+        scheduler_cfg: &nativelink_config::schedulers::SimpleSpec,
         awaited_action_db: A,
         task_change_notify: Arc<Notify>,
     ) -> (Arc<Self>, Arc<dyn WorkerScheduler>) {
@@ -311,7 +311,7 @@ impl SimpleScheduler {
         I: InstantWrapper,
         NowFn: Fn() -> I + Clone + Send + Unpin + Sync + 'static,
     >(
-        scheduler_cfg: &nativelink_config::schedulers::SimpleScheduler,
+        scheduler_cfg: &nativelink_config::schedulers::SimpleSpec,
         awaited_action_db: A,
         on_matching_engine_run: F,
         task_change_notify: Arc<Notify>,
diff --git a/nativelink-scheduler/tests/cache_lookup_scheduler_test.rs b/nativelink-scheduler/tests/cache_lookup_scheduler_test.rs
index bdc326a95..c456abb73 100644
--- a/nativelink-scheduler/tests/cache_lookup_scheduler_test.rs
+++ b/nativelink-scheduler/tests/cache_lookup_scheduler_test.rs
@@ -49,7 +49,7 @@ struct TestContext {
 fn make_cache_scheduler() -> Result<TestContext, Error> {
     let mock_scheduler = Arc::new(MockActionScheduler::new());
     let ac_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     let cache_scheduler = CacheLookupScheduler::new(ac_store.clone(), mock_scheduler.clone())?;
     Ok(TestContext {
diff --git a/nativelink-scheduler/tests/property_modifier_scheduler_test.rs b/nativelink-scheduler/tests/property_modifier_scheduler_test.rs
index 07c315cee..9cb88d696 100644
--- a/nativelink-scheduler/tests/property_modifier_scheduler_test.rs
+++ b/nativelink-scheduler/tests/property_modifier_scheduler_test.rs
@@ -22,7 +22,9 @@ mod utils {
 }
 
 use futures::{join, StreamExt};
-use nativelink_config::schedulers::{PlatformPropertyAddition, PropertyModification};
+use nativelink_config::schedulers::{
+    PlatformPropertyAddition, PropertyModification, SchedulerRef, SimpleSpec,
+};
 use nativelink_error::Error;
 use nativelink_macro::nativelink_test;
 use nativelink_scheduler::property_modifier_scheduler::PropertyModifierScheduler;
@@ -42,11 +44,9 @@ struct TestContext {
 
 fn make_modifier_scheduler(modifications: Vec<PropertyModification>) -> TestContext {
     let mock_scheduler = Arc::new(MockActionScheduler::new());
-    let config = nativelink_config::schedulers::PropertyModifierScheduler {
+    let config = nativelink_config::schedulers::PropertyModifierSpec {
         modifications,
-        scheduler: Box::new(nativelink_config::schedulers::SchedulerConfig::simple(
-            nativelink_config::schedulers::SimpleScheduler::default(),
-        )),
+        scheduler: SchedulerRef::new("simple", SimpleSpec::default()),
     };
     let modifier_scheduler =
PropertyModifierScheduler::new(&config, mock_scheduler.clone()); TestContext { diff --git a/nativelink-scheduler/tests/simple_scheduler_test.rs b/nativelink-scheduler/tests/simple_scheduler_test.rs index 46620dbfa..5edcc28a9 100644 --- a/nativelink-scheduler/tests/simple_scheduler_test.rs +++ b/nativelink-scheduler/tests/simple_scheduler_test.rs @@ -160,7 +160,7 @@ async fn basic_add_action_with_one_worker_test() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -228,7 +228,7 @@ async fn client_does_not_receive_update_timeout() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { worker_timeout_s: WORKER_TIMEOUT_S, ..Default::default() }, @@ -291,7 +291,7 @@ async fn find_executing_action() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -368,7 +368,7 @@ async fn remove_worker_reschedules_multiple_running_job_test() -> Result<(), Err let worker_id2: WorkerId = WorkerId(Uuid::new_v4()); let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { worker_timeout_s: WORKER_TIMEOUT_S, ..Default::default() }, @@ -546,7 +546,7 @@ async fn set_drain_worker_pauses_and_resumes_worker_test() -> Result<(), Error> let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -630,7 +630,7 @@ async fn worker_should_not_queue_if_properties_dont_match_test() -> Result<(), E let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { supported_platform_properties: Some(prop_defs), ..Default::default() }, @@ -724,7 +724,7 @@ async fn cacheable_items_join_same_action_queued_test() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -825,7 +825,7 @@ async fn cacheable_items_join_same_action_queued_test() -> Result<(), Error> { async fn worker_disconnects_does_not_schedule_for_execution_test() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + 
&nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -985,7 +985,7 @@ async fn matching_engine_fails_sends_abort() -> Result<(), Error> { let (senders, awaited_action) = MockAwaitedAction::new(); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), awaited_action, || async move {}, task_change_notify, @@ -1030,7 +1030,7 @@ async fn matching_engine_fails_sends_abort() -> Result<(), Error> { let (senders, awaited_action) = MockAwaitedAction::new(); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), awaited_action, || async move {}, task_change_notify, @@ -1081,7 +1081,7 @@ async fn worker_timesout_reschedules_running_job_test() -> Result<(), Error> { let worker_id2: WorkerId = WorkerId(Uuid::new_v4()); let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { worker_timeout_s: WORKER_TIMEOUT_S, ..Default::default() }, @@ -1206,7 +1206,7 @@ async fn update_action_sends_completed_result_to_client_test() -> Result<(), Err let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -1307,7 +1307,7 @@ async fn update_action_sends_completed_result_after_disconnect() -> Result<(), E let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -1425,7 +1425,7 @@ async fn update_action_with_wrong_worker_id_errors_test() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -1523,7 +1523,7 @@ async fn does_not_crash_if_operation_joined_then_relaunched() -> Result<(), Erro let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -1663,7 +1663,7 @@ async fn run_two_jobs_on_same_worker_with_platform_properties_restrictions() -> supported_props.insert("prop1".to_string(), PropertyType::minimum); let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { supported_platform_properties: Some(supported_props), ..Default::default() }, @@ -1825,7 +1825,7 @@ async fn run_jobs_in_the_order_they_were_queued() -> Result<(), Error> { 
supported_props.insert("prop1".to_string(), PropertyType::minimum); let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { supported_platform_properties: Some(supported_props), ..Default::default() }, @@ -1892,7 +1892,7 @@ async fn worker_retries_on_internal_error_and_fails_test() -> Result<(), Error> let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { max_job_retries: 1, ..Default::default() }, @@ -2044,7 +2044,7 @@ async fn ensure_scheduler_drops_inner_spawn() -> Result<(), Error> { // DropChecker. let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -2077,7 +2077,7 @@ async fn ensure_task_or_worker_change_notification_received_test() -> Result<(), let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler::default(), + &nativelink_config::schedulers::SimpleSpec::default(), memory_awaited_action_db_factory( 0, &task_change_notify.clone(), @@ -2149,7 +2149,7 @@ async fn ensure_task_or_worker_change_notification_received_test() -> Result<(), async fn client_reconnect_keeps_action_alive() -> Result<(), Error> { let task_change_notify = Arc::new(Notify::new()); let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( - &nativelink_config::schedulers::SimpleScheduler { + &nativelink_config::schedulers::SimpleSpec { worker_timeout_s: WORKER_TIMEOUT_S, ..Default::default() }, diff --git a/nativelink-service/tests/ac_server_test.rs b/nativelink-service/tests/ac_server_test.rs index 50dcada89..b71d8626c 100644 --- a/nativelink-service/tests/ac_server_test.rs +++ b/nativelink-service/tests/ac_server_test.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use bytes::BytesMut; use maplit::hashmap; +use nativelink_config::stores::{MemorySpec, StoreRef}; use nativelink_error::Error; use nativelink_macro::nativelink_test; use nativelink_proto::build::bazel::remote::execution::v2::action_cache_server::ActionCache; @@ -55,9 +56,7 @@ async fn make_store_manager() -> Result, Error> { store_manager.add_store( "main_cas", store_factory( - &nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &StoreRef::new("main_cas", MemorySpec::default()), &store_manager, None, ) @@ -66,9 +65,7 @@ async fn make_store_manager() -> Result, Error> { store_manager.add_store( "main_ac", store_factory( - &nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &StoreRef::new("main_ac", MemorySpec::default()), &store_manager, None, ) diff --git a/nativelink-service/tests/bep_server_test.rs b/nativelink-service/tests/bep_server_test.rs index 4beefaa71..f9018eef5 100644 --- a/nativelink-service/tests/bep_server_test.rs +++ b/nativelink-service/tests/bep_server_test.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use futures::StreamExt; use hyper::body::Frame; use nativelink_config::cas_server::BepConfig; +use 
nativelink_config::stores::{MemorySpec, StoreRef}; use nativelink_error::{Error, ResultExt}; use nativelink_macro::nativelink_test; use nativelink_proto::google::devtools::build::v1::build_event::console_output::Output; @@ -53,9 +54,7 @@ async fn make_store_manager() -> Result, Error> { store_manager.add_store( BEP_STORE_NAME, store_factory( - &nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &StoreRef::new("memory", MemorySpec::default()), &store_manager, None, ) diff --git a/nativelink-service/tests/bytestream_server_test.rs b/nativelink-service/tests/bytestream_server_test.rs index aa8d37698..d5e760275 100644 --- a/nativelink-service/tests/bytestream_server_test.rs +++ b/nativelink-service/tests/bytestream_server_test.rs @@ -25,6 +25,7 @@ use hyper_util::server::conn::auto; use hyper_util::service::TowerToHyperService; use maplit::hashmap; use nativelink_config::cas_server::ByteStreamConfig; +use nativelink_config::stores::{MemorySpec, StoreRef}; use nativelink_error::{make_err, Code, Error, ResultExt}; use nativelink_macro::nativelink_test; use nativelink_proto::google::bytestream::byte_stream_client::ByteStreamClient; @@ -60,9 +61,7 @@ async fn make_store_manager() -> Result, Error> { store_manager.add_store( "main_cas", store_factory( - &nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &StoreRef::new("main", MemorySpec::default()), &store_manager, None, ) diff --git a/nativelink-service/tests/cas_server_test.rs b/nativelink-service/tests/cas_server_test.rs index d97728c3f..f890f5b3d 100644 --- a/nativelink-service/tests/cas_server_test.rs +++ b/nativelink-service/tests/cas_server_test.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use futures::StreamExt; use maplit::hashmap; +use nativelink_config::stores::{MemorySpec, StoreRef}; use nativelink_error::Error; use nativelink_macro::nativelink_test; use nativelink_proto::build::bazel::remote::execution::v2::content_addressable_storage_server::ContentAddressableStorage; @@ -49,9 +50,7 @@ async fn make_store_manager() -> Result, Error> { store_manager.add_store( "main_cas", store_factory( - &nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &StoreRef::new("main", MemorySpec::default()), &store_manager, None, ) diff --git a/nativelink-store/src/compression_store.rs b/nativelink-store/src/compression_store.rs index dd50ce7ea..a52886518 100644 --- a/nativelink-store/src/compression_store.rs +++ b/nativelink-store/src/compression_store.rs @@ -219,7 +219,7 @@ pub struct CompressionStore { impl CompressionStore { pub fn new( - compression_config: &nativelink_config::stores::CompressionStore, + compression_config: &nativelink_config::stores::CompressionSpec, inner_store: Store, ) -> Result, Error> { let lz4_config = match compression_config.compression_algorithm { diff --git a/nativelink-store/src/dedup_store.rs b/nativelink-store/src/dedup_store.rs index fee1e021f..d37861acf 100644 --- a/nativelink-store/src/dedup_store.rs +++ b/nativelink-store/src/dedup_store.rs @@ -60,7 +60,7 @@ pub struct DedupStore { impl DedupStore { pub fn new( - config: &nativelink_config::stores::DedupStore, + config: &nativelink_config::stores::DedupSpec, index_store: Store, content_store: Store, ) -> Result, Error> { diff --git a/nativelink-store/src/default_store_factory.rs b/nativelink-store/src/default_store_factory.rs index b72b8e1a4..1efe7352f 100644 --- 
a/nativelink-store/src/default_store_factory.rs
+++ b/nativelink-store/src/default_store_factory.rs
@@ -18,7 +18,7 @@ use std::time::SystemTime;
 
 use futures::stream::FuturesOrdered;
 use futures::{Future, TryStreamExt};
-use nativelink_config::stores::StoreConfig;
+use nativelink_config::stores::{StoreConfig, StoreRef, StoreSpec};
 use nativelink_error::Error;
 use nativelink_util::health_utils::HealthRegistryBuilder;
 use nativelink_util::store_trait::{Store, StoreDriver};
@@ -43,61 +43,61 @@ use crate::verify_store::VerifyStore;
 
 type FutureMaybeStore<'a> = Box<dyn Future<Output = Result<Store, Error>> + 'a>;
 
 pub fn store_factory<'a>(
-    backend: &'a StoreConfig,
+    config: &'a StoreRef,
     store_manager: &'a Arc<StoreManager>,
     maybe_health_registry_builder: Option<&'a mut HealthRegistryBuilder>,
 ) -> Pin<FutureMaybeStore<'a>> {
     Box::pin(async move {
-        let store: Arc<dyn StoreDriver> = match backend {
-            StoreConfig::memory(config) => MemoryStore::new(config),
-            StoreConfig::experimental_s3_store(config) => {
-                S3Store::new(config, SystemTime::now).await?
-            }
-            StoreConfig::redis_store(config) => RedisStore::new(config.clone())?,
-            StoreConfig::verify(config) => VerifyStore::new(
-                config,
-                store_factory(&config.backend, store_manager, None).await?,
+        let store_config: StoreConfig = config.clone().into();
+
+        let store: Arc<dyn StoreDriver> = match &store_config.spec {
+            StoreSpec::Memory(spec) => MemoryStore::new(spec),
+            StoreSpec::S3(spec) => S3Store::new(spec, SystemTime::now).await?,
+            StoreSpec::Redis(spec) => RedisStore::new(spec.clone())?,
+            StoreSpec::Verify(spec) => VerifyStore::new(
+                spec,
+                store_factory(&spec.backend, store_manager, None).await?,
             ),
-            StoreConfig::compression(config) => CompressionStore::new(
-                &config.clone(),
-                store_factory(&config.backend, store_manager, None).await?,
+            StoreSpec::Compression(spec) => CompressionStore::new(
+                &spec.clone(),
+                store_factory(&spec.backend, store_manager, None).await?,
             )?,
-            StoreConfig::dedup(config) => DedupStore::new(
-                config,
-                store_factory(&config.index_store, store_manager, None).await?,
-                store_factory(&config.content_store, store_manager, None).await?,
+            StoreSpec::Dedup(spec) => DedupStore::new(
+                spec,
+                store_factory(&spec.index_store, store_manager, None).await?,
+                store_factory(&spec.content_store, store_manager, None).await?,
             )?,
-            StoreConfig::existence_cache(config) => ExistenceCacheStore::new(
-                config,
-                store_factory(&config.backend, store_manager, None).await?,
+            StoreSpec::ExistenceCache(spec) => ExistenceCacheStore::new(
+                spec,
+                store_factory(&spec.backend, store_manager, None).await?,
             ),
-            StoreConfig::completeness_checking(config) => CompletenessCheckingStore::new(
-                store_factory(&config.backend, store_manager, None).await?,
-                store_factory(&config.cas_store, store_manager, None).await?,
+            StoreSpec::CompletenessChecking(spec) => CompletenessCheckingStore::new(
+                store_factory(&spec.backend, store_manager, None).await?,
+                store_factory(&spec.cas_store, store_manager, None).await?,
             ),
-            StoreConfig::fast_slow(config) => FastSlowStore::new(
-                config,
-                store_factory(&config.fast, store_manager, None).await?,
-                store_factory(&config.slow, store_manager, None).await?,
+            StoreSpec::FastSlow(spec) => FastSlowStore::new(
+                spec,
+                store_factory(&spec.fast, store_manager, None).await?,
+                store_factory(&spec.slow, store_manager, None).await?,
             ),
-            StoreConfig::filesystem(config) => <FilesystemStore>::new(config).await?,
-            StoreConfig::ref_store(config) => RefStore::new(config, Arc::downgrade(store_manager)),
-            StoreConfig::size_partitioning(config) => SizePartitioningStore::new(
-                config,
-                store_factory(&config.lower_store, store_manager, None).await?,
-                store_factory(&config.upper_store, store_manager, None).await?,
+            StoreSpec::Filesystem(spec) => <FilesystemStore>::new(spec).await?,
+            StoreSpec::Ref(spec) => RefStore::new(spec, Arc::downgrade(store_manager)),
+            StoreSpec::SizePartitioning(spec) => SizePartitioningStore::new(
+                spec,
+                store_factory(&spec.lower_store, store_manager, None).await?,
+                store_factory(&spec.upper_store, store_manager, None).await?,
            ),
-            StoreConfig::grpc(config) => GrpcStore::new(config).await?,
-            StoreConfig::noop => NoopStore::new(),
-            StoreConfig::shard(config) => {
-                let stores = config
+            StoreSpec::Grpc(spec) => GrpcStore::new(spec).await?,
+            StoreSpec::Noop(_spec) => NoopStore::new(),
+            StoreSpec::Shard(spec) => {
+                let stores = spec
                     .stores
                     .iter()
                     .map(|store_config| store_factory(&store_config.store, store_manager, None))
                     .collect::<FuturesOrdered<_>>()
                     .try_collect::<Vec<_>>()
                     .await?;
-                ShardStore::new(config, stores)?
+                ShardStore::new(spec, stores)?
             }
         };
diff --git a/nativelink-store/src/existence_cache_store.rs b/nativelink-store/src/existence_cache_store.rs
index 087e37b8c..8e1cd9ad8 100644
--- a/nativelink-store/src/existence_cache_store.rs
+++ b/nativelink-store/src/existence_cache_store.rs
@@ -18,7 +18,7 @@ use std::sync::Arc;
 use std::time::SystemTime;
 
 use async_trait::async_trait;
-use nativelink_config::stores::{EvictionPolicy, ExistenceCacheStore as ExistenceCacheStoreConfig};
+use nativelink_config::stores::{EvictionPolicy, ExistenceCacheSpec as ExistenceCacheStoreConfig};
 use nativelink_error::{error_if, Error, ResultExt};
 use nativelink_metric::MetricsComponent;
 use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf};
diff --git a/nativelink-store/src/fast_slow_store.rs b/nativelink-store/src/fast_slow_store.rs
index a6ae8ba71..acdf88740 100644
--- a/nativelink-store/src/fast_slow_store.rs
+++ b/nativelink-store/src/fast_slow_store.rs
@@ -53,7 +53,7 @@ pub struct FastSlowStore {
 
 impl FastSlowStore {
     pub fn new(
-        _config: &nativelink_config::stores::FastSlowStore,
+        _config: &nativelink_config::stores::FastSlowSpec,
         fast_store: Store,
         slow_store: Store,
     ) -> Arc<Self> {
diff --git a/nativelink-store/src/filesystem_store.rs b/nativelink-store/src/filesystem_store.rs
index 924edca79..b0af651eb 100644
--- a/nativelink-store/src/filesystem_store.rs
+++ b/nativelink-store/src/filesystem_store.rs
@@ -536,14 +536,14 @@ pub struct FilesystemStore {
 
 impl<Fe: FileEntry> FilesystemStore<Fe> {
     pub async fn new(
-        config: &nativelink_config::stores::FilesystemStore,
+        config: &nativelink_config::stores::FilesystemSpec,
     ) -> Result<Arc<Self>, Error> {
         Self::new_with_timeout_and_rename_fn(config, sleep, |from, to| std::fs::rename(from, to))
             .await
     }
 
     pub async fn new_with_timeout_and_rename_fn(
-        config: &nativelink_config::stores::FilesystemStore,
+        config: &nativelink_config::stores::FilesystemSpec,
         sleep_fn: fn(Duration) -> Sleep,
         rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>,
     ) -> Result<Arc<Self>, Error> {
diff --git a/nativelink-store/src/grpc_store.rs b/nativelink-store/src/grpc_store.rs
index d02a0b943..796417053 100644
--- a/nativelink-store/src/grpc_store.rs
+++ b/nativelink-store/src/grpc_store.rs
@@ -70,7 +70,7 @@ pub struct GrpcStore {
 }
 
 impl GrpcStore {
-    pub async fn new(config: &nativelink_config::stores::GrpcStore) -> Result<Arc<Self>, Error> {
+    pub async fn new(config: &nativelink_config::stores::GrpcSpec) -> Result<Arc<Self>, Error> {
         let jitter_amt = config.retry.jitter;
         Self::new_with_jitter(
             config,
@@ -87,7 +87,7 @@ impl GrpcStore {
     }
 
     pub async fn new_with_jitter(
-        config: &nativelink_config::stores::GrpcStore,
+        config: &nativelink_config::stores::GrpcSpec,
         jitter_fn: Box<dyn Fn(u64) -> Duration + Send + Sync>,
     ) -> Result<Arc<Self>, Error> {
         error_if!(
diff --git a/nativelink-store/src/memory_store.rs b/nativelink-store/src/memory_store.rs
index 99042d37f..fe3b21fd1 100644
--- a/nativelink-store/src/memory_store.rs
+++ b/nativelink-store/src/memory_store.rs
@@ -57,7 +57,7 @@ pub struct MemoryStore {
 }
 
 impl MemoryStore {
-    pub fn new(config: &nativelink_config::stores::MemoryStore) -> Arc<Self> {
+    pub fn new(config: &nativelink_config::stores::MemorySpec) -> Arc<Self> {
         let empty_policy = nativelink_config::stores::EvictionPolicy::default();
         let eviction_policy = config.eviction_policy.as_ref().unwrap_or(&empty_policy);
         Arc::new(Self {
diff --git a/nativelink-store/src/redis_store.rs b/nativelink-store/src/redis_store.rs
index 05dbf82c4..fa6f86733 100644
--- a/nativelink-store/src/redis_store.rs
+++ b/nativelink-store/src/redis_store.rs
@@ -140,7 +140,7 @@ pub struct RedisStore {
 
 impl RedisStore {
     /// Create a new `RedisStore` from the given configuration.
-    pub fn new(mut config: nativelink_config::stores::RedisStore) -> Result<Arc<Self>, Error> {
+    pub fn new(mut config: nativelink_config::stores::RedisSpec) -> Result<Arc<Self>, Error> {
         if config.addresses.is_empty() {
             return Err(make_err!(
                 Code::InvalidArgument,
diff --git a/nativelink-store/src/ref_store.rs b/nativelink-store/src/ref_store.rs
index d2446d174..d41d7b058 100644
--- a/nativelink-store/src/ref_store.rs
+++ b/nativelink-store/src/ref_store.rs
@@ -46,7 +46,7 @@ pub struct RefStore {
 
 impl RefStore {
     pub fn new(
-        config: &nativelink_config::stores::RefStore,
+        config: &nativelink_config::stores::RefSpec,
         store_manager: Weak<StoreManager>,
     ) -> Arc<Self> {
         Arc::new(RefStore {
diff --git a/nativelink-store/src/s3_store.rs b/nativelink-store/src/s3_store.rs
index 8622d1b0e..7f0ab49ee 100644
--- a/nativelink-store/src/s3_store.rs
+++ b/nativelink-store/src/s3_store.rs
@@ -142,7 +142,7 @@ pub struct TlsConnector {
 impl TlsConnector {
     #[must_use]
     pub fn new(
-        config: &nativelink_config::stores::S3Store,
+        config: &nativelink_config::stores::S3Spec,
         jitter_fn: Arc<dyn Fn(u64) -> Duration + Send + Sync>,
     ) -> Self {
         let connector_with_roots = hyper_rustls::HttpsConnectorBuilder::new().with_webpki_roots();
@@ -261,7 +261,7 @@ where
     NowFn: Fn() -> I + Send + Sync + Unpin + 'static,
 {
     pub async fn new(
-        config: &nativelink_config::stores::S3Store,
+        config: &nativelink_config::stores::S3Spec,
         now_fn: NowFn,
     ) -> Result<Arc<Self>, Error> {
         let jitter_amt = config.retry.jitter;
@@ -299,7 +299,7 @@ where
     }
 
     pub fn new_with_client_and_jitter(
-        config: &nativelink_config::stores::S3Store,
+        config: &nativelink_config::stores::S3Spec,
         s3_client: Client,
         jitter_fn: Arc<dyn Fn(u64) -> Duration + Send + Sync>,
         now_fn: NowFn,
diff --git a/nativelink-store/src/shard_store.rs b/nativelink-store/src/shard_store.rs
index b57f60a12..b8287d00c 100644
--- a/nativelink-store/src/shard_store.rs
+++ b/nativelink-store/src/shard_store.rs
@@ -46,7 +46,7 @@ pub struct ShardStore {
 
 impl ShardStore {
     pub fn new(
-        config: &nativelink_config::stores::ShardStore,
+        config: &nativelink_config::stores::ShardSpec,
         stores: Vec<Store>,
     ) -> Result<Arc<Self>, Error> {
         error_if!(
diff --git a/nativelink-store/src/size_partitioning_store.rs b/nativelink-store/src/size_partitioning_store.rs
index b3569c90f..995895d73 100644
--- a/nativelink-store/src/size_partitioning_store.rs
+++ b/nativelink-store/src/size_partitioning_store.rs
@@ -35,7 +35,7 @@ pub struct SizePartitioningStore {
 
 impl SizePartitioningStore {
     pub fn new(
-        config: &nativelink_config::stores::SizePartitioningStore,
+        config: &nativelink_config::stores::SizePartitioningSpec,
         lower_store: Store,
         upper_store: Store,
     ) -> Arc<Self> {
diff --git a/nativelink-store/src/verify_store.rs b/nativelink-store/src/verify_store.rs
index 674ab2bd2..859322ce4 100644
--- a/nativelink-store/src/verify_store.rs
+++ b/nativelink-store/src/verify_store.rs
@@ -47,7 +47,7 @@ pub struct VerifyStore {
 }
 
 impl VerifyStore {
-    pub fn new(config: &nativelink_config::stores::VerifyStore, inner_store: Store) -> Arc<Self> {
+    pub fn new(config: &nativelink_config::stores::VerifySpec, inner_store: Store) -> Arc<Self> {
         Arc::new(VerifyStore {
             inner_store,
             verify_size: config.verify_size,
diff --git a/nativelink-store/tests/ac_utils_test.rs b/nativelink-store/tests/ac_utils_test.rs
index f4d50b46d..2ea062cec 100644
--- a/nativelink-store/tests/ac_utils_test.rs
+++ b/nativelink-store/tests/ac_utils_test.rs
@@ -46,7 +46,7 @@ const HASH1_SIZE: i64 = 147;
 async fn upload_file_to_store_with_large_file() -> Result<(), Error> {
     let filepath = make_temp_path("test.txt").await;
     let expected_data = vec![0x88; 1024 * 1024]; // 1MB.
-    let store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let digest = DigestInfo::try_new(HASH1, HASH1_SIZE)?; // Dummy hash data.
     {
         // Write 1MB of 0x88s to the file.
diff --git a/nativelink-store/tests/completeness_checking_store_test.rs b/nativelink-store/tests/completeness_checking_store_test.rs
index 319299924..66b7205ed 100644
--- a/nativelink-store/tests/completeness_checking_store_test.rs
+++ b/nativelink-store/tests/completeness_checking_store_test.rs
@@ -14,7 +14,7 @@
 
 use std::sync::Arc;
 
-use nativelink_config::stores::MemoryStore as MemoryStoreConfig;
+use nativelink_config::stores::MemorySpec;
 use nativelink_error::Error;
 use nativelink_macro::nativelink_test;
 use nativelink_proto::build::bazel::remote::execution::v2::{
@@ -36,8 +36,8 @@ const STDOUT: DigestInfo = DigestInfo::new([5u8; 32], 0);
 const STDERR: DigestInfo = DigestInfo::new([6u8; 32], 0);
 
 async fn setup() -> Result<(Arc<CompletenessCheckingStore>, Arc<MemoryStore>, DigestInfo), Error> {
-    let backend_store = Store::new(MemoryStore::new(&MemoryStoreConfig::default()));
-    let cas_store = MemoryStore::new(&MemoryStoreConfig::default());
+    let backend_store = Store::new(MemoryStore::new(&MemorySpec::default()));
+    let cas_store = MemoryStore::new(&MemorySpec::default());
     let ac_store =
         CompletenessCheckingStore::new(backend_store.clone(), Store::new(cas_store.clone()));
diff --git a/nativelink-store/tests/compression_store_test.rs b/nativelink-store/tests/compression_store_test.rs
index d6780dec0..0de2b5e4f 100644
--- a/nativelink-store/tests/compression_store_test.rs
+++ b/nativelink-store/tests/compression_store_test.rs
@@ -20,6 +20,7 @@ use std::sync::Arc;
 
 use bincode::{DefaultOptions, Options};
 use bytes::Bytes;
+use nativelink_config::stores::{CompressionAlgorithm, CompressionSpec, MemorySpec, StoreRef};
 use nativelink_error::{make_err, Code, Error, ResultExt};
 use nativelink_macro::nativelink_test;
 use nativelink_store::compression_store::{
@@ -73,19 +74,15 @@ async fn simple_smoke_test() -> Result<(), Error> {
     const RAW_INPUT: &str = "123";
 
     let store = CompressionStore::new(
-        &nativelink_config::stores::CompressionStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4(
+        &CompressionSpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
+
compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { ..Default::default() }, ), }, - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), + Store::new(MemoryStore::new(&MemorySpec::default())), ) .err_tip(|| "Failed to create compression store")?; @@ -114,20 +111,16 @@ async fn partial_reads_test() -> Result<(), Error> { ]; let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { block_size: 10, ..Default::default() }, ), }, - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), + Store::new(MemoryStore::new(&MemorySpec::default())), ) .err_tip(|| "Failed to create compression store")?; let store = Pin::new(&store_owned); @@ -167,19 +160,15 @@ async fn partial_reads_test() -> Result<(), Error> { #[nativelink_test] async fn rand_5mb_smoke_test() -> Result<(), Error> { let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { ..Default::default() }, ), }, - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), + Store::new(MemoryStore::new(&MemorySpec::default())), ) .err_tip(|| "Failed to create compression store")?; let store = Pin::new(&store_owned); @@ -202,13 +191,11 @@ async fn rand_5mb_smoke_test() -> Result<(), Error> { #[nativelink_test] async fn sanity_check_zero_bytes_test() -> Result<(), Error> { - let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let inner_store = MemoryStore::new(&MemorySpec::default()); let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { ..Default::default() }, @@ -259,13 +246,11 @@ async fn check_header_test() -> Result<(), Error> { const MAX_SIZE_INPUT: u64 = 1024 * 1024; // 1MB. 
const RAW_INPUT: &str = "123"; - let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let inner_store = MemoryStore::new(&MemorySpec::default()); let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { block_size: BLOCK_SIZE, ..Default::default() @@ -347,13 +332,11 @@ async fn check_footer_test() -> Result<(), Error> { const BLOCK_SIZE: u32 = 32 * 1024; const EXPECTED_INDEXES: [u32; 7] = [32898, 32898, 32898, 32898, 140, 140, 140]; - let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let inner_store = MemoryStore::new(&MemorySpec::default()); let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { block_size: BLOCK_SIZE, ..Default::default() @@ -495,13 +478,11 @@ async fn get_part_is_zero_digest() -> Result<(), Error> { let digest = DigestInfo::new(Sha256::new().finalize().into(), 0); - let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let inner_store = MemoryStore::new(&MemorySpec::default()); let store_owned = CompressionStore::new( - &nativelink_config::stores::CompressionStore { - backend: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - compression_algorithm: nativelink_config::stores::CompressionAlgorithm::lz4( + &CompressionSpec { + backend: StoreRef::new("memory", MemorySpec::default()), + compression_algorithm: CompressionAlgorithm::lz4( nativelink_config::stores::Lz4Config { block_size: BLOCK_SIZE, ..Default::default() diff --git a/nativelink-store/tests/dedup_store_test.rs b/nativelink-store/tests/dedup_store_test.rs index d8372fde5..438df369a 100644 --- a/nativelink-store/tests/dedup_store_test.rs +++ b/nativelink-store/tests/dedup_store_test.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use nativelink_config::stores::{MemorySpec, StoreRef}; use nativelink_error::{Code, Error, ResultExt}; use nativelink_macro::nativelink_test; use nativelink_store::cas_utils::ZERO_BYTE_DIGESTS; @@ -23,14 +24,10 @@ use pretty_assertions::assert_eq; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; -fn make_default_config() -> nativelink_config::stores::DedupStore { - nativelink_config::stores::DedupStore { - index_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - content_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), +fn make_default_config() -> nativelink_config::stores::DedupSpec { + nativelink_config::stores::DedupSpec { + index_store: StoreRef::new("index", MemorySpec::default()), + content_store: StoreRef::new("content", MemorySpec::default()), min_size: 8 * 1024, normal_size: 32 * 1024, max_size: 128 * 1024, @@ -54,10 +51,10 @@ async fn simple_round_trip_test() -> Result<(), Error> { let store = DedupStore::new( &make_default_config(), Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )), // Index store. Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )), // Content store. )?; @@ -85,11 +82,11 @@ async fn check_missing_last_chunk_test() -> Result<(), Error> { "7c8608f5b079bef66c45bd67f7d8ede15d2e1830ea38fd8ad4c6de08b6f21a0c"; const LAST_CHUNK_SIZE: usize = 25779; - let content_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let content_store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default()); let store = DedupStore::new( &make_default_config(), Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )), // Index store. Store::new(content_store.clone()), )?; @@ -133,10 +130,10 @@ async fn fetch_part_test() -> Result<(), Error> { let store = DedupStore::new( &make_default_config(), Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )), // Index store. Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )), // Content store. )?; @@ -173,24 +170,16 @@ async fn check_length_not_set_with_chunk_read_beyond_first_chunk_regression_test const START_READ_BYTE: usize = 7; let store = DedupStore::new( - &nativelink_config::stores::DedupStore { - index_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - content_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &nativelink_config::stores::DedupSpec { + index_store: StoreRef::new("index", MemorySpec::default()), + content_store: StoreRef::new("index", MemorySpec::default()), min_size: 5, normal_size: 6, max_size: 7, max_concurrent_fetch_per_get: 10, }, - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), // Index store. - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), // Content store. + Store::new(MemoryStore::new(&MemorySpec::default())), // Index store. + Store::new(MemoryStore::new(&MemorySpec::default())), // Content store. 
)?; let original_data = make_random_data(DATA_SIZE); @@ -226,24 +215,16 @@ async fn check_chunk_boundary_reads_test() -> Result<(), Error> { const START_READ_BYTE: usize = 10; let store = DedupStore::new( - &nativelink_config::stores::DedupStore { - index_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), - content_store: nativelink_config::stores::StoreConfig::memory( - nativelink_config::stores::MemoryStore::default(), - ), + &nativelink_config::stores::DedupSpec { + index_store: StoreRef::new("index", MemorySpec::default()), + content_store: StoreRef::new("content", MemorySpec::default()), min_size: 5, normal_size: 6, max_size: 7, max_concurrent_fetch_per_get: 10, }, - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), // Index store. - Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )), // Content store. + Store::new(MemoryStore::new(&MemorySpec::default())), // Index store. + Store::new(MemoryStore::new(&MemorySpec::default())), // Content store. )?; let original_data = make_random_data(DATA_SIZE); @@ -307,8 +288,8 @@ async fn check_chunk_boundary_reads_test() -> Result<(), Error> { async fn has_checks_content_store() -> Result<(), Error> { const DATA_SIZE: usize = MEGABYTE_SZ / 4; - let index_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); - let content_store = MemoryStore::new(&nativelink_config::stores::MemoryStore { + let index_store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default()); + let content_store = MemoryStore::new(&nativelink_config::stores::MemorySpec { eviction_policy: Some(nativelink_config::stores::EvictionPolicy { max_count: 10, ..Default::default() @@ -373,8 +354,8 @@ async fn has_checks_content_store() -> Result<(), Error> { async fn has_with_no_existing_index_returns_none_test() -> Result<(), Error> { const DATA_SIZE: usize = 10; - let index_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); - let content_store = MemoryStore::new(&nativelink_config::stores::MemoryStore { + let index_store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default()); + let content_store = MemoryStore::new(&nativelink_config::stores::MemorySpec { eviction_policy: Some(nativelink_config::stores::EvictionPolicy { max_count: 10, ..Default::default() @@ -404,8 +385,8 @@ async fn has_with_no_existing_index_returns_none_test() -> Result<(), Error> { /// properly return Some(0). 
#[nativelink_test] async fn has_with_zero_digest_returns_some_test() -> Result<(), Error> { - let index_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); - let content_store = MemoryStore::new(&nativelink_config::stores::MemoryStore { + let index_store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default()); + let content_store = MemoryStore::new(&nativelink_config::stores::MemorySpec { eviction_policy: Some(nativelink_config::stores::EvictionPolicy { max_count: 10, ..Default::default() diff --git a/nativelink-store/tests/existence_store_test.rs b/nativelink-store/tests/existence_store_test.rs index 2f47eabe0..2d9e3061d 100644 --- a/nativelink-store/tests/existence_store_test.rs +++ b/nativelink-store/tests/existence_store_test.rs @@ -16,7 +16,7 @@ use std::time::Duration; use mock_instant::thread_local::MockClock; use nativelink_config::stores::{ - EvictionPolicy, ExistenceCacheStore as ExistenceCacheStoreConfig, StoreConfig, + EvictionPolicy, ExistenceCacheSpec, MemorySpec, NoopSpec, StoreRef, }; use nativelink_error::{Error, ResultExt}; use nativelink_macro::nativelink_test; @@ -32,13 +32,11 @@ const VALID_HASH1: &str = "0123456789abcdef0000000000000000000100000000000001234 #[nativelink_test] async fn simple_exist_cache_test() -> Result<(), Error> { const VALUE: &str = "123"; - let config = ExistenceCacheStoreConfig { - backend: StoreConfig::noop, // Note: Not used. + let config = ExistenceCacheSpec { + backend: StoreRef::new("dummy", NoopSpec::default()), eviction_policy: Default::default(), }; - let inner_store = Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), - )); + let inner_store = Store::new(MemoryStore::new(&MemorySpec::default())); let store = ExistenceCacheStore::new(&config, inner_store.clone()); let digest = DigestInfo::try_new(VALID_HASH1, 3).unwrap(); @@ -72,12 +70,12 @@ async fn simple_exist_cache_test() -> Result<(), Error> { #[nativelink_test] async fn update_flags_existance_cache_test() -> Result<(), Error> { const VALUE: &str = "123"; - let config = ExistenceCacheStoreConfig { - backend: StoreConfig::noop, + let config = ExistenceCacheSpec { + backend: StoreRef::new("dummy", NoopSpec::default()), eviction_policy: Default::default(), }; let inner_store = Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )); let store = ExistenceCacheStore::new(&config, inner_store.clone()); @@ -97,12 +95,12 @@ async fn update_flags_existance_cache_test() -> Result<(), Error> { #[nativelink_test] async fn get_part_caches_if_exact_size_set() -> Result<(), Error> { const VALUE: &str = "123"; - let config = ExistenceCacheStoreConfig { - backend: StoreConfig::noop, + let config = ExistenceCacheSpec { + backend: StoreRef::new("dummy", NoopSpec::default()), eviction_policy: Default::default(), }; let inner_store = Store::new(MemoryStore::new( - &nativelink_config::stores::MemoryStore::default(), + &nativelink_config::stores::MemorySpec::default(), )); let digest = DigestInfo::try_new(VALID_HASH1, 3).unwrap(); inner_store @@ -127,15 +125,15 @@ async fn get_part_caches_if_exact_size_set() -> Result<(), Error> { #[nativelink_test] async fn ensure_has_requests_eventually_do_let_evictions_happen() -> Result<(), Error> { const VALUE: &str = "123"; - let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default()); + let inner_store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default()); let digest = 
     let digest = DigestInfo::try_new(VALID_HASH1, 3).unwrap();
     inner_store
         .update_oneshot(digest, VALUE.into())
         .await
         .err_tip(|| "Failed to update store")?;
     let store = ExistenceCacheStore::new_with_time(
-        &ExistenceCacheStoreConfig {
-            backend: StoreConfig::noop,
+        &ExistenceCacheSpec {
+            backend: StoreRef::new("dummy", NoopSpec::default()),
             eviction_policy: Some(EvictionPolicy {
                 max_seconds: 10,
                 ..Default::default()
diff --git a/nativelink-store/tests/fast_slow_store_test.rs b/nativelink-store/tests/fast_slow_store_test.rs
index caaaa7344..3f6ade05b 100644
--- a/nativelink-store/tests/fast_slow_store_test.rs
+++ b/nativelink-store/tests/fast_slow_store_test.rs
@@ -18,6 +18,7 @@ use std::sync::{Arc, Mutex};

 use async_trait::async_trait;
 use bytes::Bytes;
+use nativelink_config::stores::{MemorySpec, NoopSpec, StoreRef};
 use nativelink_error::{make_err, Code, Error, ResultExt};
 use nativelink_macro::nativelink_test;
 use nativelink_metric::MetricsComponent;
@@ -35,20 +36,12 @@ use rand::{Rng, SeedableRng};
 const MEGABYTE_SZ: usize = 1024 * 1024;

 fn make_stores() -> (Store, Store, Store) {
-    let fast_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
-    ));
-    let slow_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
-    ));
+    let fast_store = Store::new(MemoryStore::new(&MemorySpec::default()));
+    let slow_store = Store::new(MemoryStore::new(&MemorySpec::default()));
     let fast_slow_store = Store::new(FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &nativelink_config::stores::FastSlowSpec {
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         fast_store.clone(),
         slow_store.clone(),
@@ -336,13 +329,9 @@ async fn drop_on_eof_completes_store_futures() -> Result<(), Error> {
     }));

     let fast_slow_store = FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &nativelink_config::stores::FastSlowSpec {
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         fast_store,
         slow_store,
@@ -379,19 +368,15 @@ async fn drop_on_eof_completes_store_futures() -> Result<(), Error> {
 #[nativelink_test]
 async fn ignore_value_in_fast_store() -> Result<(), Error> {
     let fast_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     let slow_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     let fast_slow_store = Arc::new(FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &nativelink_config::stores::FastSlowSpec {
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         fast_store.clone(),
         slow_store,
@@ -411,15 +396,14 @@ async fn ignore_value_in_fast_store() -> Result<(), Error> {
 #[nativelink_test]
 async fn has_checks_fast_store_when_noop() -> Result<(), Error> {
     let fast_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     let slow_store = Store::new(NoopStore::new());
-    let fast_slow_store_config = nativelink_config::stores::FastSlowStore {
-        fast: nativelink_config::stores::StoreConfig::memory(
-            nativelink_config::stores::MemoryStore::default(),
-        ),
-        slow: nativelink_config::stores::StoreConfig::noop,
+    let fast_slow_store_config = nativelink_config::stores::FastSlowSpec {
+        fast: StoreRef::new("fast", MemorySpec::default()),
+        slow: StoreRef::new("slow", NoopSpec::default()),
     };
+
     let fast_slow_store = Arc::new(FastSlowStore::new(
         &fast_slow_store_config,
         fast_store.clone(),
diff --git a/nativelink-store/tests/filesystem_store_test.rs b/nativelink-store/tests/filesystem_store_test.rs
index 7a3ca42b2..4b60d1322 100644
--- a/nativelink-store/tests/filesystem_store_test.rs
+++ b/nativelink-store/tests/filesystem_store_test.rs
@@ -27,6 +27,9 @@ use filetime::{set_file_atime, FileTime};
 use futures::executor::block_on;
 use futures::task::Poll;
 use futures::{poll, Future, FutureExt};
+use nativelink_config::stores::{
+    EvictionPolicy, FastSlowSpec, FilesystemSpec, MemorySpec, StoreRef,
+};
 use nativelink_error::{make_err, Code, Error, ResultExt};
 use nativelink_macro::nativelink_test;
 use nativelink_store::fast_slow_store::FastSlowStore;
@@ -255,7 +258,7 @@ async fn valid_results_after_shutdown_test() -> Result<(), Error> {
     let temp_path = make_temp_path("temp_path");
     {
         let store = Store::new(
-            FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+            FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 eviction_policy: None,
@@ -278,7 +281,7 @@ async fn valid_results_after_shutdown_test() -> Result<(), Error> {
     {
         // With a new store ensure content is still readable (ie: restores from shutdown).
         let store = Box::pin(
-            FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+            FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
                 content_path,
                 temp_path,
                 eviction_policy: None,
@@ -310,17 +313,15 @@ async fn temp_files_get_deleted_on_replace_test() -> Result<(), Error> {
     let temp_path = make_temp_path("temp_path");

     let store = Box::pin(
-        FilesystemStore::<TestFileEntry<LocalHooks>>::new(
-            &nativelink_config::stores::FilesystemStore {
-                content_path: content_path.clone(),
-                temp_path: temp_path.clone(),
-                eviction_policy: Some(nativelink_config::stores::EvictionPolicy {
-                    max_count: 3,
-                    ..Default::default()
-                }),
+        FilesystemStore::<TestFileEntry<LocalHooks>>::new(&FilesystemSpec {
+            content_path: content_path.clone(),
+            temp_path: temp_path.clone(),
+            eviction_policy: Some(EvictionPolicy {
+                max_count: 3,
                 ..Default::default()
-            },
-        )
+            }),
+            ..Default::default()
+        })
         .await?,
     );
@@ -390,18 +391,16 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error>
     let temp_path = make_temp_path("temp_path");

     let store = Arc::new(
-        FilesystemStore::<TestFileEntry<LocalHooks>>::new(
-            &nativelink_config::stores::FilesystemStore {
-                content_path: content_path.clone(),
-                temp_path: temp_path.clone(),
-                eviction_policy: Some(nativelink_config::stores::EvictionPolicy {
-                    max_count: 3,
-                    ..Default::default()
-                }),
-                block_size: 1,
-                read_buffer_size: 1,
-            },
-        )
+        FilesystemStore::<TestFileEntry<LocalHooks>>::new(&FilesystemSpec {
+            content_path: content_path.clone(),
+            temp_path: temp_path.clone(),
+            eviction_policy: Some(EvictionPolicy {
+                max_count: 3,
+                ..Default::default()
+            }),
+            block_size: 1,
+            read_buffer_size: 1,
+        })
         .await?,
     );
@@ -513,18 +512,16 @@ async fn file_gets_cleans_up_on_cache_eviction() -> Result<(), Error> {
     let temp_path = make_temp_path("temp_path");

     let store = Arc::new(
-        FilesystemStore::<TestFileEntry<LocalHooks>>::new(
-            &nativelink_config::stores::FilesystemStore {
-                content_path: content_path.clone(),
-                temp_path: temp_path.clone(),
-                eviction_policy: Some(nativelink_config::stores::EvictionPolicy {
-                    max_count: 1,
-                    ..Default::default()
-                }),
-                block_size: 1,
-                read_buffer_size: 1,
-            },
-        )
+        FilesystemStore::<TestFileEntry<LocalHooks>>::new(&FilesystemSpec {
+            content_path: content_path.clone(),
+            temp_path: temp_path.clone(),
+            eviction_policy: Some(EvictionPolicy {
+                max_count: 1,
+                ..Default::default()
+            }),
+            block_size: 1,
+            read_buffer_size: 1,
+        })
         .await?,
     );
@@ -610,7 +607,7 @@ async fn atime_updates_on_get_part_test() -> Result<(), Error> {
     let digest1 = DigestInfo::try_new(HASH1, VALUE1.len())?;

     let store = Box::pin(
-        FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+        FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
             content_path: make_temp_path("content_path"),
             temp_path: make_temp_path("temp_path"),
             eviction_policy: None,
@@ -671,10 +668,10 @@ async fn oldest_entry_evicted_with_access_times_loaded_from_disk() -> Result<(),
     // Load the existing store from disk.
     let store = Box::pin(
-        FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+        FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
             content_path,
             temp_path: make_temp_path("temp_path"),
-            eviction_policy: Some(nativelink_config::stores::EvictionPolicy {
+            eviction_policy: Some(EvictionPolicy {
                 max_bytes: 0,
                 max_seconds: 0,
                 max_count: 1,
@@ -702,7 +699,7 @@ async fn eviction_drops_file_test() -> Result<(), Error> {
     let digest1 = DigestInfo::try_new(HASH1, VALUE1.len())?;

     let store = Box::pin(
-        FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+        FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
             content_path: make_temp_path("content_path"),
             temp_path: make_temp_path("temp_path"),
             eviction_policy: None,
@@ -752,7 +749,7 @@ async fn digest_contents_replaced_continues_using_old_data() -> Result<(), Error>
     let digest = DigestInfo::try_new(HASH1, VALUE1.len())?;

     let store = Box::pin(
-        FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+        FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
             content_path: make_temp_path("content_path"),
             temp_path: make_temp_path("temp_path"),
             eviction_policy: None,
@@ -819,18 +816,16 @@ async fn eviction_on_insert_calls_unref_once() -> Result<(), Error> {
     let big_digest = DigestInfo::try_new(HASH1, BIG_VALUE.len())?;

     let store = Box::pin(
-        FilesystemStore::<TestFileEntry<LocalHooks>>::new(
-            &nativelink_config::stores::FilesystemStore {
-                content_path: make_temp_path("content_path"),
-                temp_path: make_temp_path("temp_path"),
-                eviction_policy: Some(nativelink_config::stores::EvictionPolicy {
-                    max_bytes: 5,
-                    ..Default::default()
-                }),
-                block_size: 1,
+        FilesystemStore::<TestFileEntry<LocalHooks>>::new(&FilesystemSpec {
+            content_path: make_temp_path("content_path"),
+            temp_path: make_temp_path("temp_path"),
+            eviction_policy: Some(EvictionPolicy {
+                max_bytes: 5,
                 ..Default::default()
-            },
-        )
+            }),
+            block_size: 1,
+            ..Default::default()
+        })
         .await?,
     );
     // Insert data into store.
@@ -916,14 +911,12 @@ async fn rename_on_insert_fails_due_to_filesystem_error_proper_cleanup_happens()
     let temp_path = make_temp_path("temp_path");

     let store = Box::pin(
-        FilesystemStore::<TestFileEntry<LocalHooks>>::new(
-            &nativelink_config::stores::FilesystemStore {
-                content_path: content_path.clone(),
-                temp_path: temp_path.clone(),
-                eviction_policy: None,
-                ..Default::default()
-            },
-        )
+        FilesystemStore::<TestFileEntry<LocalHooks>>::new(&FilesystemSpec {
+            content_path: content_path.clone(),
+            temp_path: temp_path.clone(),
+            eviction_policy: None,
+            ..Default::default()
+        })
         .await?,
     );
@@ -1004,7 +997,7 @@ async fn get_part_timeout_test() -> Result<(), Error> {
     let store = Arc::new(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
@@ -1051,7 +1044,7 @@ async fn get_part_is_zero_digest() -> Result<(), Error> {
     let store = Arc::new(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
@@ -1118,7 +1111,7 @@ async fn has_with_results_on_zero_digests() -> Result<(), Error> {
     let store = Arc::new(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
@@ -1161,7 +1154,7 @@ async fn update_file_future_drops_before_rename() -> Result<(), Error> {
     let content_path = make_temp_path("content_path");
     let store = Arc::pin(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: make_temp_path("temp_path"),
                 eviction_policy: None,
@@ -1245,7 +1238,7 @@ async fn deleted_file_removed_from_store() -> Result<(), Error> {
     let store = Box::pin(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
@@ -1290,7 +1283,7 @@ async fn get_file_size_uses_block_size() -> Result<(), Error> {
     let store = Box::pin(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
@@ -1336,7 +1329,7 @@ async fn update_with_whole_file_closes_file() -> Result<(), Error> {
     let digest = DigestInfo::try_new(HASH1, value.len())?;

     let store = Box::pin(
-        FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+        FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
             content_path: content_path.clone(),
             temp_path: temp_path.clone(),
             read_buffer_size: 1,
@@ -1383,16 +1376,12 @@ async fn update_with_whole_file_slow_path_when_low_file_descriptors() -> Result<
     let store = FastSlowStore::new(
         // Note: The config is not needed for this test, so use dummy data.
-        &nativelink_config::stores::FastSlowStore {
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &FastSlowSpec {
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         Store::new(
-            FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+            FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
                 content_path: make_temp_path("content_path"),
                 temp_path: make_temp_path("temp_path"),
                 read_buffer_size: 1,
@@ -1401,7 +1390,7 @@ async fn update_with_whole_file_slow_path_when_low_file_descriptors() -> Result<
             .await?,
         ),
         Store::new(
-            FilesystemStore::<FileEntryImpl>::new(&nativelink_config::stores::FilesystemStore {
+            FilesystemStore::<FileEntryImpl>::new(&FilesystemSpec {
                 content_path: make_temp_path("content_path1"),
                 temp_path: make_temp_path("temp_path1"),
                 read_buffer_size: 1,
@@ -1443,7 +1432,7 @@ async fn update_with_whole_file_uses_same_inode() -> Result<(), Error> {
     let store = Box::pin(
         FilesystemStore::<FileEntryImpl>::new_with_timeout_and_rename_fn(
-            &nativelink_config::stores::FilesystemStore {
+            &FilesystemSpec {
                 content_path: content_path.clone(),
                 temp_path: temp_path.clone(),
                 read_buffer_size: 1,
diff --git a/nativelink-store/tests/memory_store_test.rs b/nativelink-store/tests/memory_store_test.rs
index a31f18469..9d958af01 100644
--- a/nativelink-store/tests/memory_store_test.rs
+++ b/nativelink-store/tests/memory_store_test.rs
@@ -39,7 +39,7 @@ const INVALID_HASH: &str = "g111111111111111111111111111111111111111111111111111
 async fn insert_one_item_then_update() -> Result<(), Error> {
     const VALUE1: &str = "13";
     const VALUE2: &str = "23";
-    let store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());

     // Insert dummy value into store.
     store
@@ -91,7 +91,7 @@ async fn ensure_full_copy_of_bytes_is_made_test() -> Result<(), Error> {
     let mut sum_memory_usage_increase_perc: f64 = 0.0;
     for _ in 0..MAX_STATS_ITERATIONS {
-        let store_owned = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+        let store_owned = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
         let store = Pin::new(&store_owned);

         let initial_virtual_mem = memory_stats()
@@ -130,7 +130,7 @@ async fn ensure_full_copy_of_bytes_is_made_test() -> Result<(), Error> {
 #[nativelink_test]
 async fn read_partial() -> Result<(), Error> {
     const VALUE1: &str = "1234";
-    let store_owned = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store_owned = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let store = Pin::new(&store_owned);

     let digest = DigestInfo::try_new(VALID_HASH1, 4).unwrap();
@@ -153,7 +153,7 @@ async fn read_partial() -> Result<(), Error> {
 #[nativelink_test]
 async fn read_zero_size_item_test() -> Result<(), Error> {
     const VALUE: &str = "";
-    let store_owned = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store_owned = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let store = Pin::new(&store_owned);

     // Insert dummy value into store.
@@ -173,7 +173,7 @@ async fn read_zero_size_item_test() -> Result<(), Error> {
 #[nativelink_test]
 async fn errors_with_invalid_inputs() -> Result<(), Error> {
     const VALUE1: &str = "123";
-    let store_owned = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store_owned = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let store = Pin::new(store_owned.as_ref());
     {
         // .has() tests.
@@ -241,7 +241,7 @@ async fn errors_with_invalid_inputs() -> Result<(), Error> {
 async fn get_part_is_zero_digest() -> Result<(), Error> {
     let digest = DigestInfo::new(Sha256::new().finalize().into(), 0);

-    let store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let store_clone = store.clone();
     let (mut writer, mut reader) = make_buf_channel_pair();

@@ -269,7 +269,7 @@ async fn has_with_results_on_zero_digests() -> Result<(), Error> {
     let keys = vec![digest.into()];
     let mut results = vec![None];

-    let store_owned = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store_owned = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     let store = Pin::new(&store_owned);

     let _ = store
@@ -304,7 +304,7 @@ async fn list_test() -> Result<(), Error> {
     const KEY3: StoreKey = StoreKey::new_str("key3");
     const VALUE: &str = "value1";

-    let store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let store = MemoryStore::new(&nativelink_config::stores::MemorySpec::default());
     store.update_oneshot(KEY1, VALUE.into()).await?;
     store.update_oneshot(KEY2, VALUE.into()).await?;
     store.update_oneshot(KEY3, VALUE.into()).await?;
diff --git a/nativelink-store/tests/ref_store_test.rs b/nativelink-store/tests/ref_store_test.rs
index d47ab4a25..652146421 100644
--- a/nativelink-store/tests/ref_store_test.rs
+++ b/nativelink-store/tests/ref_store_test.rs
@@ -30,12 +30,12 @@ fn setup_stores() -> (Arc<StoreManager>, Store, Store) {
     let store_manager = Arc::new(StoreManager::new());

     let memory_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     store_manager.add_store("foo", memory_store.clone());

     let ref_store = Store::new(RefStore::new(
-        &nativelink_config::stores::RefStore {
+        &nativelink_config::stores::RefSpec {
             name: "foo".to_string(),
         },
         Arc::downgrade(&store_manager),
@@ -141,12 +141,12 @@ async fn inner_store_test() -> Result<(), Error> {
     let store_manager = Arc::new(StoreManager::new());

     let memory_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     store_manager.add_store("mem_store", memory_store.clone());

     let ref_store_inner = Store::new(RefStore::new(
-        &nativelink_config::stores::RefStore {
+        &nativelink_config::stores::RefSpec {
             name: "mem_store".to_string(),
         },
         Arc::downgrade(&store_manager),
@@ -154,7 +154,7 @@ async fn inner_store_test() -> Result<(), Error> {
     store_manager.add_store("ref_store_inner", ref_store_inner.clone());

     let ref_store_outer = Store::new(RefStore::new(
-        &nativelink_config::stores::RefStore {
+        &nativelink_config::stores::RefSpec {
             name: "ref_store_inner".to_string(),
         },
         Arc::downgrade(&store_manager),
diff --git a/nativelink-store/tests/s3_store_test.rs b/nativelink-store/tests/s3_store_test.rs
index e25ce5154..f5b73c5c2 100644
--- a/nativelink-store/tests/s3_store_test.rs
+++ b/nativelink-store/tests/s3_store_test.rs
@@ -59,7 +59,7 @@ async fn simple_has_object_found() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -94,7 +94,7 @@ async fn simple_has_object_not_found() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -153,7 +153,7 @@ async fn simple_has_retries() -> Result<(), Error> {
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);

     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             retry: nativelink_config::stores::Retry {
                 max_retries: 1024,
@@ -205,7 +205,7 @@ async fn simple_update_ac() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -293,7 +293,7 @@ async fn simple_get_ac() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -338,7 +338,7 @@ async fn smoke_test_get_part() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -399,7 +399,7 @@ async fn get_part_simple_retries() -> Result<(), Error> {
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);

     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             retry: nativelink_config::stores::Retry {
                 max_retries: 1024,
@@ -530,7 +530,7 @@ async fn multipart_update_large_cas() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -570,7 +570,7 @@ async fn ensure_empty_string_in_stream_works_test() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -613,7 +613,7 @@ async fn get_part_is_zero_digest() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = Arc::new(S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -655,7 +655,7 @@ async fn has_with_results_on_zero_digests() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             ..Default::default()
         },
@@ -698,7 +698,7 @@ async fn has_with_expired_result() -> Result<(), Error> {
         .build();
     let s3_client = aws_sdk_s3::Client::from_conf(test_config);
     let store = S3Store::new_with_client_and_jitter(
-        &nativelink_config::stores::S3Store {
+        &nativelink_config::stores::S3Spec {
             bucket: BUCKET_NAME.to_string(),
             consider_expired_after_s: 2 * 24 * 60 * 60, // 2 days.
             ..Default::default()
diff --git a/nativelink-store/tests/shard_store_test.rs b/nativelink-store/tests/shard_store_test.rs
index ac67b855c..67e95f415 100644
--- a/nativelink-store/tests/shard_store_test.rs
+++ b/nativelink-store/tests/shard_store_test.rs
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use nativelink_config::stores::{MemorySpec, ShardConfig, ShardSpec, StoreRef};
 use nativelink_error::Error;
 use nativelink_macro::nativelink_test;
 use nativelink_store::memory_store::MemoryStore;
@@ -28,18 +29,19 @@ use rand::{Rng, SeedableRng};
 const MEGABYTE_SZ: usize = 1024 * 1024;

 fn make_stores(weights: &[u32]) -> (Arc<ShardStore>, Vec<Arc<MemoryStore>>) {
-    let memory_store_config = nativelink_config::stores::MemoryStore::default();
-    let store_config = nativelink_config::stores::StoreConfig::memory(memory_store_config.clone());
+    let memory_store_spec = MemorySpec::default();
+    let store_config = StoreRef::new("memory", memory_store_spec.clone());
+
     let stores: Vec<_> = weights
         .iter()
-        .map(|_| MemoryStore::new(&memory_store_config))
+        .map(|_| MemoryStore::new(&memory_store_spec))
         .collect();
     let shard_store = ShardStore::new(
-        &nativelink_config::stores::ShardStore {
+        &ShardSpec {
             stores: weights
                 .iter()
-                .map(|weight| nativelink_config::stores::ShardConfig {
+                .map(|weight| ShardConfig {
                     store: store_config.clone(),
                     weight: Some(*weight),
                 })
diff --git a/nativelink-store/tests/size_partitioning_store_test.rs b/nativelink-store/tests/size_partitioning_store_test.rs
index 0339121ec..52a89d8cf 100644
--- a/nativelink-store/tests/size_partitioning_store_test.rs
+++ b/nativelink-store/tests/size_partitioning_store_test.rs
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use nativelink_config::stores::{MemorySpec, SizePartitioningSpec, StoreRef};
 use nativelink_error::Error;
 use nativelink_macro::nativelink_test;
 use nativelink_store::memory_store::MemoryStore;
@@ -37,18 +38,14 @@ fn setup_stores(
     Arc<MemoryStore>,
     Arc<MemoryStore>,
 ) {
-    let lower_memory_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
-    let upper_memory_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let lower_memory_store = MemoryStore::new(&MemorySpec::default());
+    let upper_memory_store = MemoryStore::new(&MemorySpec::default());

     let size_part_store = SizePartitioningStore::new(
-        &nativelink_config::stores::SizePartitioningStore {
+        &SizePartitioningSpec {
             size,
-            lower_store: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            upper_store: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+            lower_store: StoreRef::new("lower", MemorySpec::default()),
+            upper_store: StoreRef::new("upper", MemorySpec::default()),
         },
         Store::new(lower_memory_store.clone()),
         Store::new(upper_memory_store.clone()),
diff --git a/nativelink-store/tests/verify_store_test.rs b/nativelink-store/tests/verify_store_test.rs
index 93d92b907..777fe74b4 100644
--- a/nativelink-store/tests/verify_store_test.rs
+++ b/nativelink-store/tests/verify_store_test.rs
@@ -16,6 +16,7 @@ use std::pin::Pin;

 use futures::future::pending;
 use futures::try_join;
+use nativelink_config::stores::{MemorySpec, StoreRef, VerifySpec};
 use nativelink_error::{Error, ResultExt};
 use nativelink_macro::nativelink_test;
 use nativelink_store::memory_store::MemoryStore;
@@ -34,12 +35,10 @@ const VALID_HASH1: &str = "0123456789abcdef0000000000000000000100000000000001234
 async fn verify_size_false_passes_on_update() -> Result<(), Error> {
     const VALUE1: &str = "123";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: false,
             verify_hash: false,
         },
@@ -67,12 +66,10 @@ async fn verify_size_true_fails_on_update() -> Result<(), Error> {
     const VALUE1: &str = "123";
     const EXPECTED_ERR: &str = "Expected size 100 but got size 3 on insert";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: true,
             verify_hash: false,
         },
@@ -107,12 +104,10 @@ async fn verify_size_true_fails_on_update() -> Result<(), Error> {
 async fn verify_size_true_suceeds_on_update() -> Result<(), Error> {
     const VALUE1: &str = "123";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: true,
             verify_hash: false,
         },
@@ -132,12 +127,10 @@ async fn verify_size_true_suceeds_on_update() -> Result<(), Error> {
 #[nativelink_test]
 async fn verify_size_true_suceeds_on_multi_chunk_stream_update() -> Result<(), Error> {
-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: true,
             verify_hash: false,
         },
@@ -174,12 +167,10 @@ async fn verify_sha256_hash_true_suceeds_on_update() -> Result<(), Error> {
     const HASH: &str = "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3";
     const VALUE: &str = "123";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: false,
             verify_hash: true,
         },
@@ -204,12 +195,10 @@ async fn verify_sha256_hash_true_fails_on_update() -> Result<(), Error> {
     const VALUE: &str = "123";
     const ACTUAL_HASH: &str = "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: false,
             verify_hash: true,
         },
@@ -239,12 +228,10 @@ async fn verify_blake3_hash_true_suceeds_on_update() -> Result<(), Error> {
     const HASH: &str = "b3d4f8803f7e24b8f389b072e75477cdbcfbe074080fb5e500e53e26e054158e";
     const VALUE: &str = "123";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: false,
             verify_hash: true,
         },
@@ -275,12 +262,10 @@ async fn verify_blake3_hash_true_fails_on_update() -> Result<(), Error> {
     const VALUE: &str = "123";
     const ACTUAL_HASH: &str = "b3d4f8803f7e24b8f389b072e75477cdbcfbe074080fb5e500e53e26e054158e";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: false,
             verify_hash: true,
         },
@@ -320,12 +305,10 @@ async fn verify_fails_immediately_on_too_much_data_sent_update() -> Result<(), E
     const VALUE: &str = "123";
     const EXPECTED_ERR: &str = "Expected size 4 but already received 6 on insert";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: true,
             verify_hash: false,
         },
@@ -366,12 +349,10 @@ async fn verify_size_and_hash_suceeds_on_small_data() -> Result<(), Error> {
     const HASH: &str = "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3";
     const VALUE: &str = "123";

-    let inner_store = MemoryStore::new(&nativelink_config::stores::MemoryStore::default());
+    let inner_store = MemoryStore::new(&MemorySpec::default());
     let store = VerifyStore::new(
-        &nativelink_config::stores::VerifyStore {
-            backend: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+        &VerifySpec {
+            backend: StoreRef::new("memory", MemorySpec::default()),
             verify_size: true,
             verify_hash: true,
         },
diff --git a/nativelink-worker/src/running_actions_manager.rs b/nativelink-worker/src/running_actions_manager.rs
index 83d62dcd8..c1cde454c 100644
--- a/nativelink-worker/src/running_actions_manager.rs
+++ b/nativelink-worker/src/running_actions_manager.rs
@@ -810,16 +810,16 @@ impl RunningActionImpl {
                     .get(property)
                     .map_or_else(|| Cow::Borrowed(""), |v| Cow::Borrowed(v.as_str())),
                 EnvironmentSource::value(value) => Cow::Borrowed(value.as_str()),
-                EnvironmentSource::timeout_millis => {
+                EnvironmentSource::timeout_millis(_) => {
                     Cow::Owned(requested_timeout.as_millis().to_string())
                 }
-                EnvironmentSource::side_channel_file => {
+                EnvironmentSource::side_channel_file(_) => {
                     let file_cow =
                         format!("{}/{}", self.action_directory, Uuid::new_v4().simple());
                     maybe_side_channel_file = Some(Cow::Owned(file_cow.clone().into()));
                     Cow::Owned(file_cow)
                 }
-                EnvironmentSource::action_directory => {
+                EnvironmentSource::action_directory(_) => {
                     Cow::Borrowed(self.action_directory.as_str())
                 }
             };
diff --git a/nativelink-worker/tests/local_worker_test.rs b/nativelink-worker/tests/local_worker_test.rs
index f7fe2d1ce..2da2de5d2 100644
--- a/nativelink-worker/tests/local_worker_test.rs
+++ b/nativelink-worker/tests/local_worker_test.rs
@@ -31,6 +31,7 @@ mod utils {

 use hyper::body::Frame;
 use nativelink_config::cas_server::{LocalWorkerConfig, WorkerProperty};
+use nativelink_config::stores::{MemorySpec, StoreRef};
 use nativelink_error::{make_err, make_input_err, Code, Error};
 use nativelink_macro::nativelink_test;
 use nativelink_proto::build::bazel::remote::execution::v2::platform::Property;
@@ -406,17 +407,13 @@ async fn simple_worker_start_action_test() -> Result<(), Box
 ) -> Result<(), Box<dyn std::error::Error>> {
     let cas_store = Store::new(FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
+        &nativelink_config::stores::FastSlowSpec {
             // Note: These are not needed for this test, so we put dummy memory stores here.
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         Store::new(
-            <FilesystemStore>::new(&nativelink_config::stores::FilesystemStore {
+            <FilesystemStore>::new(&nativelink_config::stores::FilesystemSpec {
                 content_path: make_temp_path("content_path"),
                 temp_path: make_temp_path("temp_path"),
                 ..Default::default()
@@ -424,11 +421,11 @@ async fn new_local_worker_creates_work_directory_test() -> Result<(), Box
 ) -> Result<(), Box<dyn std::error::Error>> {
     let cas_store = Store::new(FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
+        &nativelink_config::stores::FastSlowSpec {
             // Note: These are not needed for this test, so we put dummy memory stores here.
-            fast: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
-            slow: nativelink_config::stores::StoreConfig::memory(
-                nativelink_config::stores::MemoryStore::default(),
-            ),
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         Store::new(
-            <FilesystemStore>::new(&nativelink_config::stores::FilesystemStore {
+            <FilesystemStore>::new(&nativelink_config::stores::FilesystemSpec {
                 content_path: make_temp_path("content_path"),
                 temp_path: make_temp_path("temp_path"),
                 ..Default::default()
@@ -472,11 +465,11 @@ async fn new_local_worker_removes_work_directory_before_start_test(
             .await?,
         ),
         Store::new(MemoryStore::new(
-            &nativelink_config::stores::MemoryStore::default(),
+            &nativelink_config::stores::MemorySpec::default(),
        )),
     ));
     let ac_store = Store::new(MemoryStore::new(
-        &nativelink_config::stores::MemoryStore::default(),
+        &nativelink_config::stores::MemorySpec::default(),
     ));
     let work_directory = make_temp_path("foo");
     fs::create_dir_all(format!("{}/{}", work_directory, "another_dir")).await?;
diff --git a/nativelink-worker/tests/running_actions_manager_test.rs b/nativelink-worker/tests/running_actions_manager_test.rs
index 4328190ba..72ce232a6 100644
--- a/nativelink-worker/tests/running_actions_manager_test.rs
+++ b/nativelink-worker/tests/running_actions_manager_test.rs
@@ -27,6 +27,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};

 use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt};
 use nativelink_config::cas_server::EnvironmentSource;
+use nativelink_config::stores::{MemorySpec, StoreRef};
 use nativelink_error::{make_input_err, Code, Error, ResultExt};
 use nativelink_macro::nativelink_test;
 use nativelink_proto::build::bazel::remote::execution::v2::command::EnvironmentVariable;
@@ -88,20 +89,20 @@ async fn setup_stores() -> Result<
     ),
     Error,
 > {
-    let fast_config = nativelink_config::stores::FilesystemStore {
+    let fast_spec = nativelink_config::stores::FilesystemSpec {
         content_path: make_temp_path("content_path"),
         temp_path: make_temp_path("temp_path"),
         eviction_policy: None,
         ..Default::default()
     };
-    let slow_config = nativelink_config::stores::MemoryStore::default();
-    let fast_store = FilesystemStore::new(&fast_config).await?;
-    let slow_store = MemoryStore::new(&slow_config);
-    let ac_store = MemoryStore::new(&slow_config);
+    let slow_spec = nativelink_config::stores::MemorySpec::default();
+    let fast_store = FilesystemStore::new(&fast_spec).await?;
+    let slow_store = MemoryStore::new(&slow_spec);
+    let ac_store = MemoryStore::new(&slow_spec);
     let cas_store = FastSlowStore::new(
-        &nativelink_config::stores::FastSlowStore {
-            fast: nativelink_config::stores::StoreConfig::filesystem(fast_config),
-            slow: nativelink_config::stores::StoreConfig::memory(slow_config),
+        &nativelink_config::stores::FastSlowSpec {
+            fast: StoreRef::new("fast", MemorySpec::default()),
+            slow: StoreRef::new("slow", MemorySpec::default()),
         },
         Store::new(fast_store.clone()),
         Store::new(slow_store.clone()),
@@ -1657,7 +1658,7 @@ exit 0
         ),
         (
             "INNER_TIMEOUT".to_string(),
-            EnvironmentSource::timeout_millis,
+            EnvironmentSource::timeout_millis("unused".into()),
         ),
         (
             "PATH".to_string(),
@@ -1816,7 +1817,7 @@ exit 1
         entrypoint: Some(test_wrapper_script.into_string().unwrap()),
         additional_environment: Some(HashMap::from([(
             "SIDE_CHANNEL_FILE".to_string(),
-            EnvironmentSource::side_channel_file,
+            EnvironmentSource::side_channel_file(String::new()),
         )])),
     },
     cas_store: cas_store.clone(),
diff --git a/src/bin/nativelink.rs b/src/bin/nativelink.rs
index 7e2276a63..2a6e735c9 100644
--- a/src/bin/nativelink.rs
+++ b/src/bin/nativelink.rs
@@ -182,13 +182,18 @@ async fn inner_main(
     {
         let mut health_registry_lock = health_registry_builder.lock().await;

-        for (name, store_cfg) in cfg.stores {
+        for store_cfg in cfg.stores {
+            let name = store_cfg.name.clone();
             let health_component_name = format!("stores/{name}");
             let mut health_register_store =
                 health_registry_lock.sub_builder(&health_component_name);
-            let store = store_factory(&store_cfg, &store_manager, Some(&mut health_register_store))
-                .await
-                .err_tip(|| format!("Failed to create store '{name}'"))?;
+            let store = store_factory(
+                &store_cfg.into(),
+                &store_manager,
+                Some(&mut health_register_store),
+            )
+            .await
+            .err_tip(|| format!("Failed to create store '{name}'"))?;
             store_manager.add_store(&name, store);
         }
     }
@@ -196,15 +201,16 @@ async fn inner_main(
     let mut action_schedulers = HashMap::new();
     let mut worker_schedulers = HashMap::new();
     if let Some(schedulers_cfg) = cfg.schedulers {
-        for (name, scheduler_cfg) in schedulers_cfg {
+        for scheduler_cfg in schedulers_cfg {
+            let name = scheduler_cfg.name.clone();
             let (maybe_action_scheduler, maybe_worker_scheduler) =
-                scheduler_factory(&scheduler_cfg, &store_manager)
+                scheduler_factory(&scheduler_cfg.into(), &store_manager)
                     .err_tip(|| format!("Failed to create scheduler '{name}'"))?;
             if let Some(action_scheduler) = maybe_action_scheduler {
                 action_schedulers.insert(name.clone(), action_scheduler.clone());
             }
             if let Some(worker_scheduler) = maybe_worker_scheduler {
-                worker_schedulers.insert(name.clone(), worker_scheduler.clone());
+                worker_schedulers.insert(name, worker_scheduler.clone());
             }
         }
     }