Skip to content

Commit

Permalink
Merge scheduler and cas for K8s (#1506)
Browse files Browse the repository at this point in the history
There is no reason for the scheduler and the CAS to be separate. The new
setup reduces complexity and potentially increases performance due to
in-process communication.
  • Loading branch information
aaronmondal authored Nov 28, 2024
1 parent 91f3a2c commit 1b7d059
Show file tree
Hide file tree
Showing 16 changed files with 155 additions and 280 deletions.
23 changes: 7 additions & 16 deletions .github/workflows/lre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,10 @@ jobs:
--timeout=15m \
nativelink"
- name: Wait for CAS
- name: Wait for NativeLink
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-cas"
- name: Wait for scheduler
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-scheduler"
bash -c "kubectl rollout status deploy/nativelink"
- name: Wait for worker
run: >
Expand All @@ -203,8 +198,7 @@ jobs:
- name: Get gateway IPs
id: gateway-ips
run: |
echo "cache_ip=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
echo "scheduler_ip=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
echo "nativelink_ip=$(kubectl get gtw nativelink-gateway -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
- name: Print cluster state
run: |
Expand All @@ -213,19 +207,16 @@ jobs:
kubectl get svc -A
kubectl get deployments -A
kubectl describe gtw
echo "cas"
kubectl logs -l app=nativelink-cas
echo "scheduler"
kubectl logs -l app=nativelink-scheduler
echo "nativelink"
kubectl logs -l app=nativelink
echo "worker"
kubectl logs -l app=nativelink-worker
- name: Build hello_lre with LRE toolchain.
run: >
nix develop --impure --command
bash -c "bazel run \
--remote_instance_name=main \
--remote_cache=grpc://$cache_ip \
--remote_executor=grpc://$scheduler_ip \
--remote_cache=grpc://$nativelink_ip \
--remote_executor=grpc://$nativelink_ip \
--verbose_failures \
@local-remote-execution//examples:hello_lre"
5 changes: 2 additions & 3 deletions deploy/chromium-example/build_chromium_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ echo "Generating ninja projects"
gn gen --args="use_remoteexec=true rbe_cfg_dir=\"../../buildtools/reclient_cfgs/linux\"" out/Default

# Fetch cache and scheduler IP address for passing to ninja
CACHE=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}')
SCHEDULER=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}')
NATIVELINK=$(kubectl get gtw nativelink-gateway -o=jsonpath='{.status.addresses[0].value}')

echo "Starting autoninja build"
RBE_service=${SCHEDULER}:80 RBE_cas_service=${CACHE}:80 RBE_instance=main RBE_reclient_timeout=60m RBE_exec_timeout=4m RBE_alsologtostderr=true RBE_service_no_security=true RBE_service_no_auth=true RBE_local_resource_fraction=0.00001 RBE_automatic_auth=false RBE_gcert_refresh_timeout=20 RBE_compression_threshold=-1 RBE_metrics_namespace=main RBE_platform= RBE_experimental_credentials_helper= RBE_experimental_credentials_helper_args= RBE_log_http_calls=true RBE_use_rpc_credentials=false RBE_exec_strategy=remote_local_fallback autoninja -v -j 50 -C out/Default cc_unittests
RBE_service=${NATIVELINK}:80 RBE_cas_service=${NATIVELINK}:80 RBE_instance="" RBE_reclient_timeout=60m RBE_exec_timeout=4m RBE_alsologtostderr=true RBE_service_no_security=true RBE_service_no_auth=true RBE_local_resource_fraction=0.00001 RBE_automatic_auth=false RBE_gcert_refresh_timeout=20 RBE_compression_threshold=-1 RBE_metrics_namespace="" RBE_platform= RBE_experimental_credentials_helper= RBE_experimental_credentials_helper_args= RBE_log_http_calls=true RBE_use_rpc_credentials=false RBE_exec_strategy=remote_local_fallback autoninja -v -j 50 -C out/Default cc_unittests
1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@
pkgs.fluxcd
pkgs.go
pkgs.kustomize
pkgs.kubectx

## Web
pkgs.bun # got patched to the newest version (v.1.1.25)
Expand Down
3 changes: 1 addition & 2 deletions kubernetes/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cas.yaml
- scheduler.yaml
- nativelink.yaml
- worker.yaml
- ../configmaps

Expand Down
43 changes: 24 additions & 19 deletions kubernetes/base/cas.yaml → kubernetes/base/nativelink.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,66 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: nativelink-cas
name: nativelink
spec:
replicas: 1
selector:
matchLabels:
app: nativelink-cas
app: nativelink
template:
metadata:
labels:
app: nativelink-cas
app: nativelink
spec:
containers:
- name: nativelink-cas
- name: nativelink
# This image will be edited by kustomize.
image: nativelink
env:
- name: RUST_LOG
value: info
ports:
- containerPort: 9090
- containerPort: 50051
- containerPort: 50052
- containerPort: 50061
- containerPort: 50071
volumeMounts:
- name: cas-config
mountPath: /cas.json
subPath: cas.json
- name: nativelink-config
mountPath: /nativelink-config.json
subPath: nativelink-config.json
- name: tls-volume
mountPath: /root
readOnly: true
args: ["/cas.json"]
args: ["/nativelink-config.json"]
volumes:
- name: cas-config
- name: nativelink-config
configMap:
name: cas
name: nativelink-config
- name: tls-volume
secret:
secretName: tls-secret
---
apiVersion: v1
kind: Service
metadata:
name: nativelink-cas
name: nativelink
spec:
selector:
app: nativelink-cas
app: nativelink
ports:
- name: http
- name: metrics
protocol: TCP
port: 9090
targetPort: 9090
- name: grpc
protocol: TCP
port: 50051
targetPort: 50051
- name: metrics
- name: grpcs
protocol: TCP
port: 50052
targetPort: 50052
- name: worker-api
protocol: TCP
port: 50061
targetPort: 50061
- name: https
protocol: TCP
port: 50071
targetPort: 50071
52 changes: 0 additions & 52 deletions kubernetes/base/scheduler.yaml

This file was deleted.

6 changes: 2 additions & 4 deletions kubernetes/base/worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ spec:
env:
- name: RUST_LOG
value: info
- name: CAS_ENDPOINT
value: nativelink-cas
- name: SCHEDULER_ENDPOINT
value: nativelink-scheduler
- name: NATIVELINK_ENDPOINT
value: nativelink
volumeMounts:
- name: worker-config
mountPath: /worker.json
Expand Down
21 changes: 4 additions & 17 deletions kubernetes/components/gateway-routes/routes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,12 @@
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: GRPCRoute
metadata:
name: cache-route
name: nativelink-route
spec:
parentRefs:
- name: cache-gateway
sectionName: cache-gateway
- name: nativelink-gateway
sectionName: nativelink-gateway
rules:
- backendRefs:
- name: nativelink-cas
- name: nativelink
port: 50051
---
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: GRPCRoute
metadata:
name: scheduler-route
spec:
parentRefs:
- name: scheduler-gateway
sectionName: scheduler-gateway
rules:
- backendRefs:
- name: nativelink-scheduler
port: 50052
10 changes: 2 additions & 8 deletions kubernetes/configmaps/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,9 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

configMapGenerator:
- name: cas
- name: nativelink-config
files:
- cas.json
options:
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled
- name: scheduler
files:
- scheduler.json
- nativelink-config.json
options:
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled
Expand Down
Loading

0 comments on commit 1b7d059

Please sign in to comment.