From f5558ba641a5e1fccdc71a7fea7bfd290226bb26 Mon Sep 17 00:00:00 2001 From: Nicolas Ochem Date: Wed, 20 Jan 2021 21:01:44 -0800 Subject: [PATCH 1/2] wait for bootstrap node to be up before launching other nodes We replace the bootstrap service with a headless service and have all nodes connect directly to the pod IPs. This exposes a bug where nodes are stuck with zero connections if they try to connect too early. This was previously masked by kube-proxy. We fix that by adding an init container which waits for the first bootstrap node to accept connections. The bootstrap attribute is now declarative: we no longer consider every baking node to be a bootstrap node. Instead, "bootstrap: true" must be set explicitly on a node in values.yaml. We add headless services in order to give the pods a meaningful FQDN. Rename the "bootstrap" statefulset to "baker" to reflect this new model. Other changes: * Do not hardcode the protocol version in the key-importer script; pass it as a variable instead. * In values.yaml, baker nodes and regular nodes are now listed separately, which simplifies the code in other places. * The invitation values always contain one node, even though the main topology may have several. To add more nodes to an invitation, edit the YAML manually. 
fix #69 --- baker/entrypoint.sh | 4 +-- chain-initiator/entrypoint.sh | 6 ++-- charts/rpc-auth/templates/rpc-auth.yaml | 4 +-- .../templates/{bootstrap.yaml => baker.yaml} | 29 ++++++++++--------- charts/tezos/templates/configs.yaml | 3 ++ charts/tezos/templates/node.yaml | 21 ++++++++------ charts/tezos/templates/static.yaml | 23 ++++++++------- charts/tezos/values.yaml | 7 +++-- config-generator/entrypoint.py | 7 ++++- devspace.yaml | 4 +++ mkchain/tqchain/mkchain.py | 18 ++++++++---- wait-for-bootstrap/Dockerfile | 5 ++++ wait-for-bootstrap/entrypoint.sh | 19 ++++++++++++ zerotier/entrypoint.sh | 16 +++------- 14 files changed, 106 insertions(+), 60 deletions(-) rename charts/tezos/templates/{bootstrap.yaml => baker.yaml} (92%) create mode 100644 wait-for-bootstrap/Dockerfile create mode 100755 wait-for-bootstrap/entrypoint.sh diff --git a/baker/entrypoint.sh b/baker/entrypoint.sh index b7d873ac5..00b98a095 100755 --- a/baker/entrypoint.sh +++ b/baker/entrypoint.sh @@ -3,6 +3,6 @@ set -x baker_command=$(echo $CHAIN_PARAMS | jq -r '.baker_command') -POD_INDEX=$(echo $POD_NAME | sed -e s/tezos-bootstrap-node-//) -baker_account=$(echo $NODES | jq -r "[.[] | select(.bake_for)] | . 
[${POD_INDEX}].bake_for") +POD_INDEX=$(echo $POD_NAME | sed -e s/tezos-baking-node-//) +baker_account=$(echo $NODES | jq -r ".baking[${POD_INDEX}].bake_for") /usr/local/bin/${baker_command} -d /var/tezos/client run with local node /var/tezos/node ${baker_account:?Error: baker account not set} diff --git a/chain-initiator/entrypoint.sh b/chain-initiator/entrypoint.sh index c14ee8a0a..e6c5d5a28 100755 --- a/chain-initiator/entrypoint.sh +++ b/chain-initiator/entrypoint.sh @@ -2,10 +2,10 @@ set -x # wait for node to exist -until nslookup tezos-bootstrap-node-rpc; do echo waiting for tezos-bootstrap-node-rpc; sleep 2; done; +until nslookup tezos-node-rpc; do echo waiting for tezos-node-rpc; sleep 2; done; # wait for node to respond to rpc -until wget -O- http://tezos-bootstrap-node-rpc:8732/version; do sleep 2; done; +until wget -O- http://tezos-node-rpc:8732/version; do sleep 2; done; protocol_hash=$(echo $CHAIN_PARAMS | jq -r '.protocol_hash') activation_account=$(echo $CHAIN_PARAMS | jq -r '.activation_account') -/usr/local/bin/tezos-client -A tezos-bootstrap-node-rpc -P 8732 -d /var/tezos/client -l --block genesis activate protocol "${protocol_hash}" with fitness -1 and key "${activation_account}" and parameters /etc/tezos/parameters.json +/usr/local/bin/tezos-client -A tezos-node-rpc -P 8732 -d /var/tezos/client -l --block genesis activate protocol "${protocol_hash}" with fitness -1 and key "${activation_account}" and parameters /etc/tezos/parameters.json diff --git a/charts/rpc-auth/templates/rpc-auth.yaml b/charts/rpc-auth/templates/rpc-auth.yaml index e7fce4532..9a50536f2 100644 --- a/charts/rpc-auth/templates/rpc-auth.yaml +++ b/charts/rpc-auth/templates/rpc-auth.yaml @@ -134,7 +134,7 @@ spec: - name: REDIS_PORT value: {{ .Values.redis.port | quote }} - name: TEZOS_RPC_SERVICE - value: tezos-bootstrap-node-rpc + value: tezos-node-rpc - name: TEZOS_RPC_SERVICE_PORT value: "8732" --- @@ -185,5 +185,5 @@ spec: - path: /tezos-node-rpc/(.*?)/(.*) pathType: Exact 
backend: - serviceName: tezos-bootstrap-node-rpc + serviceName: tezos-node-rpc servicePort: 8732 diff --git a/charts/tezos/templates/bootstrap.yaml b/charts/tezos/templates/baker.yaml similarity index 92% rename from charts/tezos/templates/bootstrap.yaml rename to charts/tezos/templates/baker.yaml index 8e3a5750a..1b0abb6d6 100644 --- a/charts/tezos/templates/bootstrap.yaml +++ b/charts/tezos/templates/baker.yaml @@ -1,11 +1,3 @@ -{{- /* Function to get number of bakers */}} -{{- define "bakers" }} - {{- range $i, $node := $.Values.nodes }} - {{- if (hasKey $node "bake_for") }} -{{ $i }}: true - {{- end }} - {{- end }} -{{- end -}} {{- if not .Values.is_invitation }} apiVersion: batch/v1 kind: Job @@ -73,18 +65,18 @@ spec: apiVersion: apps/v1 kind: StatefulSet metadata: - name: tezos-bootstrap-node + name: tezos-baking-node namespace: {{ .Release.Namespace }} spec: - replicas: {{ include "bakers" . | fromYaml | values | len }} - serviceName: tezos-bootstrap-node-rpc + replicas: {{ .Values.nodes.baking | len }} + serviceName: tezos-baking-node selector: matchLabels: - app: tezos-bootstrap-node + app: tezos-baking-node template: metadata: labels: - app: tezos-bootstrap-node + app: tezos-baking-node appType: tezos spec: containers: @@ -175,6 +167,11 @@ spec: name: var-volume - mountPath: /dev/net/tun name: dev-net-tun + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name {{- end }} - image: {{ .Values.tezos_k8s_images.key_importer }} name: key-importer @@ -219,6 +216,12 @@ spec: name: config-volume - mountPath: /var/tezos name: var-volume + - image: {{ .Values.tezos_k8s_images.wait_for_bootstrap }} + imagePullPolicy: IfNotPresent + name: wait-for-bootstrap + volumeMounts: + - mountPath: /var/tezos + name: var-volume securityContext: fsGroup: 100 volumes: diff --git a/charts/tezos/templates/configs.yaml b/charts/tezos/templates/configs.yaml index d4a0141e4..0c9506029 100644 --- a/charts/tezos/templates/configs.yaml +++ 
b/charts/tezos/templates/configs.yaml @@ -4,6 +4,7 @@ data: { "bootstrap_mutez": "{{ .Values.protocol.bootstrap_mutez }}", "bootstrap_peers": {{ toJson .Values.bootstrap_peers }}, + "nodes": {{ toJson .Values.nodes }}, "chain_name": "{{ .Values.chain_name }}", "genesis_block": "{{ .Values.genesis.genesis_chain_id }}", "activation_account": "{{ (index .Values.accounts 0).name }}", @@ -24,6 +25,8 @@ data: CHAIN_NAME: "{{ .Values.chain_name }}" NETWORK_ID: "{{ .Values.zerotier_config.zerotier_network }}" ZTAUTHTOKEN: "{{ .Values.zerotier_config.zerotier_token }}" + NODES: | +{{ .Values.nodes | toJson | indent 4}} kind: ConfigMap metadata: name: zerotier-config diff --git a/charts/tezos/templates/node.yaml b/charts/tezos/templates/node.yaml index 42d8fa6fe..a2670e133 100644 --- a/charts/tezos/templates/node.yaml +++ b/charts/tezos/templates/node.yaml @@ -1,11 +1,3 @@ -{{- /* Function to get number of non bakers */}} -{{- define "nonBakers" }} - {{- range $i, $node := $.Values.nodes }} - {{- if (not (hasKey $node "bake_for")) }} -{{ $i }}: true - {{- end }} - {{- end }} -{{- end -}} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -13,7 +5,7 @@ metadata: namespace: {{ .Release.Namespace }} spec: podManagementPolicy: Parallel - replicas: {{ include "nonBakers" . 
| fromYaml | values | len }} + replicas: {{ .Values.nodes.regular | len }} selector: matchLabels: app: tezos-node @@ -100,6 +92,11 @@ spec: name: var-volume - mountPath: /dev/net/tun name: dev-net-tun + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name {{- end }} - image: {{ .Values.tezos_k8s_images.key_importer }} name: key-importer @@ -143,6 +140,12 @@ spec: name: config-volume - mountPath: /var/tezos name: var-volume + - image: {{ .Values.tezos_k8s_images.wait_for_bootstrap }} + imagePullPolicy: IfNotPresent + name: wait-for-bootstrap + volumeMounts: + - mountPath: /var/tezos + name: var-volume securityContext: fsGroup: 100 volumes: diff --git a/charts/tezos/templates/static.yaml b/charts/tezos/templates/static.yaml index b19860c18..2de39bc21 100644 --- a/charts/tezos/templates/static.yaml +++ b/charts/tezos/templates/static.yaml @@ -1,26 +1,29 @@ apiVersion: v1 kind: Service metadata: - name: tezos-bootstrap-node-rpc + name: tezos-node-rpc namespace: {{ .Release.Namespace }} spec: ports: - port: 8732 selector: - app: tezos-bootstrap-node + app: tezos-baking-node type: NodePort --- -{{- if not .Values.zerotier_in_use }} apiVersion: v1 kind: Service metadata: - name: tezos-bootstrap-node-p2p - namespace: {{ .Release.Namespace }} + name: tezos-node spec: - ports: - - port: 9732 + clusterIP: None selector: - app: tezos-bootstrap-node - type: NodePort + app: tezos-node --- -{{ end }} +apiVersion: v1 +kind: Service +metadata: + name: tezos-baking-node +spec: + clusterIP: None + selector: + app: tezos-baking-node diff --git a/charts/tezos/values.yaml b/charts/tezos/values.yaml index e173dbc86..b20e1507d 100644 --- a/charts/tezos/values.yaml +++ b/charts/tezos/values.yaml @@ -17,6 +17,7 @@ tezos_k8s_images: baker: tezos-k8s-baker:dev key_importer: tezos-k8s-key-importer:dev config_generator: tezos-k8s-config-generator:dev + wait_for_bootstrap: tezos-k8s-wait-for-bootstrap:dev key_importer: tezos-k8s-key-importer:dev zerotier: 
tezos-k8s-zerotier:dev @@ -30,14 +31,16 @@ accounts: - name: sample1 key: edsk... private: true - bootstrap: true baker: true - name: sample2 key: edsk... private: true - bootstrap: true nodes: + baking: - bake_for: sample2 + bootstrap: true + regular: + - {} # TODO # archive nodes, node with open rpc # take into account fundraisers accounts that people need to activate diff --git a/config-generator/entrypoint.py b/config-generator/entrypoint.py index ed3017f07..ea82da5ea 100644 --- a/config-generator/entrypoint.py +++ b/config-generator/entrypoint.py @@ -42,7 +42,12 @@ def main(): if bootstrap_peers == []: bootstrap_peers.extend(get_zerotier_bootstrap_peer_ips()) else: - bootstrap_peers.append("tezos-bootstrap-node-p2p:9732") + local_bootstrap_peers = [] + for i, node in enumerate(CHAIN_PARAMS["nodes"]["baking"]): + if node.get("bootstrap", False) and f"tezos-baking-node-{i}" not in socket.gethostname(): + local_bootstrap_peers.append(f"tezos-baking-node-{i}.tezos-baking-node:9732") + bootstrap_peers.extend(local_bootstrap_peers) + config_json = json.dumps( get_node_config( diff --git a/devspace.yaml b/devspace.yaml index 6c64e3e41..a487bf096 100755 --- a/devspace.yaml +++ b/devspace.yaml @@ -28,6 +28,10 @@ images: image: tezos-k8s-key-importer dockerfile: ./key-importer/Dockerfile context: ./key-importer + wait-for-bootstrap: + image: tezos-k8s-wait-for-bootstrap + dockerfile: ./wait-for-bootstrap/Dockerfile + context: ./wait-for-bootstrap config-generator: image: tezos-k8s-config-generator dockerfile: ./config-generator/Dockerfile diff --git a/mkchain/tqchain/mkchain.py b/mkchain/tqchain/mkchain.py index b4b396c0b..0dc8248d7 100644 --- a/mkchain/tqchain/mkchain.py +++ b/mkchain/tqchain/mkchain.py @@ -120,7 +120,7 @@ def main(): ) exit(1) - bootstrap_accounts = [f"baker{n}" for n in range(args.number_of_bakers)] + baking_accounts = [f"baker{n}" for n in range(args.number_of_bakers)] base_constants = { "chain_name": args.chain_name, @@ -142,7 +142,7 @@ def main(): 
} accounts = {"secret": [], "public": []} - for account in bootstrap_accounts: + for account in baking_accounts: keys = gen_key(args.docker_image) for key_type in keys: accounts[key_type].append( @@ -153,11 +153,17 @@ def main(): } ) - creation_nodes = [ - {"bake_for": f"baker{n}"} for n in range(args.number_of_bakers) - ] + [{} for n in range(args.number_of_nodes - args.number_of_bakers)] + creation_nodes = { + "baking": [{"bake_for": f"baker{n}"} for n in range(args.number_of_bakers)], + "regular": [{} for n in range(args.number_of_nodes - args.number_of_bakers)], + } + + # first nodes are acting as bootstrap nodes for the others + creation_nodes["baking"][0]["bootstrap"] = True + if len(creation_nodes["baking"]) > 1: + creation_nodes["baking"][1]["bootstrap"] = True - invitation_nodes = [{}] + invitation_nodes = {"baking": [], "regular": [{}]} bootstrap_peers = [args.bootstrap_peer] if args.bootstrap_peer else [] diff --git a/wait-for-bootstrap/Dockerfile b/wait-for-bootstrap/Dockerfile new file mode 100644 index 000000000..7bf21b0cc --- /dev/null +++ b/wait-for-bootstrap/Dockerfile @@ -0,0 +1,5 @@ +FROM tezos/tezos:v8-release +RUN sudo apk add jq netcat-openbsd +COPY entrypoint.sh / +ENTRYPOINT ["/entrypoint.sh"] +CMD [] diff --git a/wait-for-bootstrap/entrypoint.sh b/wait-for-bootstrap/entrypoint.sh new file mode 100755 index 000000000..aaae50566 --- /dev/null +++ b/wait-for-bootstrap/entrypoint.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +# When the tezos-node boots for the first time and the bootstrap node is not up yet, it will never connect. +# So at first boot (when peers.json is empty) we wait for bootstrap node. +# This is probably a bug in tezos core, though. + +if [ -e /var/tezos/node/peers.json ] && [ "$(jq length /var/tezos/node/peers.json)" -gt "0" ]; then + printf "Node already has an internal list of peers, no need to wait for bootstrap \n" + exit 0 +fi + +FIRST_BOOTSTRAP_NODE="tezos-baking-node-0.tezos-baking-node" +if [ "$(hostname -f | cut -d"." 
-f1-2)" == "${FIRST_BOOTSTRAP_NODE}" ]; then + printf "do not wait for myself\n" + exit 0 +fi + +# wait for node to respond to rpc +until nc -q 0 ${FIRST_BOOTSTRAP_NODE} 8732; do echo "waiting for bootstrap node to accept connections"; sleep 2; done; diff --git a/zerotier/entrypoint.sh b/zerotier/entrypoint.sh index dd05ab9a3..fa544a5fb 100755 --- a/zerotier/entrypoint.sh +++ b/zerotier/entrypoint.sh @@ -7,15 +7,6 @@ set -x supervisord -c /etc/supervisor/supervisord.conf -echo "Set zerotier name" -if [[ $(hostname) == *"bootstrap-node"* ]]; then - zerotier_name="${CHAIN_NAME}_bootstrap" - zerotier_description="Bootstrap node for chain ${CHAIN_NAME}" -else - zerotier_name="${CHAIN_NAME}_node" - zerotier_description="P2p node of chain ${CHAIN_NAME}" -fi - [ ! -z $NETWORK_ID ] && { sleep 5; zerotier-cli -D/var/tezos/zerotier join $NETWORK_ID || exit 1; } # waiting for Zerotier IP @@ -41,12 +32,13 @@ do done echo "Set zerotier name" -if [[ $(hostname) == *"bootstrap-node"* ]]; then +POD_INDEX=$(echo $POD_NAME | sed -e s/tezos-baking-node-// | sed -e s/tezos-node-//) +if grep baking <<< $POD_NAME && [ "$(echo $NODES | jq -r ".baking[${POD_INDEX}].bootstrap")" == "true" ]; then zerotier_name="${CHAIN_NAME}_bootstrap" - zerotier_description="Bootstrap node for chain ${CHAIN_NAME}" + zerotier_description="Bootstrap node ${POD_NAME} for chain ${CHAIN_NAME}" else zerotier_name="${CHAIN_NAME}_node" - zerotier_description="P2p node of chain ${CHAIN_NAME}" + zerotier_description="Node ${POD_NAME} of chain ${CHAIN_NAME}" fi curl -s -XPOST \ -H "Authorization: Bearer $ZTAUTHTOKEN" \ From cf1d22d388b6112795a7a620dd4cbadaaf928ad9 Mon Sep 17 00:00:00 2001 From: Nicolas Ochem Date: Wed, 20 Jan 2021 21:41:50 -0800 Subject: [PATCH 2/2] lint --- config-generator/entrypoint.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/config-generator/entrypoint.py b/config-generator/entrypoint.py index ea82da5ea..882c915d6 100644 --- a/config-generator/entrypoint.py 
+++ b/config-generator/entrypoint.py @@ -44,11 +44,15 @@ def main(): else: local_bootstrap_peers = [] for i, node in enumerate(CHAIN_PARAMS["nodes"]["baking"]): - if node.get("bootstrap", False) and f"tezos-baking-node-{i}" not in socket.gethostname(): - local_bootstrap_peers.append(f"tezos-baking-node-{i}.tezos-baking-node:9732") + if ( + node.get("bootstrap", False) + and f"tezos-baking-node-{i}" not in socket.gethostname() + ): + local_bootstrap_peers.append( + f"tezos-baking-node-{i}.tezos-baking-node:9732" + ) bootstrap_peers.extend(local_bootstrap_peers) - config_json = json.dumps( get_node_config( CHAIN_PARAMS["chain_name"],