diff --git a/assets/scss/_styles_project.scss b/assets/scss/_styles_project.scss index 52a3e49e..02812d08 100644 --- a/assets/scss/_styles_project.scss +++ b/assets/scss/_styles_project.scss @@ -1,5 +1,4 @@ @import '_home_svg.scss'; - .btn-lg, .btn-group-lg > .btn { border-radius: 6px; } @@ -437,7 +436,7 @@ nav.foldable-nav .with-child, nav.foldable-nav .without-child { stroke: none; } - + path.exit-flag { fill: $dark-blue; } @@ -508,6 +507,139 @@ nav.foldable-nav .with-child.depad { padding-left: 0; } +.start-align-labels.brupop-diagram { + .outer-label { + &.active-volume-label, + &.outer-label { + text-anchor: start; + } + } +} + +.brupop-diagram, +.brupop-state-machine { + .line-arrow-connector { + stroke-miterlimit: 10; + stroke: $dark-blue; + .connector { + + fill: none; + pointer-events : stroke; + &.dotted { + stroke-dasharray: 1 2; + } + } + .arrow-head { + fill: $dark-blue; + } + + } + + .label { + font-size: 12px; + font-family: $td-fonts-serif; + font-weight: 300; + + &.active-volume-label { + fill: $white; + text-anchor: middle; + } + &.outer-label { + fill: $dark-blue; + text-anchor: middle; + } + + } +} +.brupop-state-machine { + .state { + fill: $tan; + rx: 9; + ry: 9; + pointer-events: all; + } +} + +.brupop-diagram { + .node, + .agent, + .api-server, + .controller, + .unused-container, + .unused-volume, + .active-volume, + .line-arrow-connector .arrow-head, + .ellipses, + .future-volume, + .label-backer, + .wait { + pointer-events: all; + } + .node { + fill: $tan; + rx: 3; + } + .agent, + .api-server, + .controller, + .unused-container, + .unused-volume, + .active-volume, + .future-volume { + rx: 2; + } + .agent { + fill: $light-teal; + } + + .label-backer, + .unused-container, + .unused-volume { + fill: $white; + } + + + .active-volume { + fill: $dark-blue; + } + .future-volume { + fill: url(#stripes); + } + + .wait { + stroke-width: 2px; + stroke: $light-blue; + fill: none; + } + + .api-server { + fill: $dark-orange; + } + .controller { + fill: 
$light-blue; + } + .ellipses { + fill: $tan; + stroke: none; + rx: 5; + ry: 5; + } + + #stripes { + width: 7; + height: 7; + rect { + fill: $dark-blue; + } + line { + stroke: #ffffff; + opacity: 0.1; + stroke-width: 7px; + } + } + +} + /* old docs notice */ .pageinfo.olddocs { margin-left: 0; diff --git a/assets/scss/_variables_project.scss b/assets/scss/_variables_project.scss index a816efa0..8e12d8f9 100644 --- a/assets/scss/_variables_project.scss +++ b/assets/scss/_variables_project.scss @@ -1,5 +1,13 @@ + $google_font_name: "IBM Plex Sans"; -$google_font_family: "IBM+Plex+Sans+Condensed:ital,wght@0,300;0,600;1,300;1,600&family=IBM+Plex+Sans:ital,wght@0,100;0,300;0,600;1,100;1,300;1,600"; +$google_font_family: "IBM+Plex+Sans:ital,wght@0,100;0,300;0,600;1,100;1,300;1,600"; + +// this is a work around for the baked in css2 vs css call to google fonts. I don't like having two calls, but there isn't a clean way to work around this +$google_font_family_secondary: "IBM+Plex+Sans+Condensed:ital,wght@0,300;0,600;1,300;1,600"; +$web-font-path_secondary: "https://fonts.googleapis.com/css?family=#{$google_font_family_secondary}&display=swap"; +@import url($web-font-path_secondary); + + $heading_font_stack: "'IBM Plex Sans Condensed', sans-serif"; diff --git a/assets/scss/rtl/_main.scss b/assets/scss/rtl/_main.scss new file mode 100644 index 00000000..c605a3e2 --- /dev/null +++ b/assets/scss/rtl/_main.scss @@ -0,0 +1 @@ +// override. This is for RTL support, we don't need it. 
\ No newline at end of file diff --git a/content/en/brupop/1.3.x/_index.markdown b/content/en/brupop/1.3.x/_index.markdown new file mode 100644 index 00000000..2778aee2 --- /dev/null +++ b/content/en/brupop/1.3.x/_index.markdown @@ -0,0 +1,4 @@ ++++ +type="docs" +title="1.3.x (Current)" ++++ diff --git a/content/en/brupop/1.3.x/concepts/index.markdown b/content/en/brupop/1.3.x/concepts/index.markdown new file mode 100644 index 00000000..7fc35297 --- /dev/null +++ b/content/en/brupop/1.3.x/concepts/index.markdown @@ -0,0 +1,91 @@ ++++ +title = "Concepts" +type = "docs" +description = "Introduction to the components and concepts used in Brupop" +weight = 1 ++++ + +You can update Bottlerocket in a couple of ways: + +* **node replacement** where new instances with a new version of the OS replace nodes with older versions of the OS, +* **in-place updates** where the node downloads and reboots into a new version of the OS while maintaining the same instance/machine. + +There is no single preferred nor advised method to update a node; both methods have pros and cons depending on your situation. + +You can trigger an {{< cross-project-current-link project="os" url="/en/os/x.x.x/update/methods/in-place/#apiclient-commands">}}in-place update manually with the API{{< /cross-project-current-link >}} or you can use the Bottlerocket Update Operator (Brupop). +**Brupop is a Kubernetes operator for managing in-place updates of Bottlerocket on Kubernetes.** + +If you use Bottlerocket on ECS or intend to replace nodes in Kubernetes, Brupop is not for you. +Even if you do plan to do in-place updates Brupop is not required as you can manage in-place updates in other ways. +However, Brupop offers a declarative, automated way to manage in-place Bottlerocket updates. 
+ +## Controlled updates + +Brupop uses the [Kubernetes controller pattern](https://kubernetes.io/docs/concepts/architecture/controller/) in an effort to safely update all the nodes whilst minimizing disruptions to workloads. +To achieve this, Brupop does the following: + +* Controls the rate and flow of updates across the entire cluster, +* First prevents new workloads from being scheduled to the node then drains existing workloads prior to updates, +* Contains and prevents the propagation of update problems when the controller detects update failures. + +{{< brupop/components-diagram >}} + +Brupop collects the state of each node with an agent. +The Brupop Agent runs in a container on each node as a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/). +This agent sends the state to an API Server. +API Server instances run in the cluster itself and communicate with the Kubernetes API to record the state as a custom resource. + +{{< brupop/agent-api-server-control-plane >}} + +{{< alert title="Bottlerocket API Server vs Brupop API Server?" color="success" >}} +Don’t confuse Bottlerocket’s {{< cross-project-current-link project="os" url="/en/os/x.x.x/concepts/api-driven/">}}API Server{{< /cross-project-current-link >}} with Brupop’s API Server; these are two distinct things that happen to share a name. +In this part of the documentation, unless otherwise noted, assume that “API Server” refers to the Brupop API Server. +{{< /alert >}} + +The Controller also runs in a container on the cluster where it regularly evaluates the information about the state of each node and the cluster as a whole; based on this information it supplies instructions to the individual agents about update actions. + +{{< brupop/agent-controller-diagram >}} + +## States + +At any given point nodes are in one of five Brupop states: **idle**, **staged & performed update**, **rebooted into update**, **monitoring update** or **error reset**.
+A node is never in more than one state. +The state of each node is represented as a [Kubernetes Custom Resource](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) called a `BottlerocketShadow` resource or `brs`. + +{{< brupop/state-machine >}} + +### Idle + +A node in the **idle** state does not have a pending update in progress. +Most of the time your nodes will remain in this state. + +### Staged & Performed Update + +{{< brupop/staged-and-performed >}} + +Bottlerocket uses multiple partitions to manage in-place updates. +The OS runs from one partition and, when a new update is available, the update is downloaded and installed into the other. +The Brupop controller periodically requests the agent to check for and download the most recent version of Bottlerocket. +Once downloaded, Bottlerocket modifies the bootloader configuration to boot from the partition with the update and the agent changes the state to **Staged & Performed Update** with the Brupop API server. + +### Reboot into Update + +{{< brupop/reboot-into-update >}} + +To minimize disruptions to the workloads running in the cluster, the controller signals to Kubernetes to prevent new workloads from being scheduled on to the node as well as shut down existing workloads (drain). +Once drained, the agent triggers a reboot into the new OS and changes the state to **Rebooted Into Update** with the Brupop API server. + +### Monitoring Update + +{{< brupop/monitoring >}} + +Once the node reboots, the update is technically complete; however, the period during which all your workloads start up is critical. +Although Bottlerocket’s versioning and variant scheme is built to mitigate incompatibilities between OS versions, there is always a chance that an unforeseen incompatibility exists with some component of your architecture. +Brupop’s state machine has a reserved state for monitoring these incompatibilities (**Monitoring Update**); however, as of this version, this state is a no-op.
+You can suggest a direction for this state on the [Brupop GitHub Repo](https://github.com/bottlerocket-os/bottlerocket-update-operator/issues/new?assignees=&labels=&projects=&template=issue.md&title=Suggestion%20for%20monitoring%20state). + +Consequently, the Agent immediately transitions through **Monitoring Update** back to **Idle** with the API server. + +### Error Reset + +In the situation that any of the above states fail, the state becomes **Error Reset** before transitioning back to **Idle**. diff --git a/content/en/brupop/1.3.x/operate/index.markdown b/content/en/brupop/1.3.x/operate/index.markdown new file mode 100644 index 00000000..39181437 --- /dev/null +++ b/content/en/brupop/1.3.x/operate/index.markdown @@ -0,0 +1,35 @@ ++++ +type = "docs" +title = "Operate & Observe" +weight = 10 +description = "Understanding the day-to-day use of Brupop" ++++ + +After installation on your cluster Brupop runs in the background and generally requires no intervention. +Your nodes will check for updates and apply them according to your configuration and the Bottlerocket update waves. + +However, you can observe the status of the updates by [adhoc query](#adhoc-query) or set up [on-going monitoring](#on-going-monitoring). + +## Adhoc Query + +If you want to see the update status of your nodes, use `kubectl` to get the custom resource `brs`: + +```shell +kubectl get brs --namespace brupop-bottlerocket-aws +``` + +`kubectl` returns the [state](../concepts/#states), current version, target state, and target version. +For example: + +```shell +NAME STATE VERSION TARGET STATE TARGET VERSION +brs-node-1 Idle 1.17.0 Idle +brs-node-2 Idle 1.17.0 StagedUpdate 1.18.0 +``` + +## On-going monitoring + +To facilitate on-going monitoring the Brupop API server and controller provide you with metrics endpoints (`/metrics`) compatible with [Prometheus](https://prometheus.io/).
+The metrics endpoints expose two metrics: one that describes the current version of each node (`brupop_hosts_version`) and another for the [state](../concepts/#states) of each node (`brupop_hosts_state`). + +For a sample configuration of using Prometheus with Brupop see the {{< github-link-at-version url="https://github.com/bottlerocket-os/bottlerocket-update-operator/blob/vx.x.x/deploy/examples/prometheus-resources.yaml" project="brupop" >}}configuration on the Brupop GitHub Repo{{< /github-link-at-version >}}. diff --git a/content/en/brupop/1.3.x/setup/_index.markdown b/content/en/brupop/1.3.x/setup/_index.markdown new file mode 100644 index 00000000..87be0632 --- /dev/null +++ b/content/en/brupop/1.3.x/setup/_index.markdown @@ -0,0 +1,14 @@ ++++ +type = "docs" +title = "Setup" +weight = 5 +description = "Steps to use and configure Brupop on your Bottlerocket nodes" ++++ + +Setting up Brupop for the first time has three major steps: + +- Installing the prerequisite, `cert-manager`, on your cluster, +- Installing Brupop itself, +- Labeling the nodes you want to update with Brupop. + +Many clusters require nothing more than the three above steps, but familiarize yourself with the additional configuration options before installing as you may need to tweak the configuration for your particular needs. diff --git a/content/en/brupop/1.3.x/setup/cert-manager/index.markdown b/content/en/brupop/1.3.x/setup/cert-manager/index.markdown new file mode 100644 index 00000000..8324f870 --- /dev/null +++ b/content/en/brupop/1.3.x/setup/cert-manager/index.markdown @@ -0,0 +1,50 @@ ++++ +title = "Prerequisite: cert-manager" +type = "docs" +description = "Prepare your cluster for Brupop" +weight = 1 ++++ + +Brupop uses [cert-manager](https://cert-manager.io/) to manage self-signed certificates. +You can install it with `kubectl` or [helm](https://helm.sh/).
+ +{{% alert title="Note" color="success" %}} +This guide uses the most recent release of `cert-manager`, {{< brupop/cert-manager-version >}}, but there is no particular hard dependency on this version. +{{% /alert %}} + +## Installing `cert-manager` using `kubectl` + +Use `kubectl` to install cert-manager: + +```shell +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v{{< brupop/cert-manager-version >}}/cert-manager.yaml +``` + +## Installing `cert-manager` using `helm` + +First, add the `cert-manager` helm chart: + +```shell +helm repo add jetstack https://charts.jetstack.io +``` + +Then update your local chart: + +```shell +helm repo update +``` + +Finally, install `cert-manager` including its CRDs: + +```shell +helm install \ + cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version v{{< brupop/cert-manager-version >}} \ + --set installCRDs=true +``` + +## Next step + +After installing `cert-manager`, go ahead and [install Brupop itself](../install/). diff --git a/content/en/brupop/1.3.x/setup/configure/index.markdown b/content/en/brupop/1.3.x/setup/configure/index.markdown new file mode 100644 index 00000000..7a463a45 --- /dev/null +++ b/content/en/brupop/1.3.x/setup/configure/index.markdown @@ -0,0 +1,285 @@ ++++ +title = "Configure Brupop" +type = "docs" +description = "Making the operator work for your needs" +weight = 30 ++++ + +When you install Brupop, the operator comes pre-configured with reasonable defaults. +[Labeling your nodes](#label-nodes) is the only required configuration step. + +Aside from labeling nodes, you configure Brupop with [helm](https://helm.sh/) or with a manifest. +Helm reduces the configuration burden for Brupop substantially with few down sides, so this documentation focuses on configuration with helm. 
+If you choose to not use helm, refer to the {{< github-link-at-version url="https://github.com/bottlerocket-os/bottlerocket-update-operator/blob/vx.x.x/bottlerocket-update-operator.yaml" project="brupop" >}}pre-baked manifest for an example{{< /github-link-at-version >}}. + +## Required Configuration + +### Label nodes + +{{% alert title="Warning" color="warning" %}} +You can fully install Brupop but if you do not apply the proper node labels the operator will not update your nodes. +{{% /alert %}} + +[Kubernetes node labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) control which nodes Brupop updates; specifically, the label `bottlerocket.aws/updater-interface-version=2.0.0` dictates which nodes in the cluster get automatic updates. + +You can label nodes using {{< cross-project-current-link url="/en/os/x.x.x/api/settings/kubernetes/#node-labels" >}}`settings.kubernetes.node-labels`{{< /cross-project-current-link >}} with TOML {{< cross-project-current-link url="/en/os/x.x.x/concepts/api-driven/#user-data" >}}(including instance user data){{< /cross-project-current-link >}}, using `apiclient` in a host container, or `kubectl`. + +#### Label a node with `apiclient` + +From the control or admin container, run the following: + +```shell +apiclient set settings.kubernetes.node-labels.bottlerocket.aws/updater-interface-version=2.0.0 +``` + +#### Label all nodes when starting an EKS cluster with `eksctl` + +```yaml +... +nodeGroups: + - name: name-of-your-nodegroup + labels: { bottlerocket.aws/updater-interface-version: 2.0.0 } +...
+``` + +#### Labeling nodes with `kubectl` + +##### Label a single node + +```shell +# replace MY_NODE_NAME with the name of your node +kubectl label node MY_NODE_NAME bottlerocket.aws/updater-interface-version=2.0.0 +``` + +##### Label all nodes + +If you are running Bottlerocket on all nodes in your cluster, you can use `kubectl` to label all nodes at once: + +```shell +kubectl label node $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}') bottlerocket.aws/updater-interface-version=2.0.0 +``` + +#### Labeling a node with the Bottlerocket API + +Add the following TOML to your instance user data: + +```TOML +... +[settings.kubernetes.node-labels] +"bottlerocket.aws/updater-interface-version" = "2.0.0" +... +``` + +## Optional Configuration + +### API Server Ports + +__Helm Configuration__: `apiserver_internal_port` for internal traffic, `apiserver_service_port` for node agent traffic. + +Brupop uses two ports for [communication between components](../../concepts/#controlled-updates): `apiserver_internal_port` for the controller and the [`BottlerocketShadow` custom resource](../../concepts/#states) and the `apiserver_service_port` for the conversion webhook. +Refer to the +{{< github-link-at-version project="brupop" url="https://github.com/bottlerocket-os/bottlerocket-update-operator/blob/vx.x.x/bottlerocket-update-operator.yaml">}} manifest {{< / github-link-at-version >}} for more information on the usage of each port. + +By default, the operator’s API server uses port `8443` for internal traffic and port `443` for node agents, but you can change these ports via this configuration. +Both ports must be set or the operator will fail to start. + +Example: + +```YAML +apiserver_internal_port: "8443" +``` + +--- + +### Concurrent Updates + +__Helm Configuration__: `max_concurrent_updates` + +You can set the maximum concurrency of updates that Brupop will perform.
+You can either set a specific number of concurrent updates or, alternately, `"unlimited"` to update as many nodes as possible concurrently. +In either case, Brupop always respects [`PodDisruptionBudget`](https://kubernetes.io/docs/tasks/run-application/configure-pdb/). + +{{% alert title="Conflicts between load balancing and concurrency" color="warning" %}} +Take caution when setting concurrency and [excluding load balancers](#load-balancer-exclusion) together, as misconfiguration can result in a condition where all nodes exclude load balancing and can never drain fully to complete the update. +Setting up `PodDisruptionBudget` guards against this condition. +{{% /alert %}} + +Example: + +```yaml +max_concurrent_updates: "1" +``` + +--- + +### Namespace + +__Helm Configuration__: `namespace` + +You can change the namespace where Kubernetes deploys Brupop (default: `brupop-bottlerocket-aws`). + +Example: + +```yaml +namespace: "brupop-bottlerocket-aws" +``` + +--- + +### Load balancer exclusion + +__Helm Configuration__: `exclude_from_lb_wait_time_in_sec` + +With this option, you exclude the node from load balancing and delay draining the node for the number of seconds specified. +Internally, Brupop uses [`node.kubernetes.io/exclude-from-external-load-balancers`](https://kubernetes.io/docs/reference/labels-annotations-taints/#node-kubernetes-io-exclude-from-external-load-balancers) to exclude the node from load balancing. + +See [Concurrent Updates](#concurrent-updates) for an important warning about concurrency and load balancer exclusion. + +Example: + +```yaml +exclude_from_lb_wait_time_in_sec: "0" +``` + +--- + +### Logging + +Brupop emits logs from the controller, agent, and API server through standard Kubernetes logging mechanisms, but you can configure the log format and filter.
+ +#### Format + +__Helm Configuration__: `logging.formatter` + +Log formatting has four options: + +- `full`: Human-readable, single-line logs, +- `compact`: A shorter version of `full`, +- `pretty`: "Excessively pretty", terminal-optimized human-readable logs (default), +- `json`: New line-delimited JSON-formatted (machine-readable) logs. + +Example: + +```yaml +logging: + formatter: "pretty" +``` + +#### Colours + +__Helm Configuration__: `logging.ansi_enabled` + +You can optionally set the logs to add ANSI colour information (`true`/`false`), which is helpful if viewing in a terminal, but adds garbage characters for non-terminal logging utilities. + +Example: + +```yaml +logging: + ansi_enabled: "true" +``` + +#### Filter + +__Helm Configuration__: The controller, agent, and API server are configured via `logging.controller.tracing_filter`, `logging.agent.tracing_filter`, and `logging.apiserver.tracing_filter` (respectively). + +Log filtering accepts either typical log levels (`info` (default), `debug`, `error`) or [filter directives](https://docs.rs/tracing-subscriber/0.3.17/tracing_subscriber/filter/struct.EnvFilter.html#directives). + +Example: + +```yaml + controller: + tracing_filter: "info" + agent: + tracing_filter: "debug" + apiserver: + tracing_filter: "error" +``` + +--- + +### Placement + +__Helm Configuration__: `placement.agent`, `placement.controller`, `placement.apiserver` + +With these configurations, you can control the [tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) for the agent, controller and API server. +For the controller and API server you can also control the [node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector), and [pod affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity).
+ +Example: + +```yaml +# Placement controls +# See the Kubernetes documentation about placement controls for more details: +# * https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ +# * https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector +# * https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity +placement: + agent: + # The agent is a daemonset, so the only controls that apply to it are tolerations. + tolerations: [] + + controller: + tolerations: [] + nodeSelector: {} + podAffinity: {} + podAntiAffinity: {} + + apiserver: + tolerations: [] + nodeSelector: {} + podAffinity: {} + # By default, apiserver pods prefer not to be scheduled to the same node. + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: brupop.bottlerocket.aws/component + operator: In + values: + - apiserver + topologyKey: kubernetes.io/hostname +``` + +--- + +### Private Image Registry + +__Helm Configuration__: `image_pull_secrets` + +If you are testing Brupop with a private image registry, you can configure pull secrets to fetch images. 
+ +Example: + +```yaml +image_pull_secrets: + - name: "brupop" +``` + +--- + +### Scheduling + +__Helm Configuration__: `scheduler_cron_expression` + +Brupop schedules node updates based on a cron expression in the following format: + +```text + ┌───────────── seconds (0 - 59) + │ ┌───────────── minute (0 - 59) + │ │ ┌───────────── hour (0 - 23) + │ │ │ ┌───────────── day of the month (1 - 31) + │ │ │ │ ┌───────────── month (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec) + │ │ │ │ │ ┌───────────── day of the week (Mon, Tue, Wed, Thu, Fri, Sat, Sun) + │ │ │ │ │ │ ┌───────────── year (formatted as YYYY) + │ │ │ │ │ │ │ + │ │ │ │ │ │ │ + * * * * * * * +``` + +Example: + +```yaml +# Every day during the 3 AM hour (03:00:00 - 03:59:59) +scheduler_cron_expression: "* * 3 * * * *" +``` diff --git a/content/en/brupop/1.3.x/setup/install/index.markdown b/content/en/brupop/1.3.x/setup/install/index.markdown new file mode 100644 index 00000000..9019d950 --- /dev/null +++ b/content/en/brupop/1.3.x/setup/install/index.markdown @@ -0,0 +1,60 @@ ++++ +title = "Install Brupop" +type = "docs" +description = "Install the Bottlerocket Update Operator on your Kubernetes cluster" +weight = 10 ++++ + +Installing Brupop creates the custom resource definitions (CRDs), roles, and deployments and uses the latest operator image from [Amazon ECR Public](https://gallery.ecr.aws/bottlerocket/bottlerocket-update-operator). + +You can install Brupop either [with `helm`](#install-with-helm) or a [pre-baked manifest](#install-with-a-manifest).
+ +## Install with `helm` + +First, using [helm](https://helm.sh/) add the `bottlerocket-operator-chart`: + +```shell +helm repo add brupop https://bottlerocket-os.github.io/bottlerocket-update-operator +``` + +Then update your local chart: + +```shell +helm repo update +``` + +Create a namespace for the operator: + +```shell +kubectl create namespace brupop-bottlerocket-aws +``` + +Next, install the Brupop custom resource definition: + +```shell +helm install brupop-crd brupop/bottlerocket-shadow +``` + +Finally, install the operator itself: + +```shell +helm install brupop-operator brupop/bottlerocket-update-operator +``` + +After you've installed the operator, you can move on to the next step: [configuring Brupop](../configure/). + +## Install with a Manifest + +First, [download the manifest](https://github.com/bottlerocket-os/bottlerocket-update-operator/releases/download/v{{< current-version project="brupop" >}}/bottlerocket-update-operator-v{{< current-version project="brupop" >}}.yaml) from the release to your local machine and run the following: + +```shell +kubectl apply -f bottlerocket-update-operator-v{{< current-version project="brupop" >}}.yaml +``` + +Alternately, you can point `kubectl` directly at the manifest URL. + +```shell +kubectl apply -f https://github.com/bottlerocket-os/bottlerocket-update-operator/releases/download/v{{< current-version project="brupop" >}}/bottlerocket-update-operator-v{{< current-version project="brupop" >}}.yaml +``` + +After you've installed the operator, you can move on to the next step: [configuring Brupop](../configure/). diff --git a/content/en/brupop/1.3.x/troubleshoot/index.markdown b/content/en/brupop/1.3.x/troubleshoot/index.markdown new file mode 100644 index 00000000..b2c7c61c --- /dev/null +++ b/content/en/brupop/1.3.x/troubleshoot/index.markdown @@ -0,0 +1,74 @@ ++++ +type = "docs" +title = "Troubleshoot" +weight = 30 +description = "Debugging and solving Brupop problems" ++++ + +## Debugging information + +Brupop’s components emit useful logs for debugging and troubleshooting.
+ +### API Server deployment logs + +Searching through the API Server’s deployment logs for a particular Node ID will yield the mutations to the node. +Assuming the default namespace you can retrieve these by running: + +```shell +kubectl logs deployment/brupop-apiserver --namespace brupop-bottlerocket-aws +``` + +### Agent logs + +Logs from the agent show the specific update actions taken on a particular node. + +First, find the node in the list of the Brupop agent pods (assuming the default namespace): + +```shell +kubectl get pods --selector=brupop.bottlerocket.aws/component=agent -o wide --namespace brupop-bottlerocket-aws +``` + +From this list get the logs for the agent you’re troubleshooting by replacing `<agent-pod-name>` with the name of the agent pod running on that node, as shown in the previous step. + +```shell +kubectl logs <agent-pod-name> --namespace brupop-bottlerocket-aws +``` + +## Common Issues + +### Stuck Updates + +When one or more nodes do not progress through the states and return to idle it is a "stuck update." By default, Brupop only updates one node at a time so a single node can prevent nodes across the cluster from updating. + +There are a few potential causes of stuck updates: + +1. Pod Disruption Budget preventing a node drain. +Brupop uses the Kubernetes Eviction API to drain pods from a node. +It’s possible to have Pod Disruption Budgets configured (often mistakenly) to disallow a pod removal resulting in an un-drainable node that Brupop cannot update. + **Troubleshooting step:** Check your pod disruption budget configuration. +2. Unable to access `updates.bottlerocket.aws`. +Bottlerocket needs to access metadata from a public endpoint to get information about the most recent release. +Production environments may limit this type of outbound access. +**Troubleshooting step:** Log into the control container of a node and run `apiclient update check`. +Failures with this check indicate an outbound block.
+**Potential solution:** Scrape the contents of `updates.bottlerocket.aws` with [`Tuftool`](https://github.com/awslabs/tough/tree/develop/tuftool#download-tuf-repo) and serve from within your cluster, then update your settings accordingly for {{< setting-reference setting="settings.updates.metadata-base-url" current_version="true">}}settings.updates.metadata-base-url{{< /setting-reference >}} and {{< setting-reference setting="settings.updates.targets-base-url" current_version="true">}}settings.updates.targets-base-url{{< /setting-reference >}}. + +3. Other issues while updating. +**Troubleshooting step:** Check the agent logs for the stuck node. + +### Bottlerocket instances start with an old version of Bottlerocket + +After using Brupop for a while you may notice that any brand new nodes added to the cluster start with an older version of Bottlerocket, then Brupop flags them for an update almost immediately. +Brupop can only update existing nodes and it doesn’t manage the node creation process. +How you created your nodes determines how to address this issue: + +* **Auto-scaling group**: update your AMI ID in the launch configuration or template.
+* **Manual creation of nodes with AWS CLI**: Update the `image-id` argument to the latest AMI ID +* **VMware**: Change the `target-name` argument when downloading the OVA with tuftool + +## Related + +* [Bottlerocket FAQ](/en/faq) + - [Why do some of the nodes in my cluster have an update available and others do not?](/en/faq/#7_3) + - [Why are my nodes egressing to `updates.bottlerocket.aws`?](/en/faq/#7_2) +* [Log Configuration](../setup/configure/#logging) diff --git a/content/en/brupop/1.3.x/uninstall/_index.markdown b/content/en/brupop/1.3.x/uninstall/_index.markdown new file mode 100644 index 00000000..6cdbfb02 --- /dev/null +++ b/content/en/brupop/1.3.x/uninstall/_index.markdown @@ -0,0 +1,23 @@ ++++ +type = "docs" +title = "Disable/Uninstall" +weight = 90 +description = "Removing Brupop from nodes or your cluster" ++++ + +You can disable Brupop from managing some or all nodes of your cluster as well as fully remove it from your cluster. + +## Disabling Brupop on nodes + +Brupop will only manage updates for the nodes you’ve labeled `bottlerocket.aws/updater-interface-version=2.0.0`. +Consequently, if you remove the label, Brupop will no longer manage the node updates. +See the [Kubectl `label` docs](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#label) for more information on removing a label. 
+ +## Uninstalling Brupop + +To fully remove Brupop from your cluster, execute the following [helm](https://helm.sh/) uninstall operations on your cluster: + +```shell +helm uninstall brupop-operator +helm uninstall brupop-crd +``` diff --git a/content/en/brupop/_index.markdown b/content/en/brupop/_index.markdown new file mode 100644 index 00000000..06770508 --- /dev/null +++ b/content/en/brupop/_index.markdown @@ -0,0 +1,32 @@ ++++ +type="docs" +title="Brupop" +description="Documentation for the Bottlerocket Update Operator (Brupop)" +body_class="suppress_section_listing" +no_version_warning=true ++++ + + +This section covers installing and using the Bottlerocket Update Operator only. +If you’re seeking general information about Bottlerocket updates, {{< cross-project-current-link project="os" url="/en/os/x.x.x/update/" >}}check the Updating documentation for the OS{{< /cross-project-current-link >}}. + +If you’re looking for information on building, contributing to, or learning about the inner workings of Brupop, the [GitHub repo](https://github.com/bottlerocket-os/bottlerocket-update-operator) is a better destination. + +## Organization + +The Brupop documentation is organized by minor version, with each minor release getting its own namespaced, version-specific section. +Inside each version-specific section are subsections which address specific tasks or categories of information. + +The current documented versions: + +{{< subsections-list >}} + +## Version & Update Policy + +Brupop follows semantic ([semver](https://semver.org/)) versioning to ensure that minor (e.g. `1.1.1` -> `1.2.0`) or patch (e.g. `1.1.0` -> `1.1.1`) updates do not introduce any breaking or incompatible changes. +However, patches are only provided to the latest version, so you should keep your Brupop installation up to date with the latest release. + +## Something Missing?
+ +This [documentation is open-source](https://github.com/bottlerocket-os/bottlerocket-project-website/tree/main/content/en/brupop) and likely incomplete, but will evolve over time to encompass a more complete explanation of the software. +Should you find gaps, you’re invited to file issues or contribute. diff --git a/content/en/faq/_index.markdown b/content/en/faq/_index.markdown index 966e9072..4b21b6cb 100644 --- a/content/en/faq/_index.markdown +++ b/content/en/faq/_index.markdown @@ -1,6 +1,7 @@ +++ type="docs" title="FAQ" +weight=1 +++ {{< faqlist >}} diff --git a/content/en/faqitems/7_2-why-updates-bottlerocket-aws.markdown b/content/en/faqitems/7_2-why-updates-bottlerocket-aws.markdown new file mode 100644 index 00000000..460bb82e --- /dev/null +++ b/content/en/faqitems/7_2-why-updates-bottlerocket-aws.markdown @@ -0,0 +1,7 @@ ++++ +question = "Why are my nodes egressing to `updates.bottlerocket.aws`?" +group = "Updates" ++++ + +The [Bottlerocket Updater API](https://github.com/bottlerocket-os/bottlerocket/blob/develop/sources/updater/README.md) uses TUF metadata served from a public endpoint. +The default AWS variants endpoint is `updates.bottlerocket.aws`. diff --git a/content/en/faqitems/7_3-not_all_nodes_have_available_update.markdown b/content/en/faqitems/7_3-not_all_nodes_have_available_update.markdown new file mode 100644 index 00000000..493a4734 --- /dev/null +++ b/content/en/faqitems/7_3-not_all_nodes_have_available_update.markdown @@ -0,0 +1,8 @@ ++++ +question = "Why do some of the nodes in my cluster have an update available and others do not?" +group = "Updates" ++++ +This is normal. +Bottlerocket uses "waves" to stagger deployment of updates. +When a node starts for the first time, the boot process generates a random seed (or uses the value from {{< setting-reference setting="settings.updates.seed" current_version="true">}}settings.updates.seed{{< /setting-reference >}}). 
+Bottlerocket's update process uses the seed to determine if a node should update, so in the situation where some of your nodes have an available update and some do not, it just means that the update wave has reached the seeds of some nodes but not yet those of the others. diff --git a/content/en/os/_index.markdown b/content/en/os/_index.markdown index 03876fe1..58b1ae8e 100644 --- a/content/en/os/_index.markdown +++ b/content/en/os/_index.markdown @@ -4,6 +4,7 @@ type="docs" description="Documentation for the Bottlerocket operating system" body_class="suppress_section_listing" no_version_warning=true +weight=2 +++ This section covers installing and using the Bottlerocket operating system[^1]. If you’re looking for information on building, contributing to, or learning about the inner workings of Bottlerocket, the [GitHub repo](https://github.com/bottlerocket-os/bottlerocket) has more information. diff --git a/data/versions/current.toml b/data/versions/current.toml index 9521b6c6..239a2cac 100644 --- a/data/versions/current.toml +++ b/data/versions/current.toml @@ -7,4 +7,10 @@ [k8s] versions = ["1.23","1.24","1.25","1.26","1.27","1.28","1.29"] [ecs] - versions = ["1","2"] \ No newline at end of file + versions = ["1","2"] + +[brupop] + major = 1 + minor = 3 + patch = 0 + cert_manager = "1.14.1" diff --git a/layouts/partials/faq-body.html b/layouts/partials/faq-body.html index 3455fe91..773f43fb 100644 --- a/layouts/partials/faq-body.html +++ b/layouts/partials/faq-body.html @@ -6,7 +6,7 @@

{{ $group_name }}

{{- range (sort (index $questions $group_name) "question" ) -}} -

{{ .question }}

+

{{ .question | markdownify }}

{{ .answer }} {{- end -}}
diff --git a/layouts/partials/faq-index.html b/layouts/partials/faq-index.html index 2ac6cbc3..43dc421b 100644 --- a/layouts/partials/faq-index.html +++ b/layouts/partials/faq-index.html @@ -6,7 +6,7 @@
    {{- range (sort (index $questions $group_name) "question" ) -}}
  1. - {{ .question }} + {{ .question | markdownify }}
  2. {{- end -}}
diff --git a/layouts/shortcodes/brupop/agent-api-server-control-plane.html b/layouts/shortcodes/brupop/agent-api-server-control-plane.html new file mode 100644 index 00000000..2ed474ea --- /dev/null +++ b/layouts/shortcodes/brupop/agent-api-server-control-plane.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + Bottlerocket Host + + + + + + + + + + + + + + + + + + + Kubernetes Control Plane + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/agent-controller-diagram.html b/layouts/shortcodes/brupop/agent-controller-diagram.html new file mode 100644 index 00000000..a9df83b0 --- /dev/null +++ b/layouts/shortcodes/brupop/agent-controller-diagram.html @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bottlerocket Host + + + + + + + + + + + + + Kubernetes Control Plane + + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/cert-manager-version.html b/layouts/shortcodes/brupop/cert-manager-version.html new file mode 100644 index 00000000..b406627e --- /dev/null +++ b/layouts/shortcodes/brupop/cert-manager-version.html @@ -0,0 +1 @@ +{{ $.Site.Data.versions.current.brupop.cert_manager }} \ No newline at end of file diff --git a/layouts/shortcodes/brupop/components-diagram.html b/layouts/shortcodes/brupop/components-diagram.html new file mode 100644 index 00000000..d3ca900f --- /dev/null +++ b/layouts/shortcodes/brupop/components-diagram.html @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Brupop Agent + (1x / node) + + + + + + + + + + + + + + + + API Server + (Default: + 3x / cluster) + + + + + + + + + + Controller + (1x / cluster) + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/monitoring.html b/layouts/shortcodes/brupop/monitoring.html new file mode 100644 index 00000000..3efb50a9 --- /dev/null +++ b/layouts/shortcodes/brupop/monitoring.html @@ -0,0 +1,70 @@ + + + + + + + + + + + + 
+ Prev Version + + + + + New Running Version + + + + Node 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + Prev Version + + + + + New Running Version + + + + Node 1 + + + + Update state to + + + + Monitoring Update + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/reboot-into-update.html b/layouts/shortcodes/brupop/reboot-into-update.html new file mode 100644 index 00000000..7dc2b69d --- /dev/null +++ b/layouts/shortcodes/brupop/reboot-into-update.html @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + New Version + + + + + Running Version + + + + + + + + + + + + + + + + + + Prev Version + + + + + + New Running Version + + + + + + + + + + + + + Reboot + + + + Node 1 + + + + Node 1 + + + + Update state to + + + + Rebooted Into Update + + + + + + + + + Stop new and drain existing workloads + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/staged-and-performed.html b/layouts/shortcodes/brupop/staged-and-performed.html new file mode 100644 index 00000000..a2d6fb52 --- /dev/null +++ b/layouts/shortcodes/brupop/staged-and-performed.html @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + New Version + + + + Running Version + + + + + Download + + + + + + + Make + Boot Partition + + + + + Update state to + Staged & Performed + + + + \ No newline at end of file diff --git a/layouts/shortcodes/brupop/state-machine.html b/layouts/shortcodes/brupop/state-machine.html new file mode 100644 index 00000000..ed24af03 --- /dev/null +++ b/layouts/shortcodes/brupop/state-machine.html @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Idle + + + Staged & Performed + Update + + + Rebooted + into Update + + + Monitoring + Update + + + + Error Reset + + \ No newline at end of file diff --git a/layouts/shortcodes/cross-project-current-link.html b/layouts/shortcodes/cross-project-current-link.html new file mode 100644 index 00000000..a6b9d6af --- /dev/null +++ 
b/layouts/shortcodes/cross-project-current-link.html @@ -0,0 +1,10 @@ +{{- $project := .Get "project" | default "os" -}} +{{- $url_arg := .Get "url" -}} +{{- $current_version_data := $.Site.Data.versions.current -}} +{{- $v := index $current_version_data $project -}} + +{{- $new_url := print "/" $v.major "." $v.minor ".x/" -}} + +{{- $url := replace $url_arg "/x.x.x/" $new_url }} + +{{ .Inner | markdownify }} diff --git a/layouts/shortcodes/current-version.html b/layouts/shortcodes/current-version.html new file mode 100644 index 00000000..679a2009 --- /dev/null +++ b/layouts/shortcodes/current-version.html @@ -0,0 +1,8 @@ +{{- $project := .Get "project" | default "os" -}} +{{- $minor_override_value := .Get "minor_override" -}} +{{- $patch_override_value := .Get "patch_override" -}} +{{- $seperator := default "." (.Get "seperator_override") }} +{{- $version_data := $.Site.Data.versions.current -}} +{{- $project_data := index $version_data $project -}} + +{{ $project_data.major }}{{ $seperator }}{{ default $project_data.minor $minor_override_value }}{{ $seperator }}{{ default $project_data.patch $patch_override_value }} \ No newline at end of file diff --git a/layouts/shortcodes/github-link-at-version.html b/layouts/shortcodes/github-link-at-version.html new file mode 100644 index 00000000..06f61ea1 --- /dev/null +++ b/layouts/shortcodes/github-link-at-version.html @@ -0,0 +1,11 @@ +{{- $project := .Get "project" | default "os" -}} +{{- $url_arg := .Get "url" -}} +{{- $replace := .Get "replace" | default "/vx.x.x/" -}} +{{- $current_version_data := $.Site.Data.versions.current -}} +{{- $v := index $current_version_data $project -}} + +{{- $new_url := print "/v" $v.major "." $v.minor "." 
$v.patch "/" -}} + +{{- $url := replace $url_arg $replace $new_url }} + +{{ .Inner | markdownify }} diff --git a/layouts/shortcodes/setting-reference.html b/layouts/shortcodes/setting-reference.html index 6e501297..bb29055a 100644 --- a/layouts/shortcodes/setting-reference.html +++ b/layouts/shortcodes/setting-reference.html @@ -2,9 +2,17 @@ {{- $setting := or (.Get "setting") $.Inner -}} {{- $setting_parts := strings.Split $setting "." -}} {{- $ref := index $setting_parts 1 -}} -{{- $current_path := print .Page.File.Dir -}} -{{- $parts := split $current_path "/" -}} -{{- $version := index $parts 1 -}} +{{- $version := "" -}} +{{- if (.Get "current_version") -}} + {{- $versions := index $.Site.Data.versions.current "os" -}} + {{/* create the version string (e.g. `1.14.x`) */}} + {{- $version = print $versions.major "." $versions.minor ".x" -}} +{{- else -}} + {{- $current_path := print .Page.File.Dir -}} + + {{- $parts := split $current_path "/" -}} + {{- $version = index $parts 1 -}} +{{- end -}} {{- $lang := print $.Page.Language -}} {{- $settings_at_version := index (index .Site.Data.settings $version) $ref }} {{- $page := . -}}