From 269ff589e262afb018852798f55a0ef0fcd31bcb Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 19 Aug 2024 18:32:26 +0200 Subject: [PATCH 01/10] tests: Export microovn_extract_ctn_n. This function is useful outisde the `microovn.bash` file. Signed-off-by: Frode Nordahl --- tests/test_helper/microovn.bash | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_helper/microovn.bash b/tests/test_helper/microovn.bash index 0c2ee344..d8587fe1 100644 --- a/tests/test_helper/microovn.bash +++ b/tests/test_helper/microovn.bash @@ -553,12 +553,18 @@ MICROOVN_PREFIX_LRP=lrp-sw MICROOVN_SUFFIX_LRP_LSP=lrp -function microovn_extract_ctn_n__() { +# microovn_extract_ctn_n CONTAINER +# +# When CONTAINER is a string ending with "-n" this function will extract and +# validate that n is an integer before printing it. +# +# Note that it is up to the caller to ensure that the prerequisite mentioned +# above is met. +function microovn_extract_ctn_n() { local container=$1; shift local n=${container##*-} assert test "$n" -ge 0 - assert test "$n" -le 9 echo "$n" } @@ -575,7 +581,8 @@ function microovn_add_gw_router() { local container=$1; shift local n - n=$(microovn_extract_ctn_n__ "$container") + n=$(microovn_extract_ctn_n "$container") + assert test "$n" -le 9 local ls_name="${MICROOVN_PREFIX_LS}-${container}" local lr_name="${MICROOVN_PREFIX_LR}-${container}" local lrp_name="${MICROOVN_PREFIX_LRP}-${container}" @@ -650,7 +657,8 @@ function microovn_add_vif() { local if_name=$1; shift local n - n=$(microovn_extract_ctn_n__ "$container") + n=$(microovn_extract_ctn_n "$container") + assert test "$n" -le 9 local lladdr="00:00:02:00:01:0$n" local cidr="10.42.$n.10/24" local ls_name="${MICROOVN_PREFIX_LS}-${container}" From 165289ecac539948225f0bd8a0e6949fe9c5b3c5 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 19 Aug 2024 18:38:05 +0200 Subject: [PATCH 02/10] tests: Make microovn mac/ip generation smarter. The current code make silly assumptions such as no more than 9 containers. Handle up to 255 containers. Signed-off-by: Frode Nordahl --- tests/test_helper/microovn.bash | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_helper/microovn.bash b/tests/test_helper/microovn.bash index d8587fe1..0622fd89 100644 --- a/tests/test_helper/microovn.bash +++ b/tests/test_helper/microovn.bash @@ -582,11 +582,13 @@ function microovn_add_gw_router() { local n n=$(microovn_extract_ctn_n "$container") - assert test "$n" -le 9 + assert test "$n" -le 255 local ls_name="${MICROOVN_PREFIX_LS}-${container}" local lr_name="${MICROOVN_PREFIX_LR}-${container}" local lrp_name="${MICROOVN_PREFIX_LRP}-${container}" local lrp_lsp_name="${ls_name}-${MICROOVN_SUFFIX_LRP_LSP}" + local lrp_addresses + printf -v lrp_addresses "00:00:02:00:00:%02x 10.42.%d.1/24" "$n" "$n" lxc_exec "$container" \ "microovn.ovn-nbctl \ @@ -597,8 +599,7 @@ function microovn_add_gw_router() { -- \ set Logical_Router $lr_name options:chassis=$container \ -- \ - lrp-add $lr_name $lrp_name \ - 00:00:02:00:00:0$n 10.42.$n.1/24 \ + lrp-add $lr_name $lrp_name $lrp_addresses \ -- \ lsp-add $ls_name $lrp_lsp_name \ -- \ @@ -658,9 +659,11 @@ function microovn_add_vif() { local n n=$(microovn_extract_ctn_n "$container") - assert test "$n" -le 9 - local lladdr="00:00:02:00:01:0$n" - local cidr="10.42.$n.10/24" + assert test "$n" -le 255 + local lladdr + printf -v lladdr "00:00:02:00:01:%02x" "$n" + local cidr + printf -v cidr "10.42.%d.10/24" "$n" local ls_name="${MICROOVN_PREFIX_LS}-${container}" local lsp_name="${container}-${ns_name}-${if_name}" From d2b79fdb634e04317c6242683d99a6a898e7a218 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 19 Aug 2024 17:52:46 +0200 Subject: [PATCH 03/10] tests/upgrade: Remove unnecessary service check. The upgrade test setup teardown script currently iterates over all TEST_CONTAINERS and filter out any not running 'central' services as part of the upgrade steup. At the very beginning of the same file this categorization is already being performed to populate the CENTRAL_CONTAINERS and CHASSIS_CONTAINERS variables. Use them instead. Signed-off-by: Frode Nordahl --- tests/test_helper/setup_teardown/upgrade.bash | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index 37ff29b0..1bd9d5a3 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -48,12 +48,7 @@ setup_file() { echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV" >&3 install_microovn "$MICROOVN_SNAP_PATH" $TEST_CONTAINERS - for container in $TEST_CONTAINERS; do - local container_services - container_services=$(microovn_get_cluster_services "$container") - if [[ "$container_services" != *"central"* ]]; then - continue - fi + for container in $CENTRAL_CONTAINERS; do microovn_wait_ovndb_state "$container" nb connected 15 microovn_wait_ovndb_state "$container" sb connected 15 done From 52cb407ad6edfb1a60fc56b36ba1d1d78bd86455 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 19 Aug 2024 18:57:08 +0200 Subject: [PATCH 04/10] tests: Rename `perform_manual_upgrade_steps` function. The function is conditionally performing an action, and this is not evident in its current name. Let's name it `maybe_perform_manual_upgrade_steps` instead! Signed-off-by: Frode Nordahl --- tests/test_helper/setup_teardown/upgrade.bash | 2 +- tests/test_helper/upgrade_procedures.bash | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index 1bd9d5a3..2f0a649c 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -53,7 +53,7 @@ setup_file() { microovn_wait_ovndb_state "$container" sb connected 15 done - perform_manual_upgrade_steps $TEST_CONTAINERS + maybe_perform_manual_upgrade_steps $TEST_CONTAINERS fi } diff --git a/tests/test_helper/upgrade_procedures.bash b/tests/test_helper/upgrade_procedures.bash index 9c000e47..d83f95bd 100644 --- a/tests/test_helper/upgrade_procedures.bash +++ b/tests/test_helper/upgrade_procedures.bash @@ -121,11 +121,11 @@ function revision_111_upgrade_tls() { ovsdb_rebuild_tls_cluster "sb" "$central_containers" } -# perform_manual_upgrade_steps CONTAINER1 [CONTAINER2 ...] +# maybe_perform_manual_upgrade_steps CONTAINER1 [CONTAINER2 ...] # # Sequentially execute manual steps that are required for upgrade # between certain MicroOVN snap revisions. -function perform_manual_upgrade_steps() { +function maybe_perform_manual_upgrade_steps() { local containers=$*; shift if [ "$MICROOVN_SNAP_REV" -lt 111 ]; then From b9359dcc97a45bf732fb53740e32f0dc8bf48071 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 19 Aug 2024 18:53:26 +0200 Subject: [PATCH 05/10] tests: Separate upgrade for central and chassis. This patch prepares for further changes in the next patches. Signed-off-by: Frode Nordahl --- tests/test_helper/setup_teardown/upgrade.bash | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index 2f0a649c..8c3f7963 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -45,15 +45,22 @@ setup_file() { assert [ -n "$MICROOVN_SNAP_REV" ] if [ -n "$UPGRADE_DO_UPGRADE" ]; then - echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV" >&3 - install_microovn "$MICROOVN_SNAP_PATH" $TEST_CONTAINERS + assert [ -n "$CENTRAL_CONTAINERS" ] + assert [ -n "$CHASSIS_CONTAINERS" ] + echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV " \ + "central container(s)." >&3 + install_microovn "$MICROOVN_SNAP_PATH" $CENTRAL_CONTAINERS for container in $CENTRAL_CONTAINERS; do microovn_wait_ovndb_state "$container" nb connected 15 microovn_wait_ovndb_state "$container" sb connected 15 done - maybe_perform_manual_upgrade_steps $TEST_CONTAINERS + maybe_perform_manual_upgrade_steps $CENTRAL_CONTAINERS + + echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV " \ + "on chassis container(s)." >&3 + install_microovn "$MICROOVN_SNAP_PATH" $CHASSIS_CONTAINERS fi } From 5d86b6c0f8fdcc55af21b648b0f5595420f4dca4 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Tue, 20 Aug 2024 09:23:13 +0200 Subject: [PATCH 06/10] tests: Move microcluster online check to shared setup. Commit f5ac8c5e15aba2344b36a42ad8344e7b0c107751 added this to the `ovsdb_schema_upgrade` test, but it is needed for all upgrade tests. Move it to the `upgrade` setup/teardown script wich is shared among all the upgrade tests. Drop the use of `run` since we don't care about the output of `microovn_wait_online`. Signed-off-by: Frode Nordahl --- tests/test_helper/bats/ovsdb_schema_upgrade.bats | 8 -------- tests/test_helper/setup_teardown/upgrade.bash | 9 +++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/test_helper/bats/ovsdb_schema_upgrade.bats b/tests/test_helper/bats/ovsdb_schema_upgrade.bats index 44375de1..dd0b155b 100644 --- a/tests/test_helper/bats/ovsdb_schema_upgrade.bats +++ b/tests/test_helper/bats/ovsdb_schema_upgrade.bats @@ -57,14 +57,6 @@ setup() { old_member_msg_sb="$original_sb" fi - # Before we proceed with upgrade, make sure that cluster is fully converged. Starting - # the upgrade before the cluster is ready may lead to unexpectedly long convergence - # after the upgrade. - local container="" - for container in $TEST_CONTAINERS; do - run wait_microovn_online "$container" 60 - done - local container_index=0 local upgrade_container_index=0 local old_container_index=0 diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index 8c3f7963..fa60006f 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -38,6 +38,13 @@ setup_file() { fi done + # Make sure that microcluster is fully converged before proceeding. + # Performing further actions before the microcluster is ready may lead to + # unexpectedly long convergence after a microcluster schema upgrade. + for container in $TEST_CONTAINERS; do + wait_microovn_online "$container" 60 + done + # detect and export initial MicroOVN snap revision container=$(echo "$TEST_CONTAINERS" | awk '{print $1;}' ) export MICROOVN_SNAP_REV="" @@ -61,6 +68,8 @@ setup_file() { echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV " \ "on chassis container(s)." >&3 install_microovn "$MICROOVN_SNAP_PATH" $CHASSIS_CONTAINERS + + wait_microovn_online "$container" 60 fi } From c85e72a3dfed5445b0d11fd236d8add1be9fbedd Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Tue, 20 Aug 2024 09:23:13 +0200 Subject: [PATCH 07/10] tests: Increase upgrade OVSDB connected timeout. When waiting for central nodes to come back online after snap upgrade, the current timeout of 15 seconds is not sufficient. The default election timer is set to 16 seconds, so it can take at least this amount of time before a restarted node can successfully reconnect to the cluster. Signed-off-by: Frode Nordahl --- tests/test_helper/setup_teardown/upgrade.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index fa60006f..a0bc14b0 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -59,8 +59,8 @@ setup_file() { install_microovn "$MICROOVN_SNAP_PATH" $CENTRAL_CONTAINERS for container in $CENTRAL_CONTAINERS; do - microovn_wait_ovndb_state "$container" nb connected 15 - microovn_wait_ovndb_state "$container" sb connected 15 + microovn_wait_ovndb_state "$container" nb connected 32 + microovn_wait_ovndb_state "$container" sb connected 32 done maybe_perform_manual_upgrade_steps $CENTRAL_CONTAINERS From 3478911dfa6a223449c821e4b0747d23e7963fe1 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Tue, 20 Aug 2024 16:10:27 +0200 Subject: [PATCH 08/10] tests: Add schema check from `microovn status` POV. Signed-off-by: Frode Nordahl --- tests/test_helper/microovn.bash | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_helper/microovn.bash b/tests/test_helper/microovn.bash index 0622fd89..b90a8430 100644 --- a/tests/test_helper/microovn.bash +++ b/tests/test_helper/microovn.bash @@ -547,6 +547,23 @@ function wait_ovsdb_cluster_container_join() { return $rc } +# microovn_status_is_schema_ok CONTAINER NBSB +# +# Checks whether schema for NBSB (nb|sb) on CONTAINER is OK from the +# perspective of the `microovn status` command. +function microovn_status_is_schema_ok() { + local container=$1; shift + local nbsb=$1; shift + + local schema_name + schema_name=$(_ovn_schema_name "$nbsb") + + local cmd + printf -v cmd 'microovn status | grep -q %s:\ OK' "${schema_name//_/\\ }" + + lxc_exec "$container" "$cmd" +} + MICROOVN_PREFIX_LS=sw MICROOVN_PREFIX_LR=lr MICROOVN_PREFIX_LRP=lrp-sw From b77d8377f793e9f7d12eda82d3de5a41898c960d Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Tue, 20 Aug 2024 16:32:54 +0200 Subject: [PATCH 09/10] tests: Add ping_packets_lost reaper. While the check is a very simple awk expression, we are using it multiple times. Signed-off-by: Frode Nordahl --- tests/test_helper/common.bash | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_helper/common.bash b/tests/test_helper/common.bash index 212f2c1a..109060ba 100644 --- a/tests/test_helper/common.bash +++ b/tests/test_helper/common.bash @@ -306,3 +306,18 @@ function collect_coverage() { lxc_pull_dir "$container/var/snap/microovn/common/data/coverage" "$output_dir" done } + +# ping_packets_lost CONTAINER DST [ NETNS ] +# +# Stop ping process previously started by a call to ``ping_start`` and print +# how many packets were lost, if any. +function ping_packets_lost() { + local container=$1; shift + local dst=$1; shift + local netns=$1 + + local n_lost + n_lost=$(ping_reap "$container" "$dst" "$netns" \ + | awk '/packets/{print$1-$4}') + echo "$n_lost" +} From 4dc85389edbb254402479790e349e9f6c54bd4a2 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 10 Jun 2024 14:01:30 +0200 Subject: [PATCH 10/10] tests: Measure data path uptime during upgrade. Create a network namespace to simulate a container/virtual machine workload running on a node without the central service. Measurements are performed by starting a ping in the background inside the workload before performing certain steps, and then reaping the ping process, reading the resulting statistics. The target for the ICMP echo requests is a gateway router which the OVN Controller installs into the local Open vSwitch instance, and we have confirmed that this is sufficient to detect side effect of an sincorrect upgrade sequence. Results are checked at two points in the upgrade process: 1. After upgrading central nodes. This measurement ensures that MicroOVN handles the upgrade in such a way so that a central first upgrade does not have datapath impact. Zero packet loss is expected. 2. After upgrading chassis nodes with workloads. This measurment ensures that MicroOVN handles service life cycle on snap refresh with minimal downtime for workloads. Signed-off-by: Frode Nordahl --- tests/test_helper/setup_teardown/upgrade.bash | 83 +++++++++++++++++-- 1 file changed, 78 insertions(+), 5 deletions(-) diff --git a/tests/test_helper/setup_teardown/upgrade.bash b/tests/test_helper/setup_teardown/upgrade.bash index a0bc14b0..ea3c2f73 100644 --- a/tests/test_helper/setup_teardown/upgrade.bash +++ b/tests/test_helper/setup_teardown/upgrade.bash @@ -22,7 +22,8 @@ setup_file() { export CENTRAL_CONTAINERS export CHASSIS_CONTAINERS - launch_containers $TEST_CONTAINERS + launch_containers_args \ + "${TEST_LXD_LAUNCH_ARGS:--c security.nesting=true}" $TEST_CONTAINERS wait_containers_ready $TEST_CONTAINERS install_microovn_from_store "$MICROOVN_SNAP_CHANNEL" $TEST_CONTAINERS bootstrap_cluster $TEST_CONTAINERS @@ -54,8 +55,25 @@ setup_file() { if [ -n "$UPGRADE_DO_UPGRADE" ]; then assert [ -n "$CENTRAL_CONTAINERS" ] assert [ -n "$CHASSIS_CONTAINERS" ] - echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV " \ - "central container(s)." >&3 + + # Export names used locally on chassis containers for use in + # teardown_file(). + export UPGRADE_NS_NAME="upgrade_ns0" + export UPGRADE_VIF_NAME="upgrade_vif0" + + # Set up gateway router, workload and background ping on each chassis. + for container in $CHASSIS_CONTAINERS; do + local ctn_n + ctn_n=$(microovn_extract_ctn_n "$container") + microovn_add_gw_router "$container" + netns_add "$container" "$UPGRADE_NS_NAME" + microovn_add_vif "$container" \ + "$UPGRADE_NS_NAME" "$UPGRADE_VIF_NAME" + ping_start "$container" 10.42.${ctn_n}.1 "$UPGRADE_NS_NAME" + done + + echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV "\ + "on central container(s)." >&3 install_microovn "$MICROOVN_SNAP_PATH" $CENTRAL_CONTAINERS for container in $CENTRAL_CONTAINERS; do @@ -65,15 +83,70 @@ setup_file() { maybe_perform_manual_upgrade_steps $CENTRAL_CONTAINERS - echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV " \ + # Reap ping and assert on result. + # + # Start background ping for next measurement. + for container in $CHASSIS_CONTAINERS; do + local ctn_n + ctn_n=$(microovn_extract_ctn_n "$container") + local n_lost + n_lost=$(ping_packets_lost \ + "$container" 10.42.${ctn_n}.1 "$UPGRADE_NS_NAME") + # Apart from the one packet that can be lost while stopping + # ``ping``, we expect zero loss. + assert test "$n_lost" -le 1 + + ping_start "$container" 10.42.${ctn_n}.1 "$UPGRADE_NS_NAME" + done + + echo "# Upgrading MicroOVN from revision $MICROOVN_SNAP_REV "\ "on chassis container(s)." >&3 install_microovn "$MICROOVN_SNAP_PATH" $CHASSIS_CONTAINERS - wait_microovn_online "$container" 60 + # Now that the remaining containers have been upgraded any pending + # schema conversions will be performed both for the microcluster and + # OVSDB databases. Ensure these processes are complete before + # measuring the result. + for container in $TEST_CONTAINERS; do + wait_microovn_online "$container" 60 + for db in nb sb; do + local cmd + printf -v cmd \ + 'microovn_status_is_schema_ok %s %s' "$container" "$db" + wait_until "$cmd" + done + done + + # Reap ping and assert on result. + for container in $CHASSIS_CONTAINERS; do + local ctn_n + ctn_n=$(microovn_extract_ctn_n "$container") + local max_lost=8 + local n_lost + n_lost=$(ping_packets_lost \ + "$container" 10.42.${ctn_n}.1 "$UPGRADE_NS_NAME") + # Upgrading the node with the instance being monitored will + # inevitably cause some data path interruption as Open vSwitch + # restarts. + assert test "$n_lost" -le "$max_lost" + echo "# Upgrade induced packet loss: $n_lost packets " \ + "(threshold $max_lost)" >&3 + done fi } teardown_file() { collect_coverage $TEST_CONTAINERS + + if [ -n "$UPGRADE_NS_NAME" ] && [ -n "$UPGRADE_VIF_NAME" ]; then + local container + for container in $CHASSIS_CONTAINERS; do + microovn_delete_vif "$container" \ + "$UPGRADE_NS_NAME" "$UPGRADE_VIF_NAME" + netns_delete "$container" "$UPGRADE_NS_NAME" + microovn_delete_gw_router "$container" + done + fi + delete_containers $TEST_CONTAINERS }