Skip to content

Commit

Permalink
Merge main into no-ipv6-range
Browse files Browse the repository at this point in the history
  • Loading branch information
david-crespo committed Mar 19, 2024
2 parents fdd7738 + a3fa540 commit 147a52a
Show file tree
Hide file tree
Showing 421 changed files with 30,941 additions and 8,674 deletions.
208 changes: 208 additions & 0 deletions .github/buildomat/jobs/a4x2-deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
#!/bin/bash
#:
#: name = "a4x2-deploy"
#: variety = "basic"
#: target = "lab-2.0-opte-0.27"
#: rust_toolchain = "stable"
#: output_rules = [
#: "/out/falcon/*.log",
#: "/out/falcon/*.err",
#: "/out/connectivity-report.json",
#: "/ci/out/*-sled-agent.log",
#: "/ci/out/*cockroach*.log",
#: "%/out/dhcp-server.log",
#: ]
#: skip_clone = true
#: enable = true
#:
#: [dependencies.a4x2]
#: job = "a4x2-prepare"

# Abort on the first failing command or pipeline stage, and trace every
# command into the job log.
set -o errexit -o pipefail -o xtrace

# /out is the directory the output_rules in the job header collect from.
# Create it with elevated privileges, then hand it to the current user.
out_dir=/out
pfexec mkdir -p "$out_dir"
pfexec chown "$UID" "$out_dir"

#
# If we fail, try to collect some debugging information
#
# EXIT handler for this script. When the script is exiting with status 0 it
# exits 0 immediately and collects nothing. Otherwise it gathers, best-effort:
#   - disk usage (df -h) and `svcs -xvZ` output from every gimlet VM,
#   - the falcon files from .falcon/, copied to /out/falcon with the per-VM
#     *.out files renamed to *.log so they match the job's output_rules,
#   - connectivity-report.json,
#   - sled-agent and cockroachdb logs from every gimlet, under /ci/out.
#
# Paths such as ./a4x2 and .falcon are relative to the current directory at
# the time of the failure.
#
_exit_trap() {
    # Must stay the first statement so that $? is still the script's status.
    local status=$?
    local gimlet vm log
    # Glob is expanded on the gimlet, not locally; it is only ever used inside
    # double quotes below.
    local crdb_root='/pool/ext/*/crypt/zone/oxz_cockroachdb*/root'

    if [[ $status -eq 0 ]]; then
        exit 0
    fi

    # Everything below is best-effort: one failed collection step must not
    # prevent the remaining ones from running.
    set +o errexit

    df -h

    # show what services have issues
    for gimlet in g0 g1 g2 g3; do
        ./a4x2 exec "$gimlet" "svcs -xvZ"
    done

    mkdir -p /out/falcon
    cp .falcon/* /out/falcon/
    for vm in ce cr1 cr2 g0 g1 g2 g3; do
        mv "/out/falcon/$vm.out" "/out/falcon/$vm.log"
    done
    cp connectivity-report.json /out/

    # All per-gimlet logs are written here. (An unused /ci/log directory used
    # to be created as well; nothing ever wrote to it.)
    mkdir -p /ci/out

    for gimlet in g0 g1 g2 g3; do
        ./a4x2 exec "$gimlet" \
            "cat /var/svc/log/oxide-sled-agent:default.log" \
            > "/ci/out/$gimlet-sled-agent.log"
    done

    # collect cockroachdb logs
    for gimlet in g0 g1 g2 g3; do
        for log in cockroach cockroach-stderr cockroach-health; do
            ./a4x2 exec "$gimlet" "cat $crdb_root/data/logs/$log.log" \
                > "/ci/out/$gimlet-$log.log"
        done

        ./a4x2 exec "$gimlet" \
            "cat $crdb_root/var/svc/log/oxide-cockroachdb:default.log*" \
            > "/ci/out/$gimlet-oxide-cockroachdb.log"
    done
}
trap _exit_trap EXIT

#
# Install propolis
#
# Download a pinned propolis-server build from buildomat into the current
# directory, mark it executable and move it into /usr/bin.
#
propolis_url=https://buildomat.eng.oxide.computer/wg/0/artefact/01HJ4BJJY2Q9EKXHYV6HQZ8XPN/qQS2fnkS9LebcL4cDLeHRWdleSiXaGKEXGLDucRoab8pwBSi/01HJ4BJY5F995ET252YSD4NJWV/01HJ4CGFH946THBF0ZRH6SRM8X/propolis-server
curl -fOL "$propolis_url"
chmod +x propolis-server
pfexec mv propolis-server /usr/bin/

#
# Make space for CI work
#
# Build a scratch pool (cpool) on $DISK and mount a dataset from it at /ci,
# which becomes the working directory for the rest of the job.
#
export DISK=${DISK:-c1t1d0}
pfexec diskinfo
pfexec zpool create -o ashift=12 -f cpool $DISK
pfexec zfs create -o mountpoint=/ci cpool/ci

# Trim the new pool only when the catacomb "trim-me" endpoint answers with
# something containing "true", then poll every 10 seconds until the trim
# status output mentions 100%.
# NOTE(review): the poll has no upper bound; if the status never reports 100%
# this waits until the job itself is killed -- confirm that is acceptable.
if [[ $(curl -s http://catacomb.eng.oxide.computer:12346/trim-me) == *true* ]]; then
    pfexec zpool trim cpool
    until [[ $(zpool status -t cpool) == *100%* ]]; do
        sleep 10
    done
fi

pfexec chown "$UID" /ci
cd /ci

#
# Fetch and decompress the cargo bay from the a4x2-prepare job
#
# One tarball per cargo-bay subdirectory is unpacked into the current
# directory.
#
for bay in ce cr1 cr2 omicron-common g0 g1 g2 g3 tools; do
    tar -xvzf "/input/a4x2/out/cargo-bay-$bay.tgz"
done

# Copy the shared omicron-common output into every sled's out directory.
common_out=cargo-bay/omicron-common/omicron/out
for sled in g0 g1 g2 g3; do
    cp -r "$common_out"/* "cargo-bay/$sled/omicron/out/"
done

# Record the resulting tree in the job log.
ls -R

#
# Fetch the a4x2 topology manager program
#
# The binary is pinned to a specific testbed revision and downloaded from
# buildomat's public file store into the current directory.
#
buildomat_url=https://buildomat.eng.oxide.computer
testbed_artifact_path=public/file/oxidecomputer/testbed/topo/
testbed_rev=67454d38958bcf51830850aec36600df84b7d8a0
a4x2_url="${buildomat_url}/${testbed_artifact_path}/${testbed_rev}/a4x2"
curl -fOL "$a4x2_url"
chmod +x a4x2

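#
# Falcon images and disks live in the cpool zpool created earlier in this
# script (see FALCON_DATASET below); no separate pool is created here.
#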

#
# Install falcon base images
#
# For each image: download the xz-compressed raw image from catacomb,
# decompress it (keeping the .xz), create a zvol named after the image (with
# the trailing _N suffix stripped) under $FALCON_DATASET/img, write the raw
# image into the zvol, and snapshot it as @base.
#
export FALCON_DATASET=cpool/falcon
images="debian-11.0_0 helios-2.0_0"
for img in $images; do
    file=$img.raw.xz
    # -f makes an HTTP error fail here, like every other download in this
    # script, instead of saving the error page and failing later in unxz.
    curl -fOL "http://catacomb.eng.oxide.computer:12346/falcon/$file"
    unxz --keep -T 0 "$file"

    file=$img.raw
    name=${img%_*}
    # Field 5 of `ls -l` is the file size in bytes.
    fsize=$(ls -l "$file" | awk '{print $5}')
    # Pad the volume size up to the next 4096-byte boundary to match the 4k
    # volblocksize. This previously subtracted `size % 4096`, but `size` was
    # never defined (so it evaluated to 0) and the result was simply
    # fsize + 4096, aligned only when the image already was.
    vsize=$(( fsize + 4096 - fsize % 4096 ))

    pfexec zfs create -p -V "$vsize" -o volblocksize=4k \
        "$FALCON_DATASET/img/$name"
    pfexec dd if="$file" of="/dev/zvol/rdsk/$FALCON_DATASET/img/$name" \
        bs=1024k status=progress
    pfexec zfs snapshot "$FALCON_DATASET/img/$name@base"
done

#
# Install OVMF
#
# Download OVMF_CODE.fd from catacomb and install it under /var/ovmf.
#
ovmf_file=OVMF_CODE.fd
ovmf_dir=/var/ovmf
curl -fOL "http://catacomb.eng.oxide.computer:12346/falcon/$ovmf_file"
pfexec mkdir -p "$ovmf_dir"
pfexec cp "$ovmf_file" "$ovmf_dir/$ovmf_file"

#
# Fetch the arista image
#
# The download is xz-compressed; after unxz the remaining arista.gz is fed to
# `zfs receive` as-is, producing the cpool/falcon/img/arista@base snapshot.
#
arista_stream=arista.gz
curl -fOL "http://catacomb.eng.oxide.computer:12346/falcon/$arista_stream.xz"
unxz "$arista_stream.xz"
pfexec zfs receive cpool/falcon/img/arista@base < "$arista_stream"

#
# Run the VM dhcp server
#
export EXT_INTERFACE=${EXT_INTERFACE:-igb0}

cp /input/a4x2/out/dhcp-server .
chmod +x dhcp-server

# Print one field of bmat's "extra" address entry.
bmat_extra() {
    bmat address ls -f extra -Ho "$1"
}

first=$(bmat_extra first)
last=$(bmat_extra last)
gw=$(bmat_extra gateway)

# This host's own address on the external interface's dhcp address object,
# with everything from the first "/" onwards stripped.
server=$(ipadm show-addr $EXT_INTERFACE/dhcp -po ADDR | sed 's#/.*##g')

# Runs in the background for the rest of the job; stdout and stderr both go to
# the log file.
pfexec ./dhcp-server $first $last $gw $server &> /out/dhcp-server.log &

#
# Run the topology
#
pfexec ./a4x2 launch

#
# Add a route to the rack ip pool
#

# Get the DHCP address for the external interface of the customer edge VM. This
# VM interface is attached to the host machine's external interface via viona.
# The query runs inside the ce VM and prints the dynamic IPv4 address of
# enp0s10.
ce_addr_query="ip -4 -j addr show enp0s10 | jq -r '.[0].addr_info[] | select(.dynamic == true) | .local'"
customer_edge_addr=$(./a4x2 exec ce "$ce_addr_query")

# Dump the host's link, address and routing state into the job log, then add
# the route to the rack via the customer edge VM.
pfexec dladm
pfexec ipadm
pfexec netstat -nr
pfexec route add 198.51.100.0/24 $customer_edge_addr

#
# Run the communications test program
#
# TODO tighten up packet loss tolerance. For now it's more or less ok for it to
# just run with _some_ comms. The program will fail if there are no comms to a
# given sled.
cp /input/a4x2/out/commtest .
chmod +x commtest

commtest_args=(
    http://198.51.100.23 run
    --ip-pool-begin 198.51.100.40
    --ip-pool-end 198.51.100.70
    --icmp-loss-tolerance 500
    --test-duration 200s
    --packet-rate 10
)
NO_COLOR=1 pfexec ./commtest "${commtest_args[@]}"

# Publish the report under /out, where the job's output_rules look for it.
cp connectivity-report.json /out/
94 changes: 94 additions & 0 deletions .github/buildomat/jobs/a4x2-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#:
#: name = "a4x2-prepare"
#: variety = "basic"
#: target = "helios-2.0"
#: rust_toolchain = "stable"
#: output_rules = [
#: "=/out/cargo-bay-ce.tgz",
#: "=/out/cargo-bay-cr1.tgz",
#: "=/out/cargo-bay-cr2.tgz",
#: "=/out/cargo-bay-g0.tgz",
#: "=/out/cargo-bay-g1.tgz",
#: "=/out/cargo-bay-g2.tgz",
#: "=/out/cargo-bay-g3.tgz",
#: "=/out/cargo-bay-tools.tgz",
#: "=/out/cargo-bay-omicron-common.tgz",
#: "=/out/commtest",
#: "=/out/dhcp-server",
#: ]
#: access_repos = [
#: "oxidecomputer/testbed",
#: ]
#: enable = true

# Load the repository's environment settings before enabling strict mode.
. ./env.sh

# Abort on the first failing command or pipeline stage, and trace every
# command into the job log.
set -o errexit -o pipefail -o xtrace

# /out is the directory the output_rules in the job header collect from.
pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# Prep to build omicron
#
banner "prerequisites"
# xtrace is already enabled at the top of the script; kept as in the original.
set -o xtrace
./tools/install_builder_prerequisites.sh -y

#
# Build the commtest and dhcp-server programs (release profile) and place
# them in the output
#
banner "commtest"
cargo build -p end-to-end-tests --bin commtest --bin dhcp-server --release
for bin in commtest dhcp-server; do
    cp "target/release/$bin" /out/
done

#
# Clone the testbed repo
#
# Any previous checkout is removed first so the clone always starts clean.
#
banner "testbed"
work_root=/work/oxidecomputer
cd "$work_root"
rm -rf testbed
git clone https://github.com/oxidecomputer/testbed
cd testbed/a4x2

#
# Build the a4x2 cargo bay using the omicron sources in this branch, fetch the
# softnpu artifacts into the cargo bay, zip up the cargo bay and place it in the
# output.
#
OMICRON="$work_root/omicron" ./config/build-packages.sh

# Create an omicron archive that captures common assets
#
# Start from a full copy of g0's out directory, then drop the pieces that
# differ per sled so only the shared assets remain.

pushd cargo-bay
common_out=omicron-common/omicron/out
mkdir -p omicron-common/omicron/
cp -r g0/omicron/out omicron-common/omicron/
# sled agent, gateway and switch archives are sled-specific
rm "$common_out/omicron-sled-agent.tar"
rm "$common_out"/omicron-gateway*
rm "$common_out/switch-softnpu.tar.gz"
popd

#
# prune_sled_out DIR
#
# Delete every direct child of DIR except the sled-specific archives, i.e.
# entries whose name contains "sled-agent", "omicron-gateway" or
# "switch-softnpu". Succeeds when there is nothing left to delete.
#
# This replaces a `find | grep -v ... | xargs -l rm -rf` pipeline: its last
# `grep -v` exits 1 when it selects no lines, which under pipefail + errexit
# aborted the job whenever a directory was already pruned. Letting find do the
# filtering also copes with whitespace in names and avoids the deprecated
# `xargs -l`.
#
prune_sled_out() {
    local out_dir=$1

    find "$out_dir" -mindepth 1 -maxdepth 1 \
        ! -name '*sled-agent*' \
        ! -name '*omicron-gateway*' \
        ! -name '*switch-softnpu*' \
        -exec rm -rf -- {} +
}

# Remove everything in $sled/omicron/out except sled-agent, mgs (gateway), and
# switch tar archives, these common elements are in the omicron-common archive
for sled in g0 g1 g2 g3; do
    prune_sled_out "cargo-bay/$sled/omicron/out/"
done

# Put the softnpu artifacts in place.
./config/fetch-softnpu-artifacts.sh

# Archive everything up and place it in the output: one gzip-compressed
# tarball per cargo-bay subdirectory, created locally and then moved to /out.
for bay in ce cr1 cr2 g0 g1 g2 g3 tools omicron-common; do
    archive=cargo-bay-$bay.tgz
    tar -czf "$archive" "cargo-bay/$bay"
    mv "$archive" /out/
done

5 changes: 5 additions & 0 deletions .github/buildomat/jobs/ci-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#: "=/work/end-to-end-tests/*.gz",
#: "=/work/caboose-util.gz",
#: "=/work/tufaceous.gz",
#: "=/work/commtest",
#: ]

set -o errexit
Expand All @@ -33,6 +34,10 @@ export CARGO_INCREMENTAL=0
ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \
--message-format json-render-diagnostics >/tmp/output.end-to-end.json

# Build the commtest binary (debug profile) and publish it, uncompressed, as
# /work/commtest, which the "=/work/commtest" output rule in the job header
# picks up.
mkdir -p /work
ptime -m cargo build --locked -p end-to-end-tests --tests --bin commtest
cp target/debug/commtest /work/commtest

mkdir -p /work/end-to-end-tests
for p in target/debug/bootstrap $(/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json); do
# shellcheck disable=SC2094
Expand Down
33 changes: 15 additions & 18 deletions .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,6 @@ z_swadm () {
pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@
}

# XXX remove. This is just to test against a development branch of OPTE in CI.
set +x
OPTE_COMMIT="73d4669ea213d0b7aca35c4babb6fd09ed51d29e"
curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde
pfexec rem_drv xde || true
pfexec mv xde /kernel/drv/amd64/xde
pfexec add_drv xde || true
curl -sSfOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HM09S4M15WNXB2B2MX8R1GBT/yLalJU5vT4S4IEpwSeY4hPuspxw3JcINokZmlfNU14npHkzG/01HM09SJ2RQSFGW7MVKC9JKZ8D/01HM0A58D888AJ7YP6N1Q6T6ZD/opteadm
chmod +x opteadm
cp opteadm /tmp/opteadm
pfexec mv opteadm /opt/oxide/opte/bin/opteadm
set -x

#
# XXX work around 14537 (UFS should not allow directories to be unlinked) which
# is probably not yet fixed in xde branch? Once the xde branch merges from
Expand Down Expand Up @@ -161,7 +148,7 @@ cd /opt/oxide/work

ptime -m tar xvzf /input/package/work/package.tar.gz
cp /input/package/work/zones/* out/
mv out/omicron-nexus-single-sled.tar.gz out/omicron-nexus.tar.gz
mv out/nexus-single-sled.tar.gz out/nexus.tar.gz
mkdir tests
for p in /input/ci-tools/work/end-to-end-tests/*.gz; do
ptime -m gunzip < "$p" > "tests/$(basename "${p%.gz}")"
Expand Down Expand Up @@ -244,12 +231,10 @@ first = \"$SERVICE_IP_POOL_START\"
/^last/c\\
last = \"$SERVICE_IP_POOL_END\"
}
/^\\[rack_network_config/,/^$/ {
/^infra_ip_first/c\\
/^infra_ip_first/c\\
infra_ip_first = \"$UPLINK_IP\"
/^infra_ip_last/c\\
/^infra_ip_last/c\\
infra_ip_last = \"$UPLINK_IP\"
}
/^\\[\\[rack_network_config.ports/,/^\$/ {
/^routes/c\\
routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\]
Expand Down Expand Up @@ -348,6 +333,18 @@ while [[ $(pfexec svcs -z $(zoneadm list -n | grep oxz_ntp) \
done
echo "Waited for chrony: ${retry}s"

# Wait for at least one nexus zone to become available. `zoneadm list` is
# polled once a second; the matching zone name(s) are echoed by grep. Give up
# and fail the job once the retry counter exceeds 300.
retry=0
while ! zoneadm list | grep nexus; do
    if (( retry > 300 )); then
        echo "Failed to start at least one nexus zone after 300 seconds"
        exit 1
    fi
    sleep 1
    retry=$((retry + 1))
done
echo "Waited for nexus: ${retry}s"

export RUST_BACKTRACE=1
export E2E_TLS_CERT IPPOOL_START IPPOOL_END
eval "$(./tests/bootstrap)"
Expand Down
Loading

0 comments on commit 147a52a

Please sign in to comment.