Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add on-demand communication probes. #4585

Merged
merged 3 commits into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions .github/buildomat/jobs/a4x2-deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/bin/bash
#:
#: name = "a4x2-deploy"
#: variety = "basic"
#: target = "lab-2.0-opte-0.27"
#: rust_toolchain = "stable"
#: output_rules = [
#: "/out/falcon/*.log",
#: "/out/falcon/*.err",
#: "/out/connectivity-report.json",
#: "/ci/out/*-sled-agent.log",
#: "/ci/out/*cockroach*.log",
#: "%/out/dhcp-server.log",
#: ]
#: skip_clone = true
#: enable = false
#:
#: [dependencies.a4x2]
#: job = "a4x2-prepare"

# Abort on the first failing command (including failures inside pipelines)
# and trace every command into the job log.
set -o errexit -o pipefail -o xtrace

# /out receives this job's artefacts. It is created with privilege and then
# handed over to the user running the script.
pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# EXIT handler. A successful run leaves immediately; a failing run gathers
# debugging state (disk usage, service faults, falcon console output, the
# connectivity report, sled-agent logs and cockroachdb logs) first.
#
_exit_trap() {
    local status=$?
    if [[ $status -eq 0 ]]; then
        exit 0
    fi

    # Everything below is best effort: one failed collection step must not
    # stop the remaining ones.
    set +o errexit

    local sled node crdb_log
    local crdb_zone_root='/pool/ext/*/crypt/zone/oxz_cockroachdb*/root'

    df -h

    # show what services have issues
    for sled in g0 g1 g2 g3; do
        ./a4x2 exec "$sled" "svcs -xvZ"
    done

    # Copy the contents of .falcon and rename each node's .out file to .log.
    mkdir -p /out/falcon
    cp .falcon/* /out/falcon/
    for node in ce cr1 cr2 g0 g1 g2 g3; do
        mv "/out/falcon/${node}.out" "/out/falcon/${node}.log"
    done
    cp connectivity-report.json /out/

    mkdir -p /ci/out

    # sled-agent service log from every sled
    for sled in g0 g1 g2 g3; do
        ./a4x2 exec "$sled" "cat /var/svc/log/oxide-sled-agent:default.log" \
            > "/ci/out/${sled}-sled-agent.log"
    done

    # collect cockroachdb logs
    # NOTE(review): /ci/log is created here but every log below is written
    # to /ci/out; confirm whether this directory is still wanted.
    mkdir -p /ci/log
    for sled in g0 g1 g2 g3; do
        for crdb_log in cockroach cockroach-stderr cockroach-health; do
            ./a4x2 exec "$sled" "cat ${crdb_zone_root}/data/logs/${crdb_log}.log" \
                > "/ci/out/${sled}-${crdb_log}.log"
        done

        ./a4x2 exec "$sled" "cat ${crdb_zone_root}/var/svc/log/oxide-cockroachdb:default.log*" \
            > "/ci/out/${sled}-oxide-cockroachdb.log"
    done
}
trap _exit_trap EXIT

#
# Install propolis: download the propolis-server binary from a fixed
# buildomat artefact URL, mark it executable and move it into /usr/bin.
#
propolis_url=https://buildomat.eng.oxide.computer/wg/0/artefact/01HJ4BJJY2Q9EKXHYV6HQZ8XPN/qQS2fnkS9LebcL4cDLeHRWdleSiXaGKEXGLDucRoab8pwBSi/01HJ4BJY5F995ET252YSD4NJWV/01HJ4CGFH946THBF0ZRH6SRM8X/propolis-server
curl -fOL "$propolis_url"
chmod +x propolis-server
pfexec mv propolis-server /usr/bin/

#
# Make space for CI work: create the "cpool" pool on $DISK (default c1t1d0)
# and mount a dataset from it at /ci.
#
export DISK=${DISK:-c1t1d0}
pfexec diskinfo
# $DISK is intentionally expanded unquoted, as it always has been.
pfexec zpool create -f cpool $DISK
pfexec zfs create -o mountpoint=/ci cpool/ci

# Trim the pool only when the trim-me endpoint's reply contains "true", then
# poll every 10 seconds until `zpool status -t` output contains "100%".
# The command substitutions stay inside [[ ]] so a failing curl/zpool does
# not trip errexit.
if [[ $(curl -s http://catacomb.eng.oxide.computer:12346/trim-me) == *"true"* ]]; then
    pfexec zpool trim cpool
    until [[ $(zpool status -t cpool) == *"100%"* ]]; do
        sleep 10
    done
fi

# The rest of the job runs unprivileged from /ci.
pfexec chown "$UID" /ci
cd /ci

#
# Fetch and decompress the cargo bay from the a4x2-prepare job
#
for bay in ce cr1 cr2 omicron-common g0 g1 g2 g3 tools; do
    tar -xvzf "/input/a4x2/out/cargo-bay-${bay}.tgz"
done

# Copy the contents of the omicron-common out/ directory into each sled's
# own out/ directory, then list the resulting tree in the job log.
for sled in g0 g1 g2 g3; do
    cp -r cargo-bay/omicron-common/omicron/out/* "cargo-bay/${sled}/omicron/out/"
done
ls -R

#
# Fetch the a4x2 topology manager program at the testbed revision named
# below and make it executable.
#
buildomat_url=https://buildomat.eng.oxide.computer
testbed_artifact_path=public/file/oxidecomputer/testbed/topo/
testbed_rev=677559e30b4dfc65c374b24336ac23d40102de81
# NOTE(review): testbed_artifact_path ends in "/", so the composed URL
# contains "topo//<rev>"; kept as-is, confirm the server relies on or
# tolerates this.
a4x2_url="${buildomat_url}/${testbed_artifact_path}/${testbed_rev}/a4x2"
curl -fOL "$a4x2_url"
chmod +x a4x2

#
# Install falcon base images
#
# Falcon images live under the cpool/falcon dataset. Each base image is
# downloaded as <img>.raw.xz, decompressed (keeping the .xz), written with dd
# onto a zvol at $FALCON_DATASET/img/<name>, and snapshotted as @base.
# <name> is <img> with its trailing "_<suffix>" removed.
#
export FALCON_DATASET=cpool/falcon
images="debian-11.0_0 helios-2.0_0"
for img in $images; do
    file=$img.raw.xz
    # -f makes an HTTP error fail the job here instead of saving the error
    # body as the image (matches every other download in this script).
    curl -fOL "http://catacomb.eng.oxide.computer:12346/falcon/$file"
    unxz --keep -T 0 "$file"

    file=$img.raw
    name=${img%_*}
    fsize=$(ls -l "$file" | awk '{print $5}')
    # Size the volume at the next 4096-byte boundary above the image size so
    # it is always a whole number of 4k blocks and strictly larger than the
    # file. The previous expression read an undefined variable ("size"),
    # which evaluates to 0, so the result was only block-aligned when the
    # file size already was.
    vsize=$(( fsize + 4096 - fsize % 4096 ))

    pfexec zfs create -p -V "$vsize" -o volblocksize=4k "$FALCON_DATASET/img/$name"
    pfexec dd if="$file" of="/dev/zvol/rdsk/$FALCON_DATASET/img/$name" bs=1024k status=progress
    pfexec zfs snapshot "$FALCON_DATASET/img/$name@base"
done

# Both downloads below come from the same falcon asset directory.
falcon_assets=http://catacomb.eng.oxide.computer:12346/falcon

#
# Install OVMF: download the firmware image and copy it into /var/ovmf.
#
curl -fOL "$falcon_assets/OVMF_CODE.fd"
pfexec mkdir -p /var/ovmf
pfexec cp OVMF_CODE.fd /var/ovmf/OVMF_CODE.fd

#
# Fetch the arista image, decompress it and feed the result to `zfs receive`
# as cpool/falcon/img/arista@base.
#
curl -fOL "$falcon_assets/arista.gz.xz"
unxz arista.gz.xz
pfexec zfs receive cpool/falcon/img/arista@base < arista.gz

#
# Run the VM dhcp server in the background, logging to /out/dhcp-server.log.
#
export EXT_INTERFACE=${EXT_INTERFACE:-igb0}

cp /input/a4x2/out/dhcp-server .
chmod +x dhcp-server

# The first/last/gateway values are read from `bmat address ls -f extra`.
range_first=$(bmat address ls -f extra -Ho first)
range_last=$(bmat address ls -f extra -Ho last)
gateway=$(bmat address ls -f extra -Ho gateway)
# Address of $EXT_INTERFACE/dhcp with everything from the first "/" removed.
server_addr=$(ipadm show-addr $EXT_INTERFACE/dhcp -po ADDR | sed 's#/.*##g')

pfexec ./dhcp-server $range_first $range_last $gateway $server_addr &> /out/dhcp-server.log &

#
# Run the topology
#
pfexec ./a4x2 launch

#
# Add a route to the rack ip pool
#

# Ask the customer edge VM for the dynamically assigned IPv4 address on its
# enp0s10 interface. This VM interface is attached to the host machine's
# external interface via viona.
ce_addr_query="ip -4 -j addr show enp0s10 | jq -r '.[0].addr_info[] | select(.dynamic == true) | .local'"
customer_edge_addr=$(./a4x2 exec ce "$ce_addr_query")

# Record the host's link, address and routing state in the job log, then add
# the route to the rack via the customer edge VM.
pfexec dladm
pfexec ipadm
pfexec netstat -nr
pfexec route add 198.51.100.0/24 $customer_edge_addr

#
# Run the communications test program against http://198.51.100.23, then
# copy connectivity-report.json from the working directory into the job
# output.
#
cp /input/a4x2/out/commtest .
chmod +x commtest

commtest_args=(
    http://198.51.100.23 run
    --ip-pool-begin 198.51.100.40
    --ip-pool-end 198.51.100.70
    --icmp-loss-tolerance 10
    --test-duration 300s
    --packet-rate 30
)
pfexec ./commtest "${commtest_args[@]}"

cp connectivity-report.json /out/
94 changes: 94 additions & 0 deletions .github/buildomat/jobs/a4x2-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#:
#: name = "a4x2-prepare"
#: variety = "basic"
#: target = "helios-2.0"
#: rust_toolchain = "stable"
#: output_rules = [
#: "=/out/cargo-bay-ce.tgz",
#: "=/out/cargo-bay-cr1.tgz",
#: "=/out/cargo-bay-cr2.tgz",
#: "=/out/cargo-bay-g0.tgz",
#: "=/out/cargo-bay-g1.tgz",
#: "=/out/cargo-bay-g2.tgz",
#: "=/out/cargo-bay-g3.tgz",
#: "=/out/cargo-bay-tools.tgz",
#: "=/out/cargo-bay-omicron-common.tgz",
#: "=/out/commtest",
#: "=/out/dhcp-server",
#: ]
#: access_repos = [
#: "oxidecomputer/testbed",
#: ]
#: enable = false

# env.sh is sourced before strict mode and tracing are switched on.
. ./env.sh

# Abort on the first failing command (including failures inside pipelines)
# and trace every command into the job log.
set -o errexit -o pipefail -o xtrace

# /out receives this job's artefacts. It is created with privilege and then
# handed over to the user running the script.
pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# Prep to build omicron
#
banner prerequisites
set -o xtrace
./tools/install_builder_prerequisites.sh -y

#
# Build the commtest and dhcp-server programs in release mode and place both
# binaries in the output.
#
banner commtest
cargo build -p end-to-end-tests --bin commtest --bin dhcp-server --release
for tool in commtest dhcp-server; do
    cp "target/release/${tool}" /out/
done

#
# Clone the testbed repo (discarding any previous checkout) and enter its
# a4x2 directory.
#
banner testbed
workspace=/work/oxidecomputer
cd "$workspace"
rm -rf testbed
git clone https://github.com/oxidecomputer/testbed
cd testbed/a4x2

#
# Build the a4x2 cargo bay using the omicron sources in this branch. The
# steps after this one fetch the softnpu artifacts into the cargo bay, zip
# up the cargo bay and place it in the output.
#
OMICRON="$workspace/omicron" ./config/build-packages.sh

# Create an omicron archive that captures common assets: start from a copy of
# g0's out/ directory and drop the pieces that differ per sled.

pushd cargo-bay
common_root=omicron-common/omicron
mkdir -p "$common_root/"
cp -r g0/omicron/out "$common_root/"
# sled agent, gateway and switch archives are sled-specific
rm "$common_root/out/omicron-sled-agent.tar"
rm "$common_root"/out/omicron-gateway*
rm "$common_root/out/switch-softnpu.tar.gz"
popd

# Remove everything in $sled/omicron/out except sled-agent, mgs (gateway), and
# switch tar archives, these common elements are in the omicron-common archive

#
# prune_sled_out DIR
#   Delete every direct child of DIR whose name does not contain
#   "sled-agent", "omicron-gateway" or "switch-softnpu".
#   Returns find's exit status (non-zero if DIR cannot be read).
#
# The selection is done by find itself rather than a `grep -v | xargs`
# chain: grep exits 1 when nothing is left to delete, which aborts the job
# under errexit + pipefail, and xargs splits names containing whitespace.
#
prune_sled_out() {
    local out_dir=$1
    find "$out_dir" -maxdepth 1 -mindepth 1 \
        ! -name '*sled-agent*' \
        ! -name '*omicron-gateway*' \
        ! -name '*switch-softnpu*' \
        -exec rm -rf {} +
}

for sled in g0 g1 g2 g3; do
    prune_sled_out "cargo-bay/$sled/omicron/out/"
done

# Put the softnpu artifacts in place.
./config/fetch-softnpu-artifacts.sh

# Archive each cargo-bay subdirectory as cargo-bay-<name>.tgz and move the
# archive into the job output.
for bay in ce cr1 cr2 g0 g1 g2 g3 tools omicron-common; do
    archive="cargo-bay-${bay}.tgz"
    tar -czf "$archive" "cargo-bay/${bay}"
    mv "$archive" /out/
done

5 changes: 5 additions & 0 deletions .github/buildomat/jobs/ci-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#: "=/work/end-to-end-tests/*.gz",
#: "=/work/caboose-util.gz",
#: "=/work/tufaceous.gz",
#: "=/work/commtest",
#: ]

set -o errexit
Expand All @@ -33,6 +34,10 @@ export CARGO_INCREMENTAL=0
ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \
--message-format json-render-diagnostics >/tmp/output.end-to-end.json

mkdir -p /work
ptime -m cargo build --locked -p end-to-end-tests --tests --bin commtest
cp target/debug/commtest /work/commtest

mkdir -p /work/end-to-end-tests
for p in target/debug/bootstrap $(/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json); do
# shellcheck disable=SC2094
Expand Down
1 change: 1 addition & 0 deletions .github/buildomat/jobs/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ zones=(
out/omicron-gateway-softnpu.tar.gz
out/omicron-gateway-asic.tar.gz
out/overlay.tar.gz
out/probe.tar.gz
)
cp "${zones[@]}" /work/zones/

Expand Down
Empty file modified .github/buildomat/jobs/tuf-repo.sh
100644 → 100755
Empty file.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ debug.out
rusty-tags.vi
*.sw*
tags
.direnv
.direnv
connectivity-report.json
Loading
Loading