Skip to content

Commit

Permalink
Merge branch 'raw-ctes' into more-raw-cte-setup
Browse files Browse the repository at this point in the history
  • Loading branch information
smklein committed May 28, 2024
2 parents 7f46a0a + c77a167 commit e2817bb
Show file tree
Hide file tree
Showing 483 changed files with 37,030 additions and 10,395 deletions.
41 changes: 41 additions & 0 deletions .cargo/xtask.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# This config file is used by `cargo xtask verify-libraries`


# These are libraries that we expect to show up in any executable produced
# by the omicron repo.
[libraries."libc.so.1"]
[libraries."libcontract.so.1"]
[libraries."libcrypto.so.3"]
[libraries."libdevinfo.so.1"]
[libraries."libdlpi.so.1"]
[libraries."libdoor.so.1"]
[libraries."libefi.so.1"]
[libraries."libgcc_s.so.1"]
[libraries."libipcc.so.1"]
[libraries."libkstat.so.1"]
[libraries."libm.so.2"]
[libraries."libnsl.so.1"]
[libraries."libnvpair.so.1"]
[libraries."libpq.so.5"]
[libraries."libpthread.so.1"]
[libraries."libresolv.so.2"]
[libraries."librt.so.1"]
[libraries."libscf.so.1"]
[libraries."libsocket.so.1"]
[libraries."libssl.so.3"]
[libraries."libumem.so.1"]
[libraries."libxml2.so.2"]
[libraries."libxmlsec1.so.1"]

# libnvme is a global zone only library and therefore we must be sure that only
# programs running in the gz require it. Additionally only sled-agent should be
# managing a sled's hardware.
[libraries."libnvme.so.1"]
binary_allow_list = [
"installinator",
"omicron-dev",
"omicron-package",
"services-ledger-check-migrate",
"sled-agent",
"sled-agent-sim",
]
11 changes: 10 additions & 1 deletion .config/hakari.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,13 @@ platforms = [
exact-versions = true

[traversal-excludes]
workspace-members = ["xtask"]
workspace-members = [
# Exclude xtask because it needs to be built quickly.
"xtask",

# Exclude omicron-uuid-kinds because it is a no-std crate. Depending on the
# workspace-hack isn't too problematic because other projects pulling in
# omicron as a git dependency will only see an empty workspace-hack. But
# let's make this explicit.
"omicron-uuid-kinds",
]
13 changes: 13 additions & 0 deletions .github/buildomat/build-and-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ export RUSTC_BOOTSTRAP=1
# We report build progress to stderr, and the "--timings=json" output goes to stdout.
ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --locked --verbose 1> "$OUTPUT_DIR/crate-build-timings.json"

# If we are running on illumos we want to verify that we are not requiring
# system libraries outside of specific binaries. If we encounter this situation
# we bail.
# NB: `cargo xtask verify-libraries` runs `cargo build --bins` to ensure it can
# check the final executables.
if [[ $target_os == "illumos" ]]; then
banner verify-libraries
# This has a separate timeout from `cargo nextest` since `timeout` expects
# to run an external command and therefore we cannot run bash functions or
# subshells.
ptime -m timeout 10m cargo xtask verify-libraries
fi

#
# We apply our own timeout to ensure that we get a normal failure on timeout
# rather than a buildomat timeout. See oxidecomputer/buildomat#8.
Expand Down
210 changes: 210 additions & 0 deletions .github/buildomat/jobs/a4x2-deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#!/bin/bash
#:
#: name = "a4x2-deploy"
#: variety = "basic"
#: target = "lab-2.0-opte-0.27"
#: rust_toolchain = "stable"
#: output_rules = [
#: "/out/falcon/*.log",
#: "/out/falcon/*.err",
#: "/out/connectivity-report.json",
#: "/ci/out/*-sled-agent.log",
#: "/ci/out/*cockroach*.log",
#: "%/out/dhcp-server.log",
#: ]
#: skip_clone = true
#: enable = true
#:
#: [dependencies.a4x2]
#: job = "a4x2-prepare"

set -o errexit
set -o pipefail
set -o xtrace

pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# If we fail, try to collect some debugging information
#
_exit_trap() {
local status=$?
[[ $status -eq 0 ]] && exit 0

set +o errexit

df -h

# show what services have issues
for gimlet in g0 g1 g2 g3; do
./a4x2 exec $gimlet "svcs -xvZ"
done

mkdir -p /out/falcon
cp .falcon/* /out/falcon/
for x in ce cr1 cr2 g0 g1 g2 g3; do
mv /out/falcon/$x.out /out/falcon/$x.log
done
cp connectivity-report.json /out/

mkdir -p /ci/out

for gimlet in g0 g1 g2 g3; do
./a4x2 exec \
$gimlet \
"cat /var/svc/log/oxide-sled-agent:default.log" > \
/ci/out/$gimlet-sled-agent.log
done

# collect cockroachdb logs
mkdir -p /ci/log
for gimlet in g0 g1 g2 g3; do
./a4x2 exec $gimlet 'cat /pool/ext/*/crypt/zone/oxz_cockroachdb*/root/data/logs/cockroach.log' > \
/ci/out/$gimlet-cockroach.log

./a4x2 exec $gimlet 'cat /pool/ext/*/crypt/zone/oxz_cockroachdb*/root/data/logs/cockroach-stderr.log' > \
/ci/out/$gimlet-cockroach-stderr.log

./a4x2 exec $gimlet 'cat /pool/ext/*/crypt/zone/oxz_cockroachdb*/root/data/logs/cockroach-health.log' > \
/ci/out/$gimlet-cockroach-health.log

./a4x2 exec $gimlet 'cat /pool/ext/*/crypt/zone/oxz_cockroachdb*/root/var/svc/log/oxide-cockroachdb:default.log*' > \
/ci/out/$gimlet-oxide-cockroachdb.log
done
}
trap _exit_trap EXIT

#
# Install propolis
#
curl -fOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HJ4BJJY2Q9EKXHYV6HQZ8XPN/qQS2fnkS9LebcL4cDLeHRWdleSiXaGKEXGLDucRoab8pwBSi/01HJ4BJY5F995ET252YSD4NJWV/01HJ4CGFH946THBF0ZRH6SRM8X/propolis-server
chmod +x propolis-server
pfexec mv propolis-server /usr/bin/

#
# Make space for CI work
#
export DISK=${DISK:-c1t1d0}
pfexec diskinfo
pfexec zpool create -o ashift=12 -f cpool $DISK
pfexec zfs create -o mountpoint=/ci cpool/ci

if [[ $(curl -s http://catacomb.eng.oxide.computer:12346/trim-me) =~ "true" ]]; then
pfexec zpool trim cpool
while [[ ! $(zpool status -t cpool) =~ "100%" ]]; do sleep 10; done
fi

pfexec chown "$UID" /ci
cd /ci

#
# Fetch and decompress the cargo bay from the a4x2-prepeare job
#
for x in ce cr1 cr2 omicron-common g0 g1 g2 g3 tools; do
tar -xvzf /input/a4x2/out/cargo-bay-$x.tgz
done

for sled in g0 g1 g2 g3; do
cp -r cargo-bay/omicron-common/omicron/out/* cargo-bay/$sled/omicron/out/
done
ls -R

#
# Fetch the a4x2 topology manager program
#
buildomat_url=https://buildomat.eng.oxide.computer
testbed_artifact_path=public/file/oxidecomputer/testbed/topo/
testbed_rev=67454d38958bcf51830850aec36600df84b7d8a0
curl -fOL $buildomat_url/$testbed_artifact_path/$testbed_rev/a4x2
chmod +x a4x2

#
# Create a zpool for falcon images and disks
#

#
# Install falcon base images
#
export FALCON_DATASET=cpool/falcon
images="debian-11.0_0 helios-2.0_0"
for img in $images; do
file=$img.raw.xz
curl -OL http://catacomb.eng.oxide.computer:12346/falcon/$file
unxz --keep -T 0 $file

file=$img.raw
name=${img%_*}
fsize=`ls -l $img.raw | awk '{print $5}'`
let vsize=(fsize + 4096 - size%4096)

pfexec zfs create -p -V $vsize -o volblocksize=4k "$FALCON_DATASET/img/$name"
pfexec dd if=$img.raw of="/dev/zvol/rdsk/$FALCON_DATASET/img/$name" bs=1024k status=progress
pfexec zfs snapshot "$FALCON_DATASET/img/$name@base"
done

#
# Install OVMF
#
curl -fOL http://catacomb.eng.oxide.computer:12346/falcon/OVMF_CODE.fd
pfexec mkdir -p /var/ovmf
pfexec cp OVMF_CODE.fd /var/ovmf/OVMF_CODE.fd

#
# Fetch the arista image
#
curl -fOL http://catacomb.eng.oxide.computer:12346/falcon/arista.gz.xz
unxz arista.gz.xz
pfexec zfs receive cpool/falcon/img/arista@base < arista.gz

#
# Run the VM dhcp server
#
export EXT_INTERFACE=${EXT_INTERFACE:-igb0}

cp /input/a4x2/out/dhcp-server .
chmod +x dhcp-server
first=`bmat address ls -f extra -Ho first`
last=`bmat address ls -f extra -Ho last`
gw=`bmat address ls -f extra -Ho gateway`
server=`ipadm show-addr $EXT_INTERFACE/dhcp -po ADDR | sed 's#/.*##g'`
pfexec ./dhcp-server $first $last $gw $server &> /out/dhcp-server.log &

#
# Run the topology
#
pfexec ./a4x2 launch

#
# Add a route to the rack ip pool
#

# Get the DHCP address for the external interface of the customer edge VM. This
# VM interface is attached to the host machine's external interface via viona.
customer_edge_addr=$(./a4x2 exec ce \
"ip -4 -j addr show enp0s10 | jq -r '.[0].addr_info[] | select(.dynamic == true) | .local'")

# Add the route to the rack via the customer edge VM
pfexec dladm
pfexec ipadm
pfexec netstat -nr
pfexec route add 198.51.100.0/24 $customer_edge_addr

#
# Run the communications test program
#
# TODO tighten up packet loss tolerance. For now it's more or less ok for it to
# just run with _some_ comms. The program will fail if there are no comms to a
# given sled.
cp /input/a4x2/out/commtest .
chmod +x commtest
NO_COLOR=1 pfexec ./commtest \
--api-timeout 30m \
http://198.51.100.23 run \
--ip-pool-begin 198.51.100.40 \
--ip-pool-end 198.51.100.70 \
--icmp-loss-tolerance 500 \
--test-duration 200s \
--packet-rate 10

cp connectivity-report.json /out/
94 changes: 94 additions & 0 deletions .github/buildomat/jobs/a4x2-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#:
#: name = "a4x2-prepare"
#: variety = "basic"
#: target = "helios-2.0"
#: rust_toolchain = "stable"
#: output_rules = [
#: "=/out/cargo-bay-ce.tgz",
#: "=/out/cargo-bay-cr1.tgz",
#: "=/out/cargo-bay-cr2.tgz",
#: "=/out/cargo-bay-g0.tgz",
#: "=/out/cargo-bay-g1.tgz",
#: "=/out/cargo-bay-g2.tgz",
#: "=/out/cargo-bay-g3.tgz",
#: "=/out/cargo-bay-tools.tgz",
#: "=/out/cargo-bay-omicron-common.tgz",
#: "=/out/commtest",
#: "=/out/dhcp-server",
#: ]
#: access_repos = [
#: "oxidecomputer/testbed",
#: ]
#: enable = true

source ./env.sh

set -o errexit
set -o pipefail
set -o xtrace

pfexec mkdir -p /out
pfexec chown "$UID" /out

#
# Prep to build omicron
#
banner "prerequisites"
set -o xtrace
./tools/install_builder_prerequisites.sh -y

#
# Build the commtest program and place in the output
#
banner "commtest"
cargo build -p end-to-end-tests --bin commtest --bin dhcp-server --release
cp target/release/commtest /out/
cp target/release/dhcp-server /out/

#
# Clone the testbed repo
#
banner "testbed"
cd /work/oxidecomputer
rm -rf testbed
git clone https://github.com/oxidecomputer/testbed
cd testbed/a4x2

#
# Build the a4x2 cargo bay using the omicron sources in this branch, fetch the
# softnpu artifacts into the cargo bay, zip up the cargo bay and place it in the
# output.
#
OMICRON=/work/oxidecomputer/omicron ./config/build-packages.sh

# Create an omicron archive that captures common assets

pushd cargo-bay
mkdir -p omicron-common/omicron/
cp -r g0/omicron/out omicron-common/omicron/
# sled agent, gateway and switch archives are sled-specific
rm omicron-common/omicron/out/omicron-sled-agent.tar
rm omicron-common/omicron/out/omicron-gateway*
rm omicron-common/omicron/out/switch-softnpu.tar.gz
popd

# Remove everything in $sled/omicron/out except sled-agent, mgs (gateway), and
# switch tar archives, these common elements are in the omicron-common archive
for sled in g0 g1 g2 g3; do
find cargo-bay/$sled/omicron/out/ -maxdepth 1 -mindepth 1 \
| grep -v sled-agent \
| grep -v omicron-gateway \
| grep -v switch-softnpu \
| xargs -l rm -rf
done

# Put the softnpu artifacts in place.
./config/fetch-softnpu-artifacts.sh

# Archive everything up and place it in the output
for x in ce cr1 cr2 g0 g1 g2 g3 tools omicron-common; do
tar -czf cargo-bay-$x.tgz cargo-bay/$x
mv cargo-bay-$x.tgz /out/
done

Loading

0 comments on commit e2817bb

Please sign in to comment.