feat(metrics): dockerized grafana dashboard to visualize node metrics #3568
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Check before merge | |
on: | |
# tests must run for a PR to be valid and pass merge queue muster | |
# on main, we want to know that all commits are passing at a glance, any deviation should help bisecting errors | |
# the merge run checks should show on master and enable this clear test/passing history | |
merge_group: | |
branches: [main] | |
pull_request: | |
branches: ["*"] | |
env: | |
CARGO_INCREMENTAL: 0 # bookkeeping for incremental builds has overhead, not useful in CI. | |
jobs: | |
cargo-udeps: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: Unused dependency check | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
#Needs nightly to distinguish between deps of different versions | |
toolchain: nightly | |
# we need rustfmt here while we have a build step | |
components: rustfmt | |
- name: Install cargo-udeps | |
# TODO: (2023-07-03) This is a non-released version from udeps, which includes a bug fix: | |
# https://github.com/est31/cargo-udeps/issues/180 | |
# Change this when this is released. | |
run: cargo install --git=https://github.com/est31/cargo-udeps.git --rev=f7a4705 --locked | |
- name: Run cargo-udeps | |
run: cargo +nightly udeps --all-targets | |
lint: | |
runs-on: ubuntu-latest | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- uses: wagoid/commitlint-github-action@f114310111fdbd07e99f47f9ca13d62b3ec98372 | |
checks: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: various checks | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
components: rustfmt, clippy | |
- uses: Swatinem/rust-cache@v2 | |
- name: Check formatting | |
run: cargo fmt --all -- --check | |
- shell: bash | |
run: cargo clippy --all-targets --all-features -- -Dwarnings | |
- name: Check documentation | |
# Deny certain `rustdoc` lints that are unwanted. | |
# See https://doc.rust-lang.org/rustdoc/lints.html for lints that are 'warning' by default. | |
run: RUSTDOCFLAGS="--deny=warnings" cargo doc --no-deps | |
- name: Check local-discovery is not a default feature | |
shell: bash | |
run: if [[ ! $(cargo metadata --no-deps --format-version 1 | jq -r '.packages[].features.default[]? | select(. == "local-discovery")') ]]; then echo "local-discovery is not a default feature in any package."; else echo "local-discovery is a default feature in at least one package." && exit 1; fi | |
- name: Check the whole workspace can build | |
run: cargo build --all-targets --all-features | |
unit: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: Unit Tests | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
os: [ubuntu-latest, windows-latest, macos-latest] | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Run testnet tests | |
timeout-minutes: 25 | |
run: cargo test --release --package sn_testnet | |
- name: Run network tests | |
timeout-minutes: 25 | |
run: cargo test --release --package sn_networking | |
- name: Run protocol tests | |
timeout-minutes: 25 | |
run: cargo test --release --package sn_protocol | |
- name: Run transfers tests | |
timeout-minutes: 25 | |
run: cargo test --release --package sn_transfers | |
- name: Run register tests | |
timeout-minutes: 25 | |
run: cargo test --release --package sn_registers | |
env: | |
# this will speed up PR merge flows, while giving us a modicum | |
# of proptesting | |
# we do many more runs on the nightly run | |
PROPTEST_CASES: 50 | |
e2e: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: E2E tests | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
os: [ubuntu-latest, windows-latest, macos-latest] | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Build node and client | |
run: cargo build --release --bin safenode --bin safe --bin faucet | |
timeout-minutes: 30 | |
- name: Start a local network | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: start | |
interval: 2000 | |
node-path: target/release/safenode | |
faucet-path: target/release/faucet | |
platform: ${{ matrix.os }} | |
- name: Check SAFE_PEERS was set | |
shell: bash | |
run: | | |
if [[ -z "$SAFE_PEERS" ]]; then | |
echo "The SAFE_PEERS variable has not been set" | |
exit 1 | |
else | |
echo "SAFE_PEERS has been set to $SAFE_PEERS" | |
fi | |
- name: Create and fund a wallet to pay for files storage | |
run: | | |
cargo run --bin faucet --release -- --log-output-dest=data-dir send 1000000 $(cargo run --bin safe --release -- --log-output-dest=data-dir wallet address | tail -n 1) | tail -n 1 > transfer_hex | |
cargo run --bin safe --release -- --log-output-dest=data-dir wallet receive --file transfer_hex | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 5 | |
- name: Start a client to pay for files storage | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir wallet pay "./resources" | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 10 | |
- name: Start a client to upload files | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir files upload -- "./resources" | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 15 | |
- name: Start a client to download files | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir files download | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 2 | |
- name: Start a client to create a register | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir register create baobao | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 10 | |
- name: Start a client to get a register | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir register get -n baobao | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 2 | |
- name: Start a client to edit a register | |
run: cargo run --bin safe --release -- --log-output-dest=data-dir register edit -n baobao wood | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 10 | |
- name: Stop the local network and upload logs | |
if: always() | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: stop | |
log_file_prefix: safe_test_logs_e2e | |
platform: ${{ matrix.os }} | |
gossipsub: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: Gossipsub E2E tests | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
os: [ubuntu-latest, windows-latest, macos-latest] | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Build node | |
run: cargo build --release --bin safenode | |
timeout-minutes: 30 | |
- name: Build gossipsub testing executable | |
run: cargo test --release -p sn_node --features=local-discovery --test msgs_over_gossipsub --no-run | |
timeout-minutes: 30 | |
- name: Start a local network | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: start | |
interval: 2000 | |
node-path: target/release/safenode | |
faucet-path: target/release/faucet | |
platform: ${{ matrix.os }} | |
- name: Gossipsub - nodes to subscribe to topics, and publish messages | |
run: cargo test --release -p sn_node --features local-discovery msgs_over_gossipsub -- --nocapture | |
timeout-minutes: 20 | |
- name: Stop the local network and upload logs | |
if: always() | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: stop | |
log_file_prefix: safe_test_logs_e2e | |
platform: ${{ matrix.os }} | |
spend_test: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: spend tests against network | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
os: [ubuntu-latest, windows-latest, macos-latest] | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Install Rust | |
uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Build safe bins | |
run: cargo build --release --features local-discovery --bin safenode --bin faucet | |
timeout-minutes: 30 | |
- name: Build testing executable | |
run: cargo test --release -p sn_node --features=local-discovery --test sequential_transfers --test storage_payments --test nodes_rewards --no-run | |
timeout-minutes: 30 | |
- name: Start a local network | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: start | |
interval: 2000 | |
node-path: target/release/safenode | |
faucet-path: target/release/faucet | |
platform: ${{ matrix.os }} | |
- name: Check SAFE_PEERS was set | |
shell: bash | |
run: | | |
if [[ -z "$SAFE_PEERS" ]]; then | |
echo "The SAFE_PEERS variable has not been set" | |
exit 1 | |
else | |
echo "SAFE_PEERS has been set to $SAFE_PEERS" | |
fi | |
# This should be first to avoid slow reward acceptance etc | |
- name: execute the nodes rewards tests | |
run: cargo test --release -p sn_node --features="local-discovery" --test nodes_rewards -- --nocapture --test-threads=1 | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 25 | |
- name: execute the spend tests | |
run: cargo test --release -p sn_node --features="local-discovery" --test sequential_transfers -- --nocapture | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 25 | |
- name: execute the storage payment tests | |
run: cargo test --release -p sn_node --features="local-discovery" --test storage_payments -- --nocapture --test-threads=1 | |
env: | |
SN_LOG: "all" | |
timeout-minutes: 25 | |
- name: Stop the local network and upload logs | |
if: always() | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: stop | |
log_file_prefix: safe_test_logs_spend | |
platform: ${{ matrix.os }} | |
churn: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: Network churning tests | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
include: | |
- os: ubuntu-latest | |
node_data_path: /home/runner/.local/share/safe/node | |
- os: windows-latest | |
node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node | |
- os: macos-latest | |
node_data_path: /Users/runner/Library/Application Support/safe/node | |
steps: | |
- uses: actions/checkout@v3 | |
- uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Build safe bins | |
run: cargo build --release --features local-discovery --bin safenode --bin faucet | |
timeout-minutes: 30 | |
- name: Build churn tests | |
run: cargo test --release -p sn_node --features=local-discovery --test data_with_churn --no-run | |
timeout-minutes: 30 | |
- name: Start a local network | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: start | |
interval: 2000 | |
node-path: target/release/safenode | |
faucet-path: target/release/faucet | |
platform: ${{ matrix.os }} | |
- name: Check SAFE_PEERS was set | |
shell: bash | |
run: | | |
if [[ -z "$SAFE_PEERS" ]]; then | |
echo "The SAFE_PEERS variable has not been set" | |
exit 1 | |
else | |
echo "SAFE_PEERS has been set to $SAFE_PEERS" | |
fi | |
- name: Chunks data integrity during nodes churn | |
run: cargo test --release -p sn_node --features="local-discovery" --test data_with_churn -- --nocapture | |
env: | |
TEST_DURATION_MINS: 5 | |
TEST_TOTAL_CHURN_CYCLES: 15 | |
SN_LOG: "all" | |
timeout-minutes: 30 | |
- name: Verify restart of nodes using rg | |
shell: bash | |
timeout-minutes: 1 | |
# get the counts, then the specific line, and then the digit count only | |
# then check we have an expected level of restarts | |
# TODO: make this use an env var, or relate to testnet size | |
run : | | |
restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \ | |
rg "(\d+) matches" | rg "\d+" -o) | |
echo "Restart $restart_count nodes" | |
peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \ | |
rg "(\d+) matches" | rg "\d+" -o) | |
echo "PeerRemovedFromRoutingTable $peer_removed times" | |
if [ $peer_removed -lt $restart_count ]; then | |
echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count" | |
exit 1 | |
fi | |
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) | |
echo "Node dir count is $node_count" | |
# TODO: reenable this once the testnet dir creation is tidied up to avoid a large count here | |
# if [ $restart_count -lt $node_count ]; then | |
# echo "Restart count of: $restart_count is less than the node count of: $node_count" | |
# exit 1 | |
# fi | |
- name: Verify data replication using rg | |
shell: bash | |
timeout-minutes: 1 | |
# get the counts, then the specific line, and then the digit count only | |
# then check we have an expected level of replication | |
# TODO: make this use an env var, or relate to testnet size | |
run : | | |
fetching_attempt_count=$(rg "FetchingKeysForReplication" "${{ matrix.node_data_path }}" -c --stats | \ | |
rg "(\d+) matches" | rg "\d+" -o) | |
echo "Carried out $fetching_attempt_count fetching attempts" | |
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) | |
if [ $fetching_attempt_count -lt $node_count ]; then | |
echo "Replication fetching attempts of: $fetching_attempt_count is less than the node count of: $node_count" | |
exit 1 | |
fi | |
- name: Stop the local network and upload logs | |
if: always() | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: stop | |
log_file_prefix: safe_test_logs_churn | |
platform: ${{ matrix.os }} | |
verify_data_location: | |
if: "!startsWith(github.event.head_commit.message, 'chore(release):')" | |
name: Verify data location | |
runs-on: ${{ matrix.os }} | |
strategy: | |
matrix: | |
include: | |
- os: ubuntu-latest | |
node_data_path: /home/runner/.local/share/safe/node | |
- os: windows-latest | |
node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node | |
- os: macos-latest | |
node_data_path: /Users/runner/Library/Application Support/safe/node | |
steps: | |
- uses: actions/checkout@v3 | |
- uses: dtolnay/rust-toolchain@stable | |
with: | |
toolchain: stable | |
- uses: Swatinem/rust-cache@v2 | |
- name: Build safe bins | |
run: cargo build --release --features local-discovery --bin safenode --bin faucet | |
timeout-minutes: 30 | |
- name: Build data location test | |
run: cargo test --release -p sn_node --features=local-discovery --test verify_data_location --no-run | |
timeout-minutes: 30 | |
- name: Start a local network | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: start | |
interval: 2000 | |
node-path: target/release/safenode | |
faucet-path: target/release/faucet | |
platform: ${{ matrix.os }} | |
- name: Check SAFE_PEERS was set | |
shell: bash | |
run: | | |
if [[ -z "$SAFE_PEERS" ]]; then | |
echo "The SAFE_PEERS variable has not been set" | |
exit 1 | |
else | |
echo "SAFE_PEERS has been set to $SAFE_PEERS" | |
fi | |
- name: Verify the location of the data on the network (4 * 5 mins) | |
run: cargo test --release -p sn_node --features="local-discovery" --test verify_data_location -- --nocapture | |
env: | |
CHURN_COUNT: 3 | |
SN_LOG: "all" | |
timeout-minutes: 30 | |
- name: Verify restart of nodes using rg | |
shell: bash | |
timeout-minutes: 1 | |
# get the counts, then the specific line, and then the digit count only | |
# then check we have an expected level of restarts | |
# TODO: make this use an env var, or relate to testnet size | |
run : | | |
restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \ | |
rg "(\d+) matches" | rg "\d+" -o) | |
echo "Restart $restart_count nodes" | |
peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \ | |
rg "(\d+) matches" | rg "\d+" -o) | |
echo "PeerRemovedFromRoutingTable $peer_removed times" | |
if [ $peer_removed -lt $restart_count ]; then | |
echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count" | |
exit 1 | |
fi | |
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) | |
echo "Node dir count is $node_count" | |
- name: Stop the local network and upload logs | |
if: always() | |
uses: maidsafe/sn-local-testnet-action@main | |
with: | |
action: stop | |
log_file_prefix: safe_test_logs_data_location | |
platform: ${{ matrix.os }} |