diff --git a/debugging/test-gateways.sh b/debugging/test-gateways.sh index b5b4bf1..50a7ca0 100755 --- a/debugging/test-gateways.sh +++ b/debugging/test-gateways.sh @@ -1,5 +1,20 @@ #!/usr/bin/env bash +EXIT_CODE=0 +cleanup_gateway_test_called=false +cleanup_gateway_test() { + if [ "$cleanup_gateway_test_called" = true ]; then + echo "cleanup_gateway_test already called" + return + fi + echo "cleanup_gateway_test called" + cleanup_gateway_test_called=true + + exit $EXIT_CODE +} + +trap cleanup_gateway_test EXIT + # Query all endpoints until failure # This script is intended to be run from the root of the helia-http-gateway repository @@ -11,7 +26,10 @@ if ! nc -z localhost $PORT; then fi ensure_gateway_running() { - npx wait-on "tcp:$PORT" -t 1000 || exit 1 + npx wait-on "tcp:$PORT" -t 1000 || { + EXIT_CODE=1 + cleanup_gateway_test + } } # Use the first argument to this script (if any) as the maximum timeout for curl @@ -20,7 +38,7 @@ test_website() { ensure_gateway_running local website=$1 echo "Requesting $website" - curl -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds (TTFB: %{time_starttransfer}, rediect: %{time_redirect})\n" -L $website + curl -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds (TTFB: %{time_starttransfer}, redirect: %{time_redirect})\n" -L $website echo "running GC" curl -X POST -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds\n" http://localhost:$PORT/api/v0/repo/gc } @@ -35,7 +53,7 @@ test_website http://localhost:$PORT/ipns/docs.ipfs.tech test_website http://localhost:$PORT/ipns/docs.libp2p.io -test_website http://localhost:$PORT/ipns/drand.love +# test_website http://localhost:$PORT/ipns/drand.love #drand.love is not publishing dnslink records test_website http://localhost:$PORT/ipns/fil.org @@ -65,3 +83,15 @@ test_website http://localhost:$PORT/ipns/specs.ipfs.tech 
test_website http://localhost:$PORT/ipns/saturn.tech test_website http://localhost:$PORT/ipns/web3.storage + +test_website http://localhost:$PORT/ipfs/bafkreiezuss4xkt5gu256vjccx7vocoksxk77vwmdrpwoumfbbxcy2zowq # stock images 3 sec skateboarder video + +test_website http://localhost:$PORT/ipfs/bafybeidsp6fva53dexzjycntiucts57ftecajcn5omzfgjx57pqfy3kwbq # big buck bunny + +test_website http://localhost:$PORT/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze # wikipedia + +test_website http://localhost:$PORT/ipfs/bafybeifaiclxh6pc3bdtrrkpbvvqqxq6hz5r6htdzxaga4fikfpu2u56qi # uniswap interface + +test_website http://localhost:$PORT/ipfs/bafybeiae366charqmeewxags5b2jxtkhfmqyyagvqhrr5l7l7xfpp5ikpa # cid.ipfs.tech + +test_website http://localhost:$PORT/ipfs/bafybeiedlhslivmuj2iinnpd24ulx3fyd7cjenddbkeoxbf3snjiz3npda # docs.ipfs.tech diff --git a/debugging/time-permutations.sh b/debugging/time-permutations.sh new file mode 100755 index 0000000..e7ef141 --- /dev/null +++ b/debugging/time-permutations.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +### +# This script is intended to be run from the root of the helia-http-gateway repository +# It will run the gateway with different configurations and measure the time it takes to run +# The results will be written to a CSV file, and runs that fail prior to the timeout are considered failed runs. +# +# Use like `./debugging/time-permutations.sh 30s 100` to execute 100 iterations of all permutations, where each permutation is run for a maximum of 30 seconds +# This command can be run to ensure that until-death is properly cleaning up after itself (starting/stopping the gateway) +# +# Realistically, you should be running something like `./debugging/time-permutations.sh 15m 100` to get some logs of failure cases like those investigated in https://github.com/ipfs/helia-http-gateway/issues/18 +# + +# globals.. 
same for all configurations +# export DEBUG="helia*,helia*:trace,libp2p*,libp2p*:trace" +export DEBUG="*,*:trace" +unset FASTIFY_DEBUG +export PORT=8080 +export HOST="0.0.0.0" +export ECHO_HEADERS=false +export METRICS=true +export USE_TRUSTLESS_GATEWAYS=false # always set to false since helia-dr and helia-all are the failing cases. +export USE_BITSWAP=true # needs to be true to be able to fetch content without USE_TRUSTLESS_GATEWAYS +export ALLOW_UNHANDLED_ERROR_RECOVERY=false +unset DELEGATED_ROUTING_V1_HOST +unset TRUSTLESS_GATEWAYS + +unset_all() { + unset USE_SUBDOMAINS + # unset USE_BITSWAP + # unset USE_TRUSTLESS_GATEWAYS + unset USE_LIBP2P + unset USE_DELEGATED_ROUTING + unset FILE_DATASTORE_PATH + unset FILE_BLOCKSTORE_PATH +} + +max_time=${1:-30m} +max_iterations=${2:-10} + +mkdir -p permutation-logs +rm -rf permutation-logs/* + +# Results file +results_file="results.csv" +echo "USE_SUBDOMAINS,USE_BITSWAP,USE_TRUSTLESS_GATEWAYS,USE_LIBP2P,USE_DELEGATED_ROUTING,time(max=${max_time}),successful_run" > $results_file + +run_test() { + + npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released + + config_id="USE_SUBDOMAINS=$USE_SUBDOMAINS,USE_BITSWAP=$USE_BITSWAP,USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS,USE_LIBP2P=$USE_LIBP2P,USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING" + # if we cannot get any data, we should skip this run.. 
we need at least USE_BITSWAP enabled, plus either USE_LIBP2P or USE_DELEGATED_ROUTING + if [ "$USE_BITSWAP" = false ]; then + echo "Skipping test for configuration: $config_id" + return + fi + # TODO: we should also allow USE_TRUSTLESS_GATEWAYS=true, but we need to fix the issue with helia-dr and helia-all first + if [ "$USE_LIBP2P" = false ] && [ "$USE_DELEGATED_ROUTING" = false ]; then + echo "Skipping test for configuration: $config_id" + return + fi + echo "Running test for configuration: $config_id" + + rm -f time_info_pipe + mkfifo time_info_pipe + + # log file with config_id and timestamp + run_log_file="permutation-logs/${USE_SUBDOMAINS}-${USE_BITSWAP}-${USE_TRUSTLESS_GATEWAYS}-${USE_LIBP2P}-${USE_DELEGATED_ROUTING}+$(date +%Y-%m-%d%H:%M:%S.%3N).log" + + # This is complicated, but we need to run the command in a subshell to be able to kill it if it takes too long, and also to get the timing information + (timeout --signal=SIGTERM ${max_time} bash -c "time (./debugging/until-death.sh 2 &>${run_log_file})" 2>&1) &> time_info_pipe & + subshell_pid=$! + + # Wait for the process to complete and get the timing information + time_output=$(cat time_info_pipe) + wait $subshell_pid + exit_status=$? # get the exit status of the subshell + + # remove the fifo + rm time_info_pipe + was_successful=false + if [ $exit_status -eq 124 ]; then + echo "timeout occurred... 
(SUCCESSFUL RUN)" + was_successful=true + real_time="${max_time}" + # remove the log file because the test didn't fail before the timeout + rm $run_log_file + else + echo "no timeout occurred...(FAILED RUN)" + was_successful=false + + real_time=$(echo "$time_output" | grep real | awk '{print $2}') + fi + + # Write to file + echo "$USE_SUBDOMAINS,$USE_BITSWAP,$USE_TRUSTLESS_GATEWAYS,$USE_LIBP2P,$USE_DELEGATED_ROUTING,$real_time,$was_successful" >> $results_file +} + +main() { + # Iterate over boolean values for a subset of environment variables + for USE_SUBDOMAINS_VAL in true false; do + # for USE_BITSWAP_VAL in true false; do + # for USE_TRUSTLESS_GATEWAYS_VAL in true false; do + for USE_LIBP2P_VAL in true false; do + for USE_DELEGATED_ROUTING_VAL in true false; do + unset_all + + # Export each variable + export USE_SUBDOMAINS=$USE_SUBDOMAINS_VAL + # export USE_BITSWAP=$USE_BITSWAP_VAL + # export USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS_VAL + export USE_LIBP2P=$USE_LIBP2P_VAL + export USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING_VAL + run_test + done + done + # done + # done + done +} + +cleanup_permutations_called=false +cleanup_permutations() { + if [ "$cleanup_permutations_called" = true ]; then + echo "cleanup_permutations already called" + return + fi + echo "cleanup_permutations called" + cleanup_permutations_called=true + + kill -s TERM $subshell_pid + echo "sent TERM signal to subshell" + wait $subshell_pid # wait for the process to exit + + npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released + + exit 1 +} + +trap cleanup_permutations SIGINT +trap cleanup_permutations SIGTERM + +npm run build +# Tell until-death.sh not to build the gateway +export DEBUG_NO_BUILD=true + +for ((i = 1; i <= $max_iterations; i++)) +do + echo "Iteration $i" + main +done + diff --git a/debugging/until-death.sh b/debugging/until-death.sh index b622471..fce1d56 100755 --- a/debugging/until-death.sh +++ b/debugging/until-death.sh @@ -9,6 +9,46 @@ fi # You have 
to pass `DEBUG=" " to disable debugging when using this script` export DEBUG=${DEBUG:-"helia-http-gateway,helia-http-gateway:server,helia-http-gateway:*:helia-fetch"} export PORT=${PORT:-8080} +EXIT_CODE=0 + +cleanup_until_death_called=false +cleanup_until_death() { + if [ "$cleanup_until_death_called" = true ]; then + echo "cleanup_until_death already called" + return + fi + echo "cleanup_until_death called" + cleanup_until_death_called=true + if [ "$gateway_already_running" != true ]; then + lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}' | xargs --no-run-if-empty kill -9 + + echo "waiting for the gateway to exit" + npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released + fi + + + exit $EXIT_CODE +} + +trap cleanup_until_death EXIT + +# Before starting, output all env vars that helia-http-gateway uses +echo "DEBUG=$DEBUG" +echo "FASTIFY_DEBUG=$FASTIFY_DEBUG" +echo "PORT=$PORT" +echo "HOST=$HOST" +echo "USE_SUBDOMAINS=$USE_SUBDOMAINS" +echo "METRICS=$METRICS" +echo "USE_BITSWAP=$USE_BITSWAP" +echo "USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS" +echo "TRUSTLESS_GATEWAYS=$TRUSTLESS_GATEWAYS" +echo "USE_LIBP2P=$USE_LIBP2P" +echo "ECHO_HEADERS=$ECHO_HEADERS" +echo "USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING" +echo "DELEGATED_ROUTING_V1_HOST=$DELEGATED_ROUTING_V1_HOST" +echo "FILE_DATASTORE_PATH=$FILE_DATASTORE_PATH" +echo "FILE_BLOCKSTORE_PATH=$FILE_BLOCKSTORE_PATH" +echo "ALLOW_UNHANDLED_ERROR_RECOVERY=$ALLOW_UNHANDLED_ERROR_RECOVERY" gateway_already_running=false if nc -z localhost $PORT; then @@ -21,55 +61,33 @@ start_gateway() { echo "gateway is already running" return fi - npm run build - + # if DEBUG_NO_BUILD is set, then we assume the gateway is already built + if [ "$DEBUG_NO_BUILD" != true ]; then + npm run build + fi + echo "starting gateway..." # npx clinic doctor --open=false -- node dist/src/index.js & - node dist/src/index.js & + (node --trace-warnings dist/src/index.js) & + process_id=$! # echo "process id: $!" 
+ npx wait-on "tcp:$PORT" -t 10000 || { + EXIT_CODE=1 + cleanup_until_death + } } -start_gateway & process_pid=$! +start_gateway ensure_gateway_running() { - npx wait-on "tcp:$PORT" -t 5000 || exit 1 + npx wait-on "tcp:$PORT" -t 5000 || { + EXIT_CODE=1 + cleanup_until_death + } } - -cleanup_called=false -cleanup() { - if [ "$cleanup_called" = true ]; then - echo "cleanup already called" - return - fi - # kill $process_pid - # when we're done, ensure the process is killed by sending a SIGTEM - # kill -s SIGTERM $process_pid - # kill any process listening on $PORT - # fuser -k $PORT/tcp - # kill any process listening on $PORT with SIGTERM - - if [ "$gateway_already_running" != true ]; then - kill -s SIGINT $(lsof -i :$PORT -t) - return - fi - - exit 1 -} - -trap cleanup SIGINT -trap cleanup SIGTERM - -# if we get a non-zero exit code, we know the server is no longer listening -# we should also exit early after 4 loops -# iterations=0 -# max_loops=1 +max_timeout=${1:-15} while [ $? -ne 1 ]; do -# # iterations=$((iterations+1)) -# if [ $iterations -gt $max_loops ]; then -# echo "exiting after $max_loops loops" -# break -# fi ensure_gateway_running - ./debugging/test-gateways.sh 30 2>&1 | tee -a debugging/test-gateways.log + ./debugging/test-gateways.sh $max_timeout # 2>&1 | tee -a debugging/test-gateways.log done -cleanup +cleanup_until_death diff --git a/e2e-tests/smoketest.spec.ts b/e2e-tests/smoketest.spec.ts index b6a01d4..24617a5 100644 --- a/e2e-tests/smoketest.spec.ts +++ b/e2e-tests/smoketest.spec.ts @@ -3,15 +3,15 @@ import { PORT } from '../src/constants.js' // test all the same pages listed at https://probelab.io/websites/ const pages = [ - // 'blog.ipfs.tech', // timing out + 'blog.ipfs.tech', 'blog.libp2p.io', 'consensuslab.world', 'docs.ipfs.tech', 'docs.libp2p.io', // 'drand.love', // no dnsaddr or dnslink TXT record, only "x-ipfs-path" header (supported only by ipfs-companion and brave) - // 'fil.org', // timing out - // 'filecoin.io', // timing out - 
// 'green.filecoin.io', // timing out + 'fil.org', + 'filecoin.io', + 'green.filecoin.io', 'ipfs.tech', 'ipld.io', 'libp2p.io', @@ -21,9 +21,9 @@ const pages = [ 'research.protocol.ai', 'singularity.storage', 'specs.ipfs.tech', - // 'strn.network' // redirects to saturn.tech - 'saturn.tech' - // 'web3.storage' // timing out + // 'strn.network', // redirects to saturn.tech + 'saturn.tech', + 'web3.storage' ] // increase default test timeout to 2 minutes diff --git a/playwright.config.ts b/playwright.config.ts index fd383bf..c829324 100644 --- a/playwright.config.ts +++ b/playwright.config.ts @@ -64,10 +64,11 @@ export default defineConfig({ // Tiros does not re-use the existing server. reuseExistingServer: process.env.CI == null, env: { + METRICS: process.env.METRICS ?? 'false', DEBUG: process.env.DEBUG ?? ' ', // we save to the filesystem so github CI can cache the data. - FILE_BLOCKSTORE_PATH: join(process.cwd(), 'test', 'fixtures', 'e2e', 'blockstore'), - FILE_DATASTORE_PATH: join(process.cwd(), 'test', 'fixtures', 'e2e', 'datastore') + FILE_BLOCKSTORE_PATH: process.env.FILE_BLOCKSTORE_PATH ?? join(process.cwd(), 'test', 'fixtures', 'e2e', 'blockstore'), + FILE_DATASTORE_PATH: process.env.FILE_DATASTORE_PATH ?? join(process.cwd(), 'test', 'fixtures', 'e2e', 'datastore') } } })