ipfs · SgtPooki · Mar 29, 2024 · Mar 29, 2024 · SgtPooki · Mar 29, 2024
diff --git a/debugging/test-gateways.sh b/debugging/test-gateways.sh
@@ -1,5 +1,20 @@
 #!/usr/bin/env bash
 
+EXIT_CODE=0 # status this script exits with; ensure_gateway_running sets it to 1 on failure
+cleanup_gateway_test_called=false # guards cleanup_gateway_test against running twice
+cleanup_gateway_test() {
+  if [ "$cleanup_gateway_test_called" = true ]; then
+    echo "cleanup_gateway_test already called"
+    return
+  fi
+  echo "cleanup_gateway_test called"
+  cleanup_gateway_test_called=true
+  # When called directly (not from the trap), the exit below fires the EXIT trap; the flag set above makes that second call return early.
+  exit $EXIT_CODE
+}
+
+trap cleanup_gateway_test EXIT
+
 # Query all endpoints until failure
 # This script is intended to be run from the root of the helia-http-gateway repository
 
@@ -11,7 +26,10 @@ if ! nc -z localhost $PORT; then
 fi
 
 ensure_gateway_running() {
-  npx wait-on "tcp:$PORT" -t 1000 || exit 1
+  npx wait-on "tcp:$PORT" -t 1000 || { # wait-on failed: the gateway port did not become reachable in time
+    EXIT_CODE=1
+    cleanup_gateway_test # prints its message and exits with $EXIT_CODE
+  }
 }
 
 # Use the first argument to this script (if any) as the maximum timeout for curl
@@ -20,7 +38,7 @@ test_website() {
   ensure_gateway_running
   local website=$1
   echo "Requesting $website"
-  curl -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds (TTFB: %{time_starttransfer}, rediect: %{time_redirect})\n" -L $website
+  curl -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds (TTFB: %{time_starttransfer}, redirect: %{time_redirect})\n" -L $website
   echo "running GC"
   curl -X POST -m $max_timeout -s --no-progress-meter -o /dev/null -w "%{url}: HTTP_%{http_code} in %{time_total} seconds\n" http://localhost:$PORT/api/v0/repo/gc
 }
@@ -35,7 +53,7 @@ test_website  http://localhost:$PORT/ipns/docs.ipfs.tech
 
 test_website  http://localhost:$PORT/ipns/docs.libp2p.io
 
-test_website  http://localhost:$PORT/ipns/drand.love
+# test_website  http://localhost:$PORT/ipns/drand.love #drand.love is not publishing dnslink records
 
 test_website  http://localhost:$PORT/ipns/fil.org
 
@@ -65,3 +83,15 @@ test_website http://localhost:$PORT/ipns/specs.ipfs.tech
 test_website http://localhost:$PORT/ipns/saturn.tech
 
 test_website http://localhost:$PORT/ipns/web3.storage
+
+test_website http://localhost:$PORT/ipfs/bafkreiezuss4xkt5gu256vjccx7vocoksxk77vwmdrpwoumfbbxcy2zowq # stock images 3 sec skateboarder video
+
+test_website http://localhost:$PORT/ipfs/bafybeidsp6fva53dexzjycntiucts57ftecajcn5omzfgjx57pqfy3kwbq # big buck bunny
+
+test_website http://localhost:$PORT/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze # wikipedia
+
+test_website http://localhost:$PORT/ipfs/bafybeifaiclxh6pc3bdtrrkpbvvqqxq6hz5r6htdzxaga4fikfpu2u56qi # uniswap interface
+
+test_website http://localhost:$PORT/ipfs/bafybeiae366charqmeewxags5b2jxtkhfmqyyagvqhrr5l7l7xfpp5ikpa # cid.ipfs.tech
+
+test_website http://localhost:$PORT/ipfs/bafybeiedlhslivmuj2iinnpd24ulx3fyd7cjenddbkeoxbf3snjiz3npda # docs.ipfs.tech
diff --git a/debugging/time-permutations.sh b/debugging/time-permutations.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+
+###
+# This script is intended to be run from the root of the helia-http-gateway repository
+# It will run the gateway with different configurations and measure the time it takes to run
+# The results will be written to a CSV file, and runs that fail prior to the timeout are considered failed runs.
+#
+# Use like `./debugging/time-permutations.sh 30s 100` to execute 100 iterations of all permutations, where each permutation is run for a maximum of 30 seconds
+# This command can be run to ensure that until-death is properly cleaning up after itself (starting/stopping the gateway)
+#
+# Realistically, you should be running something like `./debugging/time-permutations.sh 15m 100` to get some logs of failure cases like those investigated in https://github.com/ipfs/helia-http-gateway/issues/18
+#
+
+# globals.. same for all configurations
+# export DEBUG="helia*,helia*:trace,libp2p*,libp2p*:trace"
+export DEBUG="*,*:trace"
+unset FASTIFY_DEBUG
+export PORT=8080
+export HOST="0.0.0.0"
+export ECHO_HEADERS=false
+export METRICS=true
+export USE_TRUSTLESS_GATEWAYS=false # always set to false since helia-dr and helia-all are the failing cases.
+export USE_BITSWAP=true # needs to be true to be able to fetch content without USE_TRUSTLESS_GATEWAYS
+export ALLOW_UNHANDLED_ERROR_RECOVERY=false
+unset DELEGATED_ROUTING_V1_HOST
+unset TRUSTLESS_GATEWAYS
+
+unset_all() {
+  # Clear only the per-permutation variables; USE_BITSWAP and
+  # USE_TRUSTLESS_GATEWAYS keep the values exported above.
+  unset USE_SUBDOMAINS \
+    USE_LIBP2P \
+    USE_DELEGATED_ROUTING \
+    FILE_DATASTORE_PATH \
+    FILE_BLOCKSTORE_PATH
+}
+
+max_time=${1:-30m}
+max_iterations=${2:-10}
+
+mkdir -p permutation-logs
+rm -rf permutation-logs/*
+
+# Results file
+results_file="results.csv"
+echo "USE_SUBDOMAINS,USE_BITSWAP,USE_TRUSTLESS_GATEWAYS,USE_LIBP2P,USE_DELEGATED_ROUTING,time(max=${max_time}),successful_run" > $results_file
+
+run_test() {
+  # Runs ./debugging/until-death.sh under timeout for the currently exported configuration and appends one row to $results_file (skipped configurations return early and write nothing).
+  npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released
+
+  config_id="USE_SUBDOMAINS=$USE_SUBDOMAINS,USE_BITSWAP=$USE_BITSWAP,USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS,USE_LIBP2P=$USE_LIBP2P,USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING"
+  # if we cannot get any data, we should skip this run.. we need at least USE_BITSWAP enabled, plus either USE_LIBP2P or USE_DELEGATED_ROUTING
+  if [ "$USE_BITSWAP" = false ]; then
+    echo "Skipping test for configuration: $config_id"
+    return
+  fi
+  # TODO: we should also allow USE_TRUSTLESS_GATEWAYS=true, but we need to fix the issue with helia-dr and helia-all first
+  if [ "$USE_LIBP2P" = false ] && [ "$USE_DELEGATED_ROUTING" = false ]; then
+    echo "Skipping test for configuration: $config_id"
+    return
+  fi
+  echo "Running test for configuration: $config_id"
+
+  rm -f time_info_pipe
+  mkfifo time_info_pipe
+
+  # log file with config_id and timestamp
+  run_log_file="permutation-logs/${USE_SUBDOMAINS}-${USE_BITSWAP}-${USE_TRUSTLESS_GATEWAYS}-${USE_LIBP2P}-${USE_DELEGATED_ROUTING}+$(date +%Y-%m-%d%H:%M:%S.%3N).log"
+
+  # This is complicated, but we need to run the command in a subshell to be able to kill it if it takes too long, and also to get the timing information
+  (timeout --signal=SIGTERM ${max_time} bash -c "time (./debugging/until-death.sh 2 &>${run_log_file})" 2>&1) &> time_info_pipe &
+  subshell_pid=$! # intentionally global: cleanup_permutations signals and waits on this PID
+
+  # Wait for the process to complete and get the timing information
+  time_output=$(cat time_info_pipe) # blocks until the background job closes its end of the fifo
+  wait $subshell_pid
+  exit_status=$? # get the exit status of the subshell
+
+  # remove the fifo
+  rm time_info_pipe
+  was_successful=false
+  if [ $exit_status -eq 124 ]; then # 124 is the status timeout reports when the time limit was reached
+    echo "timeout occurred... (SUCCESSFUL RUN)"
+    was_successful=true
+    real_time="${max_time}"
+    # remove the log file because the test didn't fail before the timeout
+    rm $run_log_file
+  else
+    echo "no timeout occurred...(FAILED RUN)"
+    was_successful=false
+
+    real_time=$(echo "$time_output" | grep real | awk '{print $2}') # second field of the "real" line printed by bash's time keyword
+  fi
+
+  # Write to file
+  echo "$USE_SUBDOMAINS,$USE_BITSWAP,$USE_TRUSTLESS_GATEWAYS,$USE_LIBP2P,$USE_DELEGATED_ROUTING,$real_time,$was_successful" >> $results_file
+}
+
+main() {
+  # Call run_test once for each true/false combination of USE_SUBDOMAINS, USE_LIBP2P and USE_DELEGATED_ROUTING (run_test itself skips combinations it cannot exercise)
+  for USE_SUBDOMAINS_VAL in true false; do
+    # for USE_BITSWAP_VAL in true false; do
+      # for USE_TRUSTLESS_GATEWAYS_VAL in true false; do
+    for USE_LIBP2P_VAL in true false; do
+      for USE_DELEGATED_ROUTING_VAL in true false; do
+        unset_all
+
+        # Export each variable
+        export USE_SUBDOMAINS=$USE_SUBDOMAINS_VAL
+        # export USE_BITSWAP=$USE_BITSWAP_VAL
+        # export USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS_VAL
+        export USE_LIBP2P=$USE_LIBP2P_VAL
+        export USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING_VAL
+        run_test
+      done
+    done
+      # done
+    # done
+  done
+}
+
+cleanup_permutations_called=false # guards cleanup_permutations against running twice
+cleanup_permutations() { # SIGINT/SIGTERM handler: stop the current run, wait for $PORT to be released, exit 1
+  if [ "$cleanup_permutations_called" = true ]; then
+    echo "cleanup_permutations already called"
+    return
+  fi
+  echo "cleanup_permutations called"
+  cleanup_permutations_called=true
+  if [ -n "${subshell_pid:-}" ]; then # unset when the signal arrives before the first run_test (e.g. during npm run build)
+    kill -s TERM "$subshell_pid"
+    echo "sent TERM signal to subshell"
+    wait "$subshell_pid" # wait for the process to exit
+  fi
+  npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released
+
+  exit 1
+}
+
+trap cleanup_permutations SIGINT
+trap cleanup_permutations SIGTERM
+
+npm run build
+# Tell until-death.sh not to build the gateway
+export DEBUG_NO_BUILD=true
+
+for ((i = 1; i <= $max_iterations; i++))
+do
+  echo "Iteration $i"
+  main
+done
+
diff --git a/debugging/until-death.sh b/debugging/until-death.sh
@@ -9,6 +9,46 @@ fi
 # You have to pass `DEBUG=" " to disable debugging when using this script`
 export DEBUG=${DEBUG:-"helia-http-gateway,helia-http-gateway:server,helia-http-gateway:*:helia-fetch"}
 export PORT=${PORT:-8080}
+EXIT_CODE=0
+
+cleanup_until_death_called=false # guards cleanup_until_death against running twice
+cleanup_until_death() {
+  if [ "$cleanup_until_death_called" = true ]; then
+    echo "cleanup_until_death already called"
+    return
+  fi
+  echo "cleanup_until_death called"
+  cleanup_until_death_called=true
+  if [ "$gateway_already_running" != true ]; then
+    # Force-kill whatever is listening on $PORT (skipped when gateway_already_running is true).
+    lsof -i "TCP:$PORT" | grep LISTEN | awk '{print $2}' | xargs --no-run-if-empty kill -9
+    echo "waiting for the gateway to exit"
+    npx wait-on "tcp:$PORT" -t 10000 -r # wait for the port to be released
+  fi
+  # When called directly (not from the trap), the exit below fires the EXIT trap;
+  # the flag set above makes that second invocation return early.
+  exit $EXIT_CODE
+}
+
+trap cleanup_until_death EXIT
+
+# Before starting, output all env vars that helia-http-gateway uses
+echo "DEBUG=$DEBUG"
+echo "FASTIFY_DEBUG=$FASTIFY_DEBUG"
+echo "PORT=$PORT"
+echo "HOST=$HOST"
+echo "USE_SUBDOMAINS=$USE_SUBDOMAINS"
+echo "METRICS=$METRICS"
+echo "USE_BITSWAP=$USE_BITSWAP"
+echo "USE_TRUSTLESS_GATEWAYS=$USE_TRUSTLESS_GATEWAYS"
+echo "TRUSTLESS_GATEWAYS=$TRUSTLESS_GATEWAYS"
+echo "USE_LIBP2P=$USE_LIBP2P"
+echo "ECHO_HEADERS=$ECHO_HEADERS"
+echo "USE_DELEGATED_ROUTING=$USE_DELEGATED_ROUTING"
+echo "DELEGATED_ROUTING_V1_HOST=$DELEGATED_ROUTING_V1_HOST"
+echo "FILE_DATASTORE_PATH=$FILE_DATASTORE_PATH"
+echo "FILE_BLOCKSTORE_PATH=$FILE_BLOCKSTORE_PATH"
+echo "ALLOW_UNHANDLED_ERROR_RECOVERY=$ALLOW_UNHANDLED_ERROR_RECOVERY"
 
 gateway_already_running=false
 if nc -z localhost $PORT; then
@@ -21,55 +61,33 @@ start_gateway() {
     echo "gateway is already running"
     return
   fi
-  npm run build
-
+  # if DEBUG_NO_BUILD is set, then we assume the gateway is already built
+  if [ "$DEBUG_NO_BUILD" != true ]; then
+    npm run build
+  fi
+  echo "starting gateway..."
   # npx clinic doctor --open=false -- node dist/src/index.js &
-  node dist/src/index.js &
+  (node --trace-warnings dist/src/index.js) &
+  process_id=$!
   # echo "process id: $!"
+  npx wait-on "tcp:$PORT" -t 10000 || {
+    EXIT_CODE=1
+    cleanup_until_death
+  }
 }
-start_gateway & process_pid=$!
+start_gateway
 
 ensure_gateway_running() {
-  npx wait-on "tcp:$PORT" -t 5000 || exit 1
+  npx wait-on "tcp:$PORT" -t 5000 || { # wait-on failed: the gateway port did not become reachable in time
+    EXIT_CODE=1
+    cleanup_until_death # kills the listener on $PORT unless gateway_already_running is true, then exits with $EXIT_CODE
+  }
 }
 
-
-cleanup_called=false
-cleanup() {
-  if [ "$cleanup_called" = true ]; then
-    echo "cleanup already called"
-    return
-  fi
-  # kill $process_pid
-  # when we're done, ensure the process is killed by sending a SIGTEM
-  # kill -s SIGTERM $process_pid
-  # kill any process listening on $PORT
-  # fuser -k $PORT/tcp
-  # kill any process listening on $PORT with SIGTERM
-
-  if [ "$gateway_already_running" != true ]; then
-    kill -s SIGINT $(lsof -i :$PORT -t)
-    return
-  fi
-
-  exit 1
-}
-
-trap cleanup SIGINT
-trap cleanup SIGTERM
-
-# if we get a non-zero exit code, we know the server is no longer listening
-# we should also exit early after 4 loops
-# iterations=0
-# max_loops=1
+max_timeout=${1:-15} # first script argument, forwarded to test-gateways.sh as its curl timeout (default 15)
 while [ $? -ne 1 ]; do
-#   # iterations=$((iterations+1))
-#   if [ $iterations -gt $max_loops ]; then
-#     echo "exiting after $max_loops loops"
-#     break
-#   fi
   ensure_gateway_running
-  ./debugging/test-gateways.sh 30 2>&1 | tee -a debugging/test-gateways.log
+  ./debugging/test-gateways.sh $max_timeout # 2>&1 | tee -a debugging/test-gateways.log
 done
 
-cleanup
+cleanup_until_death # reached once test-gateways.sh has returned exit status 1 and the loop ended
diff --git a/e2e-tests/smoketest.spec.ts b/e2e-tests/smoketest.spec.ts
@@ -3,15 +3,15 @@ import { PORT } from '../src/constants.js'
 
 // test all the same pages listed at https://probelab.io/websites/
 const pages = [
-  // 'blog.ipfs.tech', // timing out
+  'blog.ipfs.tech',
   'blog.libp2p.io',
   'consensuslab.world',
   'docs.ipfs.tech',
   'docs.libp2p.io',
   // 'drand.love', // no dnsaddr or dnslink TXT record, only "x-ipfs-path" header (supported only by ipfs-companion and brave)
-  // 'fil.org', // timing out
-  // 'filecoin.io', // timing out
-  // 'green.filecoin.io', // timing out
+  'fil.org',
+  'filecoin.io',
+  'green.filecoin.io',
   'ipfs.tech',
   'ipld.io',
   'libp2p.io',
@@ -21,9 +21,9 @@ const pages = [
   'research.protocol.ai',
   'singularity.storage',
   'specs.ipfs.tech',
-  // 'strn.network' // redirects to saturn.tech
-  'saturn.tech'
-  // 'web3.storage' // timing out
+  // 'strn.network', // redirects to saturn.tech
+  'saturn.tech',
+  'web3.storage'
 ]
 
 // increase default test timeout to 2 minutes

diff --git a/playwright.config.ts b/playwright.config.ts
@@ -64,10 +64,11 @@ export default defineConfig({
     // Tiros does not re-use the existing server.
     reuseExistingServer: process.env.CI == null,
     env: {
+      METRICS: process.env.METRICS ?? 'false',
       DEBUG: process.env.DEBUG ?? ' ',
       // we save to the filesystem so github CI can cache the data.
-      FILE_BLOCKSTORE_PATH: join(process.cwd(), 'test', 'fixtures', 'e2e', 'blockstore'),
-      FILE_DATASTORE_PATH: join(process.cwd(), 'test', 'fixtures', 'e2e', 'datastore')
+      FILE_BLOCKSTORE_PATH: process.env.FILE_BLOCKSTORE_PATH ?? join(process.cwd(), 'test', 'fixtures', 'e2e', 'blockstore'),
+      FILE_DATASTORE_PATH: process.env.FILE_DATASTORE_PATH ?? join(process.cwd(), 'test', 'fixtures', 'e2e', 'datastore') // join() was missing: the bare parenthesised comma expression evaluated to just 'datastore'
     }
   }
 })