diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 000000000..20b41858a
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,30 @@
+name: Performance check
+
+on:
+  pull_request:
+
+jobs:
+  run_profiler:
+    name: Run Profiler
+    runs-on: ubuntu-latest
+    permissions: # these permissions must be set for AWS auth to work!
+      id-token: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: us-west-2
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          role-session-name: PushDockerImage
+
+      - name: Run all tests
+        run: |
+          python -m venv profiler_env
+          source profiler_env/bin/activate
+          ./tools/perf_checker/perf_checker.sh
diff --git a/tools/perf_checker/benchmark1.py b/tools/perf_checker/benchmark1.py
new file mode 100644
index 000000000..0fea47166
--- /dev/null
+++ b/tools/perf_checker/benchmark1.py
@@ -0,0 +1,27 @@
+"""End-to-end timing benchmark: read the Census 'eye' tissue slice into AnnData."""
+
+from time import perf_counter
+
+import cellxgene_census
+
+import tiledbsoma as soma
+
+# Keyword arguments for open_soma(); selects the Census release to read.
+census_S3_latest = dict(census_version="2023-10-23")
+
+
+def main():
+    """Open the Census, run the query, convert it to AnnData and print the wall time."""
+    t1 = perf_counter()
+    with cellxgene_census.open_soma(**census_S3_latest) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'eye'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+    t2 = perf_counter()
+    print(f"End to end time {t2 - t1}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/perf_checker/perf_checker.sh b/tools/perf_checker/perf_checker.sh
new file mode 100755
index 000000000..49ea3c0b7
--- /dev/null
+++ b/tools/perf_checker/perf_checker.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Runs every benchmark under the TileDB-SOMA profiler, then checks the recorded
+# timings with profile_report.py. Needs bash: uses arrays, `source` and `pipefail`.
+set -euxo pipefail
+
+# Work from this script's directory: the benchmarks and profile_report.py live
+# next to it, and every relative path below is resolved from here.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+python -m venv perf
+source perf/bin/activate
+pip install psutil
+pip install gitpython
+pip install somacore
+pip install tiledbsoma
+pip install cellxgene_census
+
+sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
+sudo apt install -y ./mount-s3.deb
+mkdir -p ./mount-s3 ./s3_cache
+mount-s3 census-profiler-tests ./mount-s3 --cache ./s3_cache --metadata-ttl 300
+
+# new benchmarks must be added to this list
+declare -a benchmarks=("benchmark1.py")
+
+# Check out the profiler branch before installing so pip sees that branch's code.
+git clone https://github.com/single-cell-data/TileDB-SOMA.git
+git -C TileDB-SOMA checkout temp_profiler_for_harnes
+pip install TileDB-SOMA/profiler
+
+for benchmark in "${benchmarks[@]}"
+do
+  python ./TileDB-SOMA/profiler "python ${benchmark}" -t time
+  python ./profile_report.py "${benchmark}"
+done
diff --git a/tools/perf_checker/profile_report.py b/tools/perf_checker/profile_report.py
new file mode 100644
index 000000000..5a7431fc3
--- /dev/null
+++ b/tools/perf_checker/profile_report.py
@@ -0,0 +1,46 @@
+"""Compare the two most recent profiler runs of a benchmark and fail on a slowdown."""
+
+import argparse
+
+import data
+
+# A run counts as a regression when its cost exceeds the previous run's cost
+# by more than this factor (1.10 means more than 10% slower).
+threshold = 1.10
+
+
+def run_cost(run):
+    """Return the metric compared between runs: user_time_sec plus elapsed_time."""
+    return float(run.user_time_sec) + float(run.elapsed_time)
+
+
+def main():
+    """Load the benchmark's recorded runs and exit non-zero if the latest one regressed."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("benchmark", type=str)
+    args = parser.parse_args()
+
+    # Processes the set of previously written logs, oldest first
+    db = data.FileBasedProfileDB("./mount-s3")
+    runs = sorted(db.find(f"python {args.benchmark}"), key=lambda run: run.timestamp)
+    for i, run in enumerate(runs):
+        print(f"{i} dt[{i}].user_time_sec = {run.user_time_sec} ts {run.timestamp}")
+
+    if len(runs) < 2:
+        print("Fewer than two recorded runs; nothing to compare")
+        return
+
+    prev_run, curr_run = runs[-2:]
+    prev, curr = run_cost(prev_run), run_cost(curr_run)
+    print(f"Prev = {prev} Curr = {curr}")
+    print(
+        f"Prev TBD version = {prev_run.tiledbsoma_version} Curr TBD version = {curr_run.tiledbsoma_version}"
+    )
+
+    if curr > threshold * prev:
+        raise SystemExit(f"Potential performance degradation detected {prev} vs {curr}")
+    print("No recent performance degradation detected")
+
+
+if __name__ == "__main__":
+    main()