Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: iai-callgrind #444

Closed
523 changes: 306 additions & 217 deletions .github/workflows/main.yaml

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,7 @@ debug-assertions = true
inherits = "release"
lto = "fat"
overflow-checks = true

[profile.bench]
# Required for iai-callgrind
debug = true
109 changes: 102 additions & 7 deletions ci/ci-util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import subprocess as sp
import sys
from dataclasses import dataclass
from glob import glob
from inspect import cleandoc
from os import getenv
from pathlib import Path
Expand All @@ -18,16 +19,27 @@
"""
usage:

./ci/ci-util.py <SUBCOMMAND>
./ci/ci-util.py <COMMAND> [flags]

SUBCOMMAND:
generate-matrix Calculate a matrix of which functions had source change,
print that as JSON object.
COMMAND:
generate-matrix
Calculate a matrix of which functions had source change, print that as
a JSON object.

locate-baseline [--download] [--extract]
Locate the most recent benchmark baseline available in CI and, if flags
specify, download and extract it. Never exits with nonzero status if
downloading fails.

Note that `--extract` will overwrite files in `iai-home`.
"""
)

# Root of the repository: two levels up from this script's own path.
REPO_ROOT = Path(__file__).parent.parent
# Base `git` invocation that always operates on the repository root (`-C`).
GIT = ["git", "-C", REPO_ROOT]
# Branch whose CI runs are searched when locating a benchmark baseline.
DEFAULT_BRANCH = "icount-benchmarks" # TODO: change once ready to merge
WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts
# Glob for baseline artifact names; used as the `gh run download` pattern and to find
# the downloaded archive on disk.
ARTIFACT_GLOB = "baseline-icount*"

# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
Expand All @@ -40,6 +52,11 @@
TYPES = ["f16", "f32", "f64", "f128"]


def eprint(*args, **kwargs):
    """Forward everything to `print`, but write to standard error instead of stdout."""
    err_stream = sys.stderr
    print(*args, **kwargs, file=err_stream)


class FunctionDef(TypedDict):
"""Type for an entry in `function-definitions.json`"""

Expand Down Expand Up @@ -145,9 +162,85 @@ def make_workflow_output(self) -> str:
return output


def eprint(*args, **kwargs):
    """Same as `print`, except the output goes to stderr."""
    target = sys.stderr
    print(*args, **kwargs, file=target)
def locate_baseline(flags: list[str]) -> None:
"""Find the most recent baseline from CI, download it if specified.

This returns rather than erroring, even if the `gh` commands fail. This is to avoid
erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
run on the branch, etc).
"""

download = False
extract = False

while len(flags) > 0:
match flags[0]:
case "--download":
download = True
case "--extract":
extract = True
case _:
eprint(USAGE)
exit(1)
flags = flags[1:]

if extract and not download:
eprint("cannot extract without downloading")
exit(1)

try:
# Locate the most recent job to run on our branch
latest_job = sp.check_output(
[
"gh",
"run",
"list",
"--limit=1",
"--status=completed", # TODO: change to status=success
f"--branch={DEFAULT_BRANCH}",
"--json=databaseId,url,headSha,conclusion,createdAt,"
"status,workflowDatabaseId,workflowName",
f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
],
text=True,
)
except sp.CalledProcessError as e:
eprint(f"failed to run github command: {e}")
return

latest = json.loads(latest_job)[0]
eprint("latest job: ", json.dumps(latest, indent=4))

if not download:
return

job_id = latest.get("databaseId")
if job_id is None:
eprint("unable to find job ID")
return

# TODO: this is overriding the job to download from (for consistency), remove
job_id = "12785149073"

sp.run(
["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
check=False,
)

if not extract:
return

# Find the baseline with the most recent timestamp. GH downloads the files to e.g.
# `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
if len(candidate_baselines) == 0:
eprint("no possible baseline directories found")
return

candidate_baselines.sort(reverse=True)
baseline_archive = candidate_baselines[0]
eprint(f"extracting {baseline_archive}")
sp.run(["tar", "xJvf", baseline_archive], check=True)


def main():
Expand All @@ -156,6 +249,8 @@ def main():
ctx = Context()
output = ctx.make_workflow_output()
print(f"matrix={output}")
case ["locate-baseline", *flags]:
locate_baseline(flags)
case ["--help" | "-h"]:
print(USAGE)
exit()
Expand Down
8 changes: 8 additions & 0 deletions crates/libm-test/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ build-musl = ["dep:musl-math-sys"]
# Enable report generation without bringing in more dependencies by default
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]

# Enable icount benchmarks (requires iai-callgrind and valgrind)
icount = ["dep:iai-callgrind"]

# Run with a reduced set of benchmarks, such as for CI
short-benchmarks = []

[dependencies]
anyhow = "1.0.90"
az = { version = "1.2.1", optional = true }
gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
iai-callgrind = { version = "0.14.0", optional = true }
indicatif = { version = "0.17.9", default-features = false }
libm = { path = "../..", features = ["unstable-public-internals"] }
libm-macros = { path = "../libm-macros" }
Expand All @@ -48,6 +52,10 @@ rand = { version = "0.8.5", optional = true }
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
libtest-mimic = "0.8.1"

[[bench]]
name = "icount"
harness = false

[[bench]]
name = "random"
harness = false
Expand Down
171 changes: 171 additions & 0 deletions crates/libm-test/benches/icount.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
use std::hint::black_box;

use iai_callgrind::{library_benchmark, library_benchmark_group, main};
use libm_test::gen::spaced;
use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};

// Iteration count handed to `CheckCtx::override_iterations` when generating the inputs
// for each benchmark.
const BENCH_ITER_ITEMS: u64 = 500;

// Callback macro: for a single function name, generates
//   * `setup_<fn>`: builds the list of inputs,
//   * `icount_bench_<fn>`: the iai-callgrind benchmark that calls the routine on them,
//   * `icount_bench_<fn>_group`: a benchmark group containing that benchmark.
// `paste` is used to build the per-function identifiers.
macro_rules! icount_benches {
(
fn_name: $fn_name:ident,
// Attributes are matched so the invocation parses, but they are not used here.
attrs: [$($_attr:meta),*],
) => {
paste::paste! {
// Build the inputs that are passed to the benchmark function below.
fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
type Op = op::$fn_name::Routine;
let mut ctx = CheckCtx::new(
Op::IDENTIFIER,
CheckBasis::None,
GeneratorKind::QuickSpaced
);
// Request `BENCH_ITER_ITEMS` iterations instead of the context's default.
ctx.override_iterations(BENCH_ITER_ITEMS);
let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
// Report how many inputs were actually generated for this operation.
println!("operation {}, {} steps", Op::NAME, ret.len());
ret
}

// NOTE(review): the bench id is `logspace` although the inputs come from
// `spaced::get_test_cases` - confirm the naming is intended.
#[library_benchmark]
#[bench::logspace([< setup_ $fn_name >]())]
fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
type Op = op::$fn_name::Routine;
// Pass the routine through `black_box` (an optimization barrier hint) before
// calling it on every input.
let f = black_box(Op::ROUTINE);
for input in cases.iter().copied() {
input.call(f);
}
}

// One group per function; these group names are what `main!` lists.
library_benchmark_group!(
name = [< icount_bench_ $fn_name _group >];
benchmarks = [< icount_bench_ $fn_name >]
);
}
};
}

// Instantiate `icount_benches!` through `for_each_function!`.
// NOTE(review): presumably the callback is invoked once per libm function - confirm
// against `libm_macros`.
libm_macros::for_each_function! {
callback: icount_benches,
}

// Benchmark entry point provided by iai-callgrind. Each `icount_bench_<fn>_group` named
// here follows the naming used by `icount_benches!`; the list itself is written by hand.
// NOTE(review): presumably a function whose group is missing from this list is not
// benchmarked, so the list has to be kept in sync with `for_each_function!` - confirm.
main!(
library_benchmark_groups = icount_bench_acos_group,
icount_bench_acosf_group,
icount_bench_acosh_group,
icount_bench_acoshf_group,
icount_bench_asin_group,
icount_bench_asinf_group,
icount_bench_asinh_group,
icount_bench_asinhf_group,
icount_bench_atan2_group,
icount_bench_atan2f_group,
icount_bench_atan_group,
icount_bench_atanf_group,
icount_bench_atanh_group,
icount_bench_atanhf_group,
icount_bench_cbrt_group,
icount_bench_cbrtf_group,
icount_bench_ceil_group,
icount_bench_ceilf_group,
icount_bench_copysign_group,
icount_bench_copysignf128_group,
icount_bench_copysignf16_group,
icount_bench_copysignf_group,
icount_bench_cos_group,
icount_bench_cosf_group,
icount_bench_cosh_group,
icount_bench_coshf_group,
icount_bench_erf_group,
icount_bench_erfc_group,
icount_bench_erfcf_group,
icount_bench_erff_group,
icount_bench_exp10_group,
icount_bench_exp10f_group,
icount_bench_exp2_group,
icount_bench_exp2f_group,
icount_bench_exp_group,
icount_bench_expf_group,
icount_bench_expm1_group,
icount_bench_expm1f_group,
icount_bench_fabs_group,
icount_bench_fabsf128_group,
icount_bench_fabsf16_group,
icount_bench_fabsf_group,
icount_bench_fdim_group,
icount_bench_fdimf128_group,
icount_bench_fdimf16_group,
icount_bench_fdimf_group,
icount_bench_floor_group,
icount_bench_floorf_group,
icount_bench_fma_group,
icount_bench_fmaf_group,
icount_bench_fmax_group,
icount_bench_fmaxf_group,
icount_bench_fmin_group,
icount_bench_fminf_group,
icount_bench_fmod_group,
icount_bench_fmodf_group,
icount_bench_frexp_group,
icount_bench_frexpf_group,
icount_bench_hypot_group,
icount_bench_hypotf_group,
icount_bench_ilogb_group,
icount_bench_ilogbf_group,
icount_bench_j0_group,
icount_bench_j0f_group,
icount_bench_j1_group,
icount_bench_j1f_group,
icount_bench_jn_group,
icount_bench_jnf_group,
icount_bench_ldexp_group,
icount_bench_ldexpf_group,
icount_bench_lgamma_group,
icount_bench_lgamma_r_group,
icount_bench_lgammaf_group,
icount_bench_lgammaf_r_group,
icount_bench_log10_group,
icount_bench_log10f_group,
icount_bench_log1p_group,
icount_bench_log1pf_group,
icount_bench_log2_group,
icount_bench_log2f_group,
icount_bench_log_group,
icount_bench_logf_group,
icount_bench_modf_group,
icount_bench_modff_group,
icount_bench_nextafter_group,
icount_bench_nextafterf_group,
icount_bench_pow_group,
icount_bench_powf_group,
icount_bench_remainder_group,
icount_bench_remainderf_group,
icount_bench_remquo_group,
icount_bench_remquof_group,
icount_bench_rint_group,
icount_bench_rintf_group,
icount_bench_round_group,
icount_bench_roundf_group,
icount_bench_scalbn_group,
icount_bench_scalbnf_group,
icount_bench_sin_group,
icount_bench_sinf_group,
icount_bench_sinh_group,
icount_bench_sinhf_group,
icount_bench_sqrt_group,
icount_bench_sqrtf_group,
icount_bench_tan_group,
icount_bench_tanf_group,
icount_bench_tanh_group,
icount_bench_tanhf_group,
icount_bench_tgamma_group,
icount_bench_tgammaf_group,
icount_bench_trunc_group,
icount_bench_truncf128_group,
icount_bench_truncf16_group,
icount_bench_truncf_group,
icount_bench_y0_group,
icount_bench_y0f_group,
icount_bench_y1_group,
icount_bench_y1f_group,
icount_bench_yn_group,
icount_bench_ynf_group,
);
3 changes: 2 additions & 1 deletion crates/libm-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ pub use f8_impl::f8;
pub use libm::support::{Float, Int, IntTy, MinInt};
pub use num::{FloatExt, linear_ints, logspace};
pub use op::{
BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
Ty,
};
pub use precision::{MaybeOverride, SpecialCase, default_ulp};
use run_cfg::EXTENSIVE_MAX_ITERATIONS;
Expand Down
2 changes: 2 additions & 0 deletions crates/libm-test/src/op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ pub type OpCFn<Op> = <Op as MathOp>::CFn;
pub type OpCRet<Op> = <Op as MathOp>::CRet;
/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
pub type OpRustRet<Op> = <Op as MathOp>::RustRet;

Expand Down
Loading
Loading