Merge cockroachdb#128184

128184: roachprod-microbench: standardize naming r=srosenberg a=herkolategan Previously, we would refer to "new" and "old" as the revisions microbenchmarks ran against. This has been updated to use a more descriptive naming scheme. Baseline, previously "old", refers to the base revision compared against, which will usually be a stable release. Experiment, previously "new", refers to the experimental revision, a PR, or merge to master which is being compared against the stable baseline. Epic: None Release note: None Co-authored-by: Herko Lategan <[email protected]>
herkolategan · Aug 6, 2024 · d527c4c · d527c4c
2 parents 8bc8c72 + cbd92db
commit d527c4c
Show file tree

Hide file tree

Showing 7 changed files with 57 additions and 65 deletions.
diff --git a/build/teamcity/cockroach/nightlies/microbenchmark_weekly.sh b/build/teamcity/cockroach/nightlies/microbenchmark_weekly.sh
@@ -92,8 +92,8 @@ done
 
 # Execute microbenchmarks
 ./bin/roachprod-microbench run "$ROACHPROD_CLUSTER" \
-  --binaries "${sha_arr[0]}" \
-  --compare-binaries "${sha_arr[1]}" \
+  --binaries experiment="${sha_arr[0]}" \
+  --binaries baseline="${sha_arr[1]}" \
   --output-dir="$output_dir" \
   --iterations "$BENCH_ITERATIONS" \
   --shell="$BENCH_SHELL" \
@@ -112,8 +112,9 @@ if [ -d "$output_dir/0" ] && [ "$(ls -A "$output_dir/0")" ] \
   if [ -n "${TRIGGERED_BUILD:-}" ]; then
     slack_token="${MICROBENCH_SLACK_TOKEN}"
   fi
+  # Sheet description is in the form: `baseline` to `experiment`
   sheet_description="${name_arr[1]} -> ${name_arr[0]}"
-  ./bin/roachprod-microbench compare "$output_dir/0" "$output_dir/1" \
+  ./bin/roachprod-microbench compare "$output_dir/experiment" "$output_dir/baseline" \
     ${slack_token:+--slack-token="$slack_token"} \
     --sheet-desc="$sheet_description" 2>&1 | tee "$output_dir/sheets.txt"
 else

diff --git a/pkg/cmd/roachprod-microbench/compare.go b/pkg/cmd/roachprod-microbench/compare.go
@@ -100,12 +100,12 @@ func (c *compare) readMetrics() (map[string]*model.MetricMap, error) {
 
 		// Read the previous and current results. If either is missing, we'll just
 		// skip it.
-		if err := processReportFile(results, "old", pkg,
+		if err := processReportFile(results, "baseline", pkg,
 			filepath.Join(c.oldDir, getReportLogName(reportLogName, pkg))); err != nil {
 			return nil, err
 
 		}
-		if err := processReportFile(results, "new", pkg,
+		if err := processReportFile(results, "experiment", pkg,
 			filepath.Join(c.newDir, getReportLogName(reportLogName, pkg))); err != nil {
 			log.Printf("failed to add report for %s: %s", pkg, err)
 			return nil, err
@@ -185,7 +185,7 @@ func (c *compare) publishToGoogleSheets(
 			sheetName = fmt.Sprintf("%s (%s)", sheetName, c.sheetDesc)
 		}
 
-		url, err := c.service.CreateSheet(c.ctx, sheetName, comparisonResults, "old", "new")
+		url, err := c.service.CreateSheet(c.ctx, sheetName, comparisonResults, "baseline", "experiment")
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to create sheet for %s", pkgGroup)
 		}

diff --git a/pkg/cmd/roachprod-microbench/compare_test.go b/pkg/cmd/roachprod-microbench/compare_test.go
@@ -49,7 +49,7 @@ func metricsToText(metricMaps map[string]*model.MetricMap) string {
 				for i, key := range summaryKeys {
 					centers[i] = entry.Summaries[key].Center
 				}
-				comparison := metric.ComputeComparison(entryKey, "old", "new")
+				comparison := metric.ComputeComparison(entryKey, "baseline", "experiment")
 				fmt.Fprintf(buf, "BenchmarkEntry %s %s %v %s\n",
 					entryKey, comparison.FormattedDelta, centers, comparison.Distribution.String(),
 				)

diff --git a/pkg/cmd/roachprod-microbench/executor.go b/pkg/cmd/roachprod-microbench/executor.go
@@ -18,7 +18,6 @@ import (
 	"path"
 	"path/filepath"
 	"regexp"
-	"strconv"
 	"strings"
 	"time"
 
@@ -30,12 +29,12 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/retry"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
 	"github.com/cockroachdb/errors"
+	"golang.org/x/exp/maps"
 )
 
 type executorConfig struct {
 	cluster           string
-	binaries          string
-	compareBinaries   string
+	binaries          map[string]string
 	excludeList       []string
 	ignorePackageList []string
 	outputDir         string
@@ -51,7 +50,6 @@ type executorConfig struct {
 
 type executor struct {
 	executorConfig
-	remoteBinariesList     []string
 	excludeBenchmarksRegex [][]*regexp.Regexp
 	ignorePackages         map[string]struct{}
 	runOptions             install.RunOptions
@@ -63,9 +61,9 @@ type benchmark struct {
 	name string
 }
 
-type benchmarkIndexed struct {
+type benchmarkKey struct {
 	benchmark
-	index int
+	key string
 }
 
 type benchmarkExtractionResult struct {
@@ -109,11 +107,6 @@ func newExecutor(config executorConfig) (*executor, error) {
 		})
 	}
 
-	remoteBinariesList := []string{config.binaries}
-	if config.compareBinaries != "" {
-		remoteBinariesList = append(remoteBinariesList, config.compareBinaries)
-	}
-
 	if config.iterations < 1 {
 		return nil, errors.New("iterations must be greater than 0")
 	}
@@ -126,7 +119,6 @@ func newExecutor(config executorConfig) (*executor, error) {
 	return &executor{
 		executorConfig:         config,
 		excludeBenchmarksRegex: excludeBenchmarks,
-		remoteBinariesList:     remoteBinariesList,
 		ignorePackages:         ignorePackages,
 		runOptions:             runOptions,
 		log:                    l,
@@ -135,7 +127,7 @@ func newExecutor(config executorConfig) (*executor, error) {
 
 func defaultExecutorConfig() executorConfig {
 	return executorConfig{
-		binaries:     "new",
+		binaries:     map[string]string{"experiment": "experiment"},
 		outputDir:    "artifacts/roachprod-microbench",
 		timeout:      "10m",
 		shellCommand: "COCKROACH_RANDOM_SEED=1",
@@ -155,7 +147,7 @@ func (e *executor) listBenchmarks(
 	// Generate commands for listing benchmarks.
 	commands := make([][]cluster.RemoteCommand, 0)
 	for _, pkg := range packages {
-		for _, bin := range e.remoteBinariesList {
+		for _, bin := range e.binaries {
 			remoteBinDir := fmt.Sprintf("%s/%s/bin", bin, pkg)
 			// The command will not fail if the directory does not exist.
 			command := cluster.RemoteCommand{
@@ -226,11 +218,11 @@ func (e *executor) listBenchmarks(
 	for k, v := range benchmarkCounts {
 		// Some packages override TestMain to run the tests twice or more thus
 		// appearing more than once in the listing logic.
-		if v >= len(e.remoteBinariesList) {
+		if v >= len(e.binaries) {
 			packageCounts[k.pkg]++
 			benchmarks = append(benchmarks, k)
 		} else {
-			e.log.Printf("Ignoring benchmark %s/%s, missing from %d binaries", k.pkg, k.name, len(e.remoteBinariesList)-v)
+			e.log.Printf("Ignoring benchmark %s/%s, missing from %d binaries", k.pkg, k.name, len(e.binaries)-v)
 		}
 	}
 
@@ -297,7 +289,7 @@ func (e *executor) executeBenchmarks() error {
 
 	// List packages containing benchmarks on the remote cluster for the first
 	// remote binaries' directory.
-	remotePackageDir := e.remoteBinariesList[0]
+	remotePackageDir := maps.Values(e.binaries)[0]
 	remotePackages, err := e.listRemotePackages(muteLogger, remotePackageDir)
 	if err != nil {
 		return err
@@ -321,23 +313,23 @@ func (e *executor) executeBenchmarks() error {
 		return errors.New("no packages containing benchmarks found")
 	}
 
-	// Create reports for each binary.
-	// Currently, we only support comparing two binaries at most.
-	reporters := make([]*report, 0)
+	// Create reports for each key, binary combination.
+	reporters := make(map[string]*report)
 	defer func() {
 		for _, report := range reporters {
 			report.closeReports()
 		}
 	}()
-	for index := range e.remoteBinariesList {
+	for key := range e.binaries {
 		report := &report{}
-		dir := path.Join(e.outputDir, strconv.Itoa(index))
+		// Use the binary key as the report output directory name.
+		dir := path.Join(e.outputDir, key)
 		err = os.MkdirAll(dir, os.ModePerm)
 		if err != nil {
 			return err
 		}
 		err = report.createReports(e.log, dir, validPackages)
-		reporters = append(reporters, report)
+		reporters[key] = report
 		if err != nil {
 			return err
 		}
@@ -357,11 +349,11 @@ func (e *executor) executeBenchmarks() error {
 		commandGroup := make([]cluster.RemoteCommand, 0)
 		// Weave the commands between binaries and iterations.
 		for i := 0; i < e.iterations; i++ {
-			for binIndex, bin := range e.remoteBinariesList {
+			for key, bin := range e.binaries {
 				shellCommand := fmt.Sprintf(`"cd %s/%s/bin && %s"`, bin, bench.pkg, runCommand)
 				command := cluster.RemoteCommand{
 					Args:     []string{"sh", "-c", shellCommand},
-					Metadata: benchmarkIndexed{bench, binIndex},
+					Metadata: benchmarkKey{bench, key},
 				}
 				commandGroup = append(commandGroup, command)
 			}
@@ -389,8 +381,8 @@ func (e *executor) executeBenchmarks() error {
 			fmt.Print(".")
 		}
 		extractResults := extractBenchmarkResults(response.Stdout)
-		benchmarkResponse := response.Metadata.(benchmarkIndexed)
-		report := reporters[benchmarkResponse.index]
+		benchmarkResponse := response.Metadata.(benchmarkKey)
+		report := reporters[benchmarkResponse.key]
 		for _, benchmarkResult := range extractResults.results {
 			if _, writeErr := report.benchmarkOutput[benchmarkResponse.pkg].WriteString(
 				fmt.Sprintf("%s\n", strings.Join(benchmarkResult, " "))); writeErr != nil {

diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go
@@ -109,8 +109,7 @@ func makeRunCommand() *cobra.Command {
 		},
 		RunE: runCmdFunc,
 	}
-	cmd.Flags().StringVar(&config.binaries, "binaries", config.binaries, "remote path of the test binaries")
-	cmd.Flags().StringVar(&config.compareBinaries, "compare-binaries", "", "run additional binaries on this remote path and compare the results")
+	cmd.Flags().StringToStringVar(&config.binaries, "binaries", config.binaries, "local output name and remote path of the test binaries to run (ex., experiment=<sha1>,baseline=<sha2>")
 	cmd.Flags().StringVar(&config.outputDir, "output-dir", config.outputDir, "output directory for run log and microbenchmark results")
 	cmd.Flags().StringVar(&config.timeout, "timeout", config.timeout, "timeout for each benchmark e.g. 10m")
 	cmd.Flags().StringVar(&config.shellCommand, "shell", config.shellCommand, "additional shell command to run on node before benchmark execution")
@@ -161,7 +160,7 @@ func makeCompareCommand() *cobra.Command {
 			return err
 		}
 
-		comparisonResult := c.createComparisons(metricMaps, "old", "new")
+		comparisonResult := c.createComparisons(metricMaps, "baseline", "experiment")
 
 		var links map[string]string
 		if config.publishGoogleSheet {

diff --git a/pkg/cmd/roachprod-microbench/report.go b/pkg/cmd/roachprod-microbench/report.go
@@ -83,7 +83,7 @@ func (report *report) closeReports() {
 }
 
 func (report *report) writeBenchmarkErrorLogs(response cluster.RemoteResponse, tag string) error {
-	benchmarkResponse := response.Metadata.(benchmarkIndexed)
+	benchmarkResponse := response.Metadata.(benchmarkKey)
 	stdoutLogName := fmt.Sprintf("%s-%s-stdout.log", benchmarkResponse.name, tag)
 	stderrLogName := fmt.Sprintf("%s-%s-stderr.log", benchmarkResponse.name, tag)
 	report.log.Printf("Writing error logs for benchmark at %s, %s\n", stdoutLogName, stderrLogName)

diff --git a/pkg/cmd/roachprod-microbench/testdata/compare b/pkg/cmd/roachprod-microbench/testdata/compare
@@ -3,34 +3,34 @@ compare name-conflict-a name-conflict-b
 ----
 Package pkg/parent
 Metric B/op
-BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2200 2000] p=1.000 n=1
-BenchmarkEntry pkg/parent→WithNameConflict +18.45% [122 103] p=0.008 n=5
+BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2000 2200] p=1.000 n=1
+BenchmarkEntry pkg/parent→WithNameConflict +18.45% [103 122] p=0.008 n=5
 Metric allocs/op
-BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2200 2000] p=1.000 n=1
-BenchmarkEntry pkg/parent→WithNameConflict +18.45% [122 103] p=0.008 n=5
+BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2000 2200] p=1.000 n=1
+BenchmarkEntry pkg/parent→WithNameConflict +18.45% [103 122] p=0.008 n=5
 Metric sec/op
-BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2.2e-06 2.0000000000000003e-06] p=1.000 n=1
-BenchmarkEntry pkg/parent→WithNameConflict +18.45% [1.22e-07 1.0300000000000001e-07] p=0.008 n=5
+BenchmarkEntry pkg/parent/sub→WithNameConflict ~ [2.0000000000000003e-06 2.2e-06] p=1.000 n=1
+BenchmarkEntry pkg/parent→WithNameConflict +18.45% [1.0300000000000001e-07 1.22e-07] p=0.008 n=5
 
 # Compare reports with the same set of benchmarks
 compare set-a set-b
 ----
 Package pkg/server
 Metric B/op
-BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -14.38% [8.7777966e+07 1.02515994e+08] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +19456.56% [62581 320] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +19286.25% [62036 320] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +19286.25% [62036 320] p=0.000 n=10
+BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -14.38% [1.02515994e+08 8.7777966e+07] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +19456.56% [320 62581] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +19286.25% [320 62036] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +19286.25% [320 62036] p=0.000 n=10
 Metric allocs/op
-BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -21.18% [619796.5 786333] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +800.00% [27 3] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +600.00% [21 3] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +600.00% [21 3] p=0.000 n=10
+BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -21.18% [786333 619796.5] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +800.00% [3 27] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +600.00% [3 21] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +600.00% [3 21] p=0.000 n=10
 Metric sec/op
-BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -31.53% [0.5577142265000001 0.8145041255000001] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +6950.33% [6.387600000000001e-05 9.060000000000001e-07] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +6886.35% [6.217850000000001e-05 8.900000000000001e-07] p=0.000 n=10
-BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +6916.60% [6.256e-05 8.916000000000001e-07] p=0.000 n=10
+BenchmarkEntry pkg/server→AdminAPIDataDistribution-8 -31.53% [0.8145041255000001 0.5577142265000001] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/grpcMeta-8 +6950.33% [9.060000000000001e-07 6.387600000000001e-05] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/no_parent-8 +6886.35% [8.900000000000001e-07 6.217850000000001e-05] p=0.000 n=10
+BenchmarkEntry pkg/server→SetupSpanForIncomingRPC/traceInfo-8 +6916.60% [8.916000000000001e-07 6.256e-05] p=0.000 n=10
 Package pkg/util
 Metric B/op
 BenchmarkEntry pkg/util/hlc→DecimalToHLC-8 ~ [0 0] p=1.000 n=10
@@ -39,20 +39,20 @@ BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/empty-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/walltime-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampString-8 ~ [24 24] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampStringSynthetic-8 ~ [24 24] p=1.000 n=10
-BenchmarkEntry pkg/util/hlc→Update-8 ~ [6074.5 6066] p=0.971 n=10
+BenchmarkEntry pkg/util/hlc→Update-8 ~ [6066 6074.5] p=0.971 n=10
 Metric allocs/op
 BenchmarkEntry pkg/util/hlc→DecimalToHLC-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/all-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/empty-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/walltime-8 ~ [0 0] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampString-8 ~ [1 1] p=1.000 n=10
 BenchmarkEntry pkg/util/hlc→TimestampStringSynthetic-8 ~ [1 1] p=1.000 n=10
-BenchmarkEntry pkg/util/hlc→Update-8 ~ [46.5 47] p=0.926 n=10
+BenchmarkEntry pkg/util/hlc→Update-8 ~ [47 46.5] p=0.926 n=10
 Metric sec/op
-BenchmarkEntry pkg/util/hlc→DecimalToHLC-8 -1.09% [3.912e-07 3.955e-07] p=0.001 n=10
-BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/all-8 ~ [1.4155000000000001e-09 1.3935e-09] p=0.148 n=10
-BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/empty-8 ~ [8.8595e-10 8.8665e-10] p=0.436 n=10
-BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/walltime-8 ~ [1.4000000000000001e-09 1.393e-09] p=0.404 n=10
-BenchmarkEntry pkg/util/hlc→TimestampString-8 -1.73% [6.920000000000001e-08 7.041500000000001e-08] p=0.000 n=10
-BenchmarkEntry pkg/util/hlc→TimestampStringSynthetic-8 ~ [6.9535e-08 6.9835e-08] p=0.159 n=10
-BenchmarkEntry pkg/util/hlc→Update-8 ~ [0.067674381 0.06884473399999999] p=0.143 n=10
+BenchmarkEntry pkg/util/hlc→DecimalToHLC-8 -1.09% [3.955e-07 3.912e-07] p=0.001 n=10
+BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/all-8 ~ [1.3935e-09 1.4155000000000001e-09] p=0.148 n=10
+BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/empty-8 ~ [8.8665e-10 8.8595e-10] p=0.436 n=10
+BenchmarkEntry pkg/util/hlc→TimestampIsEmpty/walltime-8 ~ [1.393e-09 1.4000000000000001e-09] p=0.404 n=10
+BenchmarkEntry pkg/util/hlc→TimestampString-8 -1.73% [7.041500000000001e-08 6.920000000000001e-08] p=0.000 n=10
+BenchmarkEntry pkg/util/hlc→TimestampStringSynthetic-8 ~ [6.9835e-08 6.9535e-08] p=0.159 n=10
+BenchmarkEntry pkg/util/hlc→Update-8 ~ [0.06884473399999999 0.067674381] p=0.143 n=10