Skip to content

Commit

Permalink
Merge pull request #309 from opencompl/sasha/big-pipeline
Browse files Browse the repository at this point in the history
bigger matrix size for pipeline
  • Loading branch information
superlopuh authored Oct 3, 2024
2 parents 4a3b885 + 59d4cbe commit 975633a
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 37 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ all: maybe_update_xdsl_commit
low_level_representation: maybe_update_xdsl_commit
snakemake --cores $(JOBS) --rerun-incomplete low_level_representation

# Build the `pipeline` Snakemake target with up to $(JOBS) cores, after the
# `maybe_update_xdsl_commit` prerequisite has been brought up to date.
# Declared phony: this is a command, not a file, so a file or directory that
# happens to be named `pipeline` must never make it look up to date.
.PHONY: pipeline
pipeline: maybe_update_xdsl_commit
	snakemake --cores $(JOBS) --rerun-incomplete pipeline

clean:
snakemake --delete-all-output --rerun-incomplete fast all
snakemake --delete-all-output --rerun-incomplete fast all pipeline low_level_representation

include ./Makefile.xdsl
68 changes: 45 additions & 23 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,22 @@ MANUAL_KERNELS = [
# Test sets
###########################################################

# Parameter strings (as they appear in kernel paths) for the matmul
# optimisation pipeline: a small size that is part of the fast test set and a
# larger one used for the full pipeline run.
PIPELINE_PARAMS_FAST = "1x20x5xf64"
PIPELINE_PARAMS_FULL = "1x200x5xf64"

# One matmul test per entry of XDSL_LINALG_OPT_VARIANTS, at the small size.
TESTSET_PIPELINE_FAST = list(
    expand(
        f"matmul/{PIPELINE_PARAMS_FAST}/{{phase}}",
        phase=XDSL_LINALG_OPT_VARIANTS,
    )
)

# Same variants as above, at the full pipeline size.
TESTSET_PIPELINE = list(
    expand(
        f"matmul/{PIPELINE_PARAMS_FULL}/{{phase}}",
        phase=XDSL_LINALG_OPT_VARIANTS,
    )
)

# Minimum set of tests to be used as a meaningful smoke test,
# runs as fast as possible to save CI time
TESTSET_FAST = [
*MANUAL_KERNELS,
*TESTSET_PIPELINE_FAST,
# 3d templated kernels
*expand(
"matmul_transb/4x16x16xf32/{variant}",
Expand Down Expand Up @@ -118,7 +130,6 @@ TESTSET_FAST = [
"sum/4x8xf32/{variant}", variant=["baseline", "snrt", "linalg", "linalg_xdsl"]
),
*expand("sum/8x8xf16/{variant}", variant=["baseline", "linalg_xdsl"]),
*expand("matmul/1x20x5xf64/{phase}", phase=XDSL_LINALG_OPT_VARIANTS),
]

TESTSET_LOW_LEVEL_REPRESENTATION = [
Expand Down Expand Up @@ -176,6 +187,7 @@ TESTSET_LOW_LEVEL_REPRESENTATION = [
TESTSET_ALL = [
*MANUAL_KERNELS,
*TESTSET_LOW_LEVEL_REPRESENTATION,
*TESTSET_PIPELINE,
# 3d templated kernels: baseline + linalg_xdsl
*expand(
"matmul/{M}x{K}x{N}xf64/{variant}",
Expand All @@ -185,9 +197,7 @@ TESTSET_ALL = [
variant=["baseline", "linalg_xdsl"],
),
# Passes contributions
"matmul/1x400x25xf64/linalg_xdsl",
"matmul/1x400x25xf64/linalg_full_xdsl",
*expand("matmul/1x400x25xf64/{phase}", phase=XDSL_LINALG_OPT_VARIANTS),
"matmul/" + PIPELINE_PARAMS_FULL + "/linalg_xdsl",
# 2d templated kernels: baseline + linalg_xdsl
*expand(
"{kernel}/{M}x{N}xf64/{variant}",
Expand Down Expand Up @@ -224,7 +234,8 @@ def select_test_set_profiles(wildcards) -> list[str]:
sets = {
"fast": sorted(set(TESTSET_FAST)),
"all": sorted(set(TESTSET_ALL)),
"low_level_representation": sorted(set(TESTSET_LOW_LEVEL_REPRESENTATION))
"low_level_representation": sorted(set(TESTSET_LOW_LEVEL_REPRESENTATION)),
"pipeline": sorted(set(TESTSET_PIPELINE)),
}
name = wildcards.testset
if name not in sets:
Expand All @@ -233,6 +244,19 @@ def select_test_set_profiles(wildcards) -> list[str]:
)
return expand("kernels/{test}.profile.json", test=sets[name])

def select_test_set_regalloc_jsons(wildcards) -> list[str]:
    """List the per-test ``.regalloc.json`` files belonging to a test set.

    The test set is selected by ``wildcards.testset``; its tests are
    de-duplicated and sorted before being expanded into
    ``kernels/{test}.regalloc.json`` paths.

    Raises:
        ValueError: if ``wildcards.testset`` is not a known test set name.
    """
    known_sets = {
        "fast": TESTSET_FAST,
        "all": TESTSET_ALL,
        "low_level_representation": TESTSET_LOW_LEVEL_REPRESENTATION,
        "pipeline": TESTSET_PIPELINE,
    }
    requested = wildcards.testset
    if requested in known_sets:
        # De-duplicate, then sort so the resulting file list has a stable order.
        tests = sorted(set(known_sets[requested]))
        return expand("kernels/{test}.regalloc.json", test=tests)
    raise ValueError(
        f"unknown test set name '{requested}', valid values are: {known_sets.keys()}"
    )

###########################################################
# Target rules
Expand All @@ -246,7 +270,6 @@ rule fast:
"results/pivoted_fpu.fast.csv",
"results/pivoted_ipc.fast.csv",
"results/regalloc.fast.csv",
"results/pipeline.fast.csv",
# This is the default rule taking over former result
# file names:
output:
Expand All @@ -266,6 +289,17 @@ rule low_level_representation:
input:
"results/kernels.low_level_representation.csv"

# Produce results/pipeline.csv by running scripts/pipeline.py on the pipeline
# kernel results, the pipeline register-allocation stats and the FRep counts.
rule pipeline:
    input:
        kernels="results/kernels.pipeline.csv",
        regalloc="kernels/regalloc.pipeline.jsonl",
        frep_count="results/frep_count.csv",
        # The script itself is declared as an input
        # (presumably so that changes to it re-trigger this rule).
        pipeline_py="scripts/pipeline.py",
    output:
        "results/pipeline.csv",
    shell:
        "python {input.pipeline_py} "
        "{input.kernels} {input.regalloc} {input.frep_count} "
        "-o {output}"

rule all:
input:
"results/kernels.all.csv",
Expand Down Expand Up @@ -384,18 +418,18 @@ rule assembly_to_regalloc_stats:

rule combine_regalloc_stats:
input:
*expand("kernels/{test}.regalloc.json", test=TESTSET_FAST),
select_test_set_regalloc_jsons
output:
"kernels/regalloc.fast.jsonl",
"kernels/regalloc.{testset}.jsonl",
shell:
"cat {input} > {output}"


rule count_frep_instructions:
input:
expand("kernels/matmul/1x20x5xf64/{test}.S", test=XDSL_LINALG_OPT_VARIANTS)
expand("kernels/matmul/" + PIPELINE_PARAMS_FULL + "/{phase}.S", phase=XDSL_LINALG_OPT_VARIANTS),
output:
"results/frep_count.fast.csv"
"results/frep_count.csv"
shell:
"""
echo "variant,frep_count" > {output}
Expand All @@ -411,7 +445,7 @@ rule regalloc_stats_to_csv:
input:
"kernels/regalloc.fast.jsonl",
output:
"results/regalloc.fast.csv",
"results/regalloc.{testset}.csv",
run:
import pandas as pd

Expand All @@ -424,18 +458,6 @@ rule regalloc_stats_to_csv:
df.to_csv(output[0], index=True)


rule pipeline:
input:
kernels="results/kernels.fast.csv",
regalloc="kernels/regalloc.fast.jsonl",
frep_count="results/frep_count.fast.csv",
pipeline_py="scripts/pipeline.py",
output:
"results/pipeline.fast.csv",
shell:
"python {input.pipeline_py} {input.kernels} {input.regalloc} {input.frep_count} -o {output}"


rule optimization_pipelines:
input:
passes = "kernels/optimization_passes.txt",
Expand Down
File renamed without changes.
7 changes: 7 additions & 0 deletions results/kernels.pipeline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
test,params,impl,cycles,end,end_fpss,fpss_avg_fpu_latency,fpss_avg_load_latency,fpss_fpu_fmadd_issues,fpss_fpu_issues,fpss_fpu_latency,fpss_fpu_occupancy,fpss_fpu_rel_occupancy,fpss_issues,fpss_load_latency,fpss_loads,fpss_occupancy,fpss_section_latency,fpss_stores,fseq_fpu_yield,fseq_yield,section,snitch_avg_load_latency,snitch_fseq_offloads,snitch_fseq_rel_offloads,snitch_issues,snitch_load_latency,snitch_loads,snitch_occupancy,snitch_stores,start,tend,total_ipc,tstart
matmul,1x200x5xf64,linalg_0_xdsl,40161,41098,41095,2.9950099800399204,1.0,1000,1002,3001,0.024949577948756255,0.20011983223487118,5007,3000,3000,0.12467319040860536,0,1005,1.0,1.0,1,0,5007,0.19972874865371573,20062,0,0,0.49953935409974853,0,938,0.0,0.624212544508354,0.0
matmul,1x200x5xf64,linalg_1_xdsl,19165,20098,20095,2.985104270109235,1.0,1000,1007,3006,0.05254369945212627,0.33444038525406844,3011,1000,1000,0.15710931385337856,0,1000,1.0,1.0,1,0,3011,0.23019877675840977,10069,0,0,0.5253848160709627,0,934,0.0,0.6824941299243412,0.0
matmul,1x200x5xf64,linalg_2_xdsl,4147,5080,5077,2.985104270109235,1.0,1000,1007,3006,0.24282613937786351,0.9862879529872673,1021,5,5,0.24620207378828068,0,5,1.0,1.0,1,0,1021,0.32619808306709264,2109,0,0,0.5085604051121293,0,934,0.0,0.7547624789004099,0.0
matmul,1x200x5xf64,linalg_3_xdsl,4124,5039,5036,2.985104270109235,1.0,1000,1007,3006,0.244180407371484,0.9862879529872673,1021,5,5,0.24757516973811833,0,5,36.464285714285715,36.464285714285715,1,0,28,0.224,97,0,0,0.02352085354025218,0,916,0.0,0.2710960232783705,0.0
matmul,1x200x5xf64,linalg_4_xdsl,4130,5069,5066,2.975296442687747,0.0,1000,1012,3011,0.24503631961259079,0.9980276134122288,1014,0,0,0.24552058111380146,0,0,42.25,42.25,1,0,24,0.3,56,0,0,0.013559322033898305,0,940,0.0,0.25907990314769974,0.0
matmul,1x200x5xf64,linalg_5_xdsl,1115,2030,2027,2.980217606330366,0.0,1000,1011,3013,0.9067264573991032,0.998025666337611,1013,0,0,0.9085201793721973,0,0,53.31578947368421,53.31578947368421,1,0,19,0.37254901960784315,32,0,0,0.028699551569506727,0,916,0.0,0.9372197309417041,0.0
7 changes: 7 additions & 0 deletions results/pipeline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
variant,F Registers,X Registers,Cycles,FPU Occupancy [%],F Loads,F Stores,FMAdd Issues,FRep Count
Baseline,3,14,40161,2.49,3000,1005,1000,0
+ Streams,3,12,19165,5.25,1000,1000,1000,0
+ Scalar Replacement,3,11,4147,24.28,5,5,1000,0
+ FRep,3,10,4124,24.42,5,5,1000,2
+ Fuse Fill,5,9,4130,24.50,0,0,1000,1
+ Unroll and Jam,8,8,1115,90.67,0,0,1000,1
7 changes: 0 additions & 7 deletions results/pipeline.fast.csv

This file was deleted.

10 changes: 5 additions & 5 deletions results/regalloc.fast.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
impl,params,allocated_float,allocated_int
matmul,4x16x8xf64,8,9
sum,4x4xf64,3,8
conv2d_d1_s1_3x3,4x4xf64,8,9
fill,4x4xf64,3,4
relu,4x4xf64,3,6
pooling_nchw_sum_d1_s2_3x3,4x4xf64,7,7
matmul,4x16x8xf64,8,9
pooling_nchw_max_d1_s2_3x3,4x4xf64,7,7
conv2d_d1_s1_3x3,4x4xf64,8,9
pooling_nchw_sum_d1_s2_3x3,4x4xf64,7,7
relu,4x4xf64,3,6
sum,4x4xf64,3,8
sum,4x8xf32,3,8
sum,8x8xf16,3,8
10 changes: 10 additions & 0 deletions results/regalloc.pipeline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
impl,params,allocated_float,allocated_int
conv2d_d1_s1_3x3,4x4xf64,8,9
fill,4x4xf64,3,4
matmul,4x16x8xf64,8,9
pooling_nchw_max_d1_s2_3x3,4x4xf64,7,7
pooling_nchw_sum_d1_s2_3x3,4x4xf64,7,7
relu,4x4xf64,3,6
sum,4x4xf64,3,8
sum,4x8xf32,3,8
sum,8x8xf16,3,8
1 change: 0 additions & 1 deletion scripts/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def main():
regalloc_df = pd.read_json(regalloc_stats, lines=True)
regalloc_df = regalloc_df[regalloc_df.impl == "matmul"]
del regalloc_df["impl"]
regalloc_df = regalloc_df[regalloc_df.params == "1x20x5xf64"]
del regalloc_df["params"]

regalloc_df = regalloc_df.set_index("variant")
Expand Down

0 comments on commit 975633a

Please sign in to comment.