diff --git a/Snakefile b/Snakefile index 3d9a63fa..920a283d 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,8 @@ XDSL_LINALG_OPT_VARIANTS = [ "linalg_1_xdsl", # incremental insertion of xDSL passes "linalg_2_xdsl", # incremental insertion of xDSL passes "linalg_3_xdsl", # incremental insertion of xDSL passes - "linalg_4_xdsl", # should run the same passes as linalg_xdsl but via a fully expanded pipeline instead of xdsl-opt test passes/mini-pipelines + "linalg_4_xdsl", # incremental insertion of xDSL passes + "linalg_5_xdsl", # should run the same passes as linalg_xdsl but via a fully expanded pipeline instead of xdsl-opt test passes/mini-pipelines ] XDSL_LINALG_VARIANTS = [ @@ -117,7 +118,7 @@ TESTSET_FAST = [ "sum/4x8xf32/{variant}", variant=["baseline", "snrt", "linalg", "linalg_xdsl"] ), *expand("sum/8x8xf16/{variant}", variant=["baseline", "linalg_xdsl"]), - *expand("matmul/1x20x5xf64/linalg_{phase}_xdsl", phase=range(0, 5)), + *expand("matmul/1x20x5xf64/{phase}", phase=XDSL_LINALG_OPT_VARIANTS), ] TESTSET_LOW_LEVEL_REPRESENTATION = [ @@ -170,7 +171,7 @@ TESTSET_ALL = [ # Passes contributions "matmul/1x400x25xf64/linalg_xdsl", "matmul/1x400x25xf64/linalg_full_xdsl", - *expand("matmul/1x400x25xf64/linalg_{phase}_xdsl", phase=range(0, 5)), + *expand("matmul/1x400x25xf64/{phase}", phase=XDSL_LINALG_OPT_VARIANTS), # 2d templated kernels: baseline + linalg_xdsl *expand( "{kernel}/{M}x{N}xf64/{variant}", diff --git a/kernels/optimization_passes.txt b/kernels/optimization_passes.txt index d6bdd57a..f0683608 100644 --- a/kernels/optimization_passes.txt +++ b/kernels/optimization_passes.txt @@ -1,4 +1,5 @@ memref-streamify memref-stream-unnest-out-parameters convert-riscv-scf-for-to-frep +memref-stream-fold-fill memref-stream-interleave diff --git a/results/kernels.csv b/results/kernels.csv index f249f741..eb62823f 100644 --- a/results/kernels.csv +++ b/results/kernels.csv @@ -16,9 +16,10 @@ fill,4x4xf64,baseline,50,760,757,0.0,0.0,0,1,0,0.02,0.058823529411764705,17,0,0, fill,4x4xf64,linalg_xdsl,63,760,757,0.9444444444444444,0.0,0,18,17,0.2857142857142857,0.9,20,0,0,0.31746031746031744,0,0,3.333333333333333,3.3333333333333335,1,0.0,6,0.2857142857142857,15,0,0,0.23809523809523808,0,698,0.0,0.5555555555555556,0.0 matmul,1x20x5xf64,linalg_0_xdsl,4136,4896,4893,2.950980392156863,1.0,100,102,301,0.024661508704061894,0.20118343195266272,507,300,300,0.12258220502901354,0,105,1.0,1.0,1,0.0,507,0.19735305566368236,2062,0,0,0.4985493230174081,0,761,0.0,0.6211315280464217,0.0 matmul,1x20x5xf64,linalg_1_xdsl,2050,2829,2826,2.8598130841121496,1.0,100,107,306,0.05219512195121951,0.3440514469453376,311,100,100,0.15170731707317073,0,100,1.0,1.0,1,0.0,311,0.22552574329224076,1068,0,0,0.5209756097560976,0,780,0.0,0.6726829268292683,0.0 -matmul,1x20x5xf64,linalg_2_xdsl,497,1253,1250,2.7767857142857144,0.0,100,112,311,0.22535211267605634,0.9824561403508771,114,0,0,0.22937625754527163,0,0,1.0,1.0,1,0.0,114,0.3081081081081081,256,0,0,0.5150905432595574,0,757,0.0,0.744466800804829,0.0 -matmul,1x20x5xf64,linalg_3_xdsl,507,1283,1280,2.7767857142857144,0.0,100,112,311,0.22090729783037474,0.9824561403508771,114,0,0,0.22485207100591717,0,0,4.750000000000001,4.75,1,0.0,24,0.3037974683544304,55,0,0,0.10848126232741617,0,777,0.0,0.33333333333333337,0.0 -matmul,1x20x5xf64,linalg_4_xdsl,191,967,964,2.81981981981982,0.0,100,111,313,0.581151832460733,0.9823008849557522,113,0,0,0.5916230366492147,0,0,5.947368421052632,5.947368421052632,1,0.0,19,0.37254901960784315,32,0,0,0.16753926701570682,0,777,0.0,0.7591623036649215,0.0 +matmul,1x20x5xf64,linalg_2_xdsl,541,1320,1317,2.8598130841121496,1.0,100,107,306,0.1977818853974122,0.8842975206611571,121,5,5,0.22365988909426987,0,5,1.0,1.0,1,0.0,121,0.28205128205128205,308,0,0,0.5693160813308688,0,780,0.0,0.7929759704251387,0.0 +matmul,1x20x5xf64,linalg_3_xdsl,507,1281,1278,2.8598130841121496,1.0,100,107,306,0.21104536489151873,0.8842975206611571,121,5,5,0.23865877712031558,0,5,4.321428571428571,4.321428571428571,1,0.0,28,0.22580645161290322,96,0,0,0.1893491124260355,0,775,0.0,0.42800788954635105,0.0 +matmul,1x20x5xf64,linalg_4_xdsl,507,1283,1280,2.7767857142857144,0.0,100,112,311,0.22090729783037474,0.9824561403508771,114,0,0,0.22485207100591717,0,0,4.750000000000001,4.75,1,0.0,24,0.3037974683544304,55,0,0,0.10848126232741617,0,777,0.0,0.33333333333333337,0.0 +matmul,1x20x5xf64,linalg_5_xdsl,191,967,964,2.81981981981982,0.0,100,111,313,0.581151832460733,0.9823008849557522,113,0,0,0.5916230366492147,0,0,5.947368421052632,5.947368421052632,1,0.0,19,0.37254901960784315,32,0,0,0.16753926701570682,0,777,0.0,0.7591623036649215,0.0 matmul,4x16x8xf64,baseline,2495,3293,3290,2.9941520467836256,1.4991334488734835,480,513,1536,0.20561122244488977,0.4568121104185218,1123,865,577,0.4501002004008016,0,33,1.0,1.0,1,0.0,1123,0.7975852272727273,285,0,0,0.11422845691382766,0,799,0.0,0.5643286573146293,0.0 matmul,4x16x8xf64,linalg_xdsl,708,1493,1490,2.811418685121107,0.0,512,578,1625,0.8163841807909604,0.996551724137931,580,0,0,0.8192090395480226,0,0,5.37037037037037,5.37037037037037,1,0.0,108,0.5869565217391305,76,0,0,0.10734463276836158,0,786,0.0,0.9265536723163842,0.0 matmul_transb,4x16x16xf32,baseline,3386,4184,4181,2.539660056657224,1.4921875,0,706,1793,0.20850561134081513,0.3935340022296544,1794,1528,1024,0.5298287064382753,0,64,1.0,1.0,1,0.0,1794,0.5561066336019839,1432,0,0,0.42291789722386297,0,799,0.0,0.9527466036621383,0.0 diff --git a/results/kernels.fast.csv b/results/kernels.fast.csv index f249f741..eb62823f 100644 --- a/results/kernels.fast.csv +++ b/results/kernels.fast.csv @@ -16,9 +16,10 @@ fill,4x4xf64,baseline,50,760,757,0.0,0.0,0,1,0,0.02,0.058823529411764705,17,0,0, fill,4x4xf64,linalg_xdsl,63,760,757,0.9444444444444444,0.0,0,18,17,0.2857142857142857,0.9,20,0,0,0.31746031746031744,0,0,3.333333333333333,3.3333333333333335,1,0.0,6,0.2857142857142857,15,0,0,0.23809523809523808,0,698,0.0,0.5555555555555556,0.0 matmul,1x20x5xf64,linalg_0_xdsl,4136,4896,4893,2.950980392156863,1.0,100,102,301,0.024661508704061894,0.20118343195266272,507,300,300,0.12258220502901354,0,105,1.0,1.0,1,0.0,507,0.19735305566368236,2062,0,0,0.4985493230174081,0,761,0.0,0.6211315280464217,0.0 matmul,1x20x5xf64,linalg_1_xdsl,2050,2829,2826,2.8598130841121496,1.0,100,107,306,0.05219512195121951,0.3440514469453376,311,100,100,0.15170731707317073,0,100,1.0,1.0,1,0.0,311,0.22552574329224076,1068,0,0,0.5209756097560976,0,780,0.0,0.6726829268292683,0.0 -matmul,1x20x5xf64,linalg_2_xdsl,497,1253,1250,2.7767857142857144,0.0,100,112,311,0.22535211267605634,0.9824561403508771,114,0,0,0.22937625754527163,0,0,1.0,1.0,1,0.0,114,0.3081081081081081,256,0,0,0.5150905432595574,0,757,0.0,0.744466800804829,0.0 -matmul,1x20x5xf64,linalg_3_xdsl,507,1283,1280,2.7767857142857144,0.0,100,112,311,0.22090729783037474,0.9824561403508771,114,0,0,0.22485207100591717,0,0,4.750000000000001,4.75,1,0.0,24,0.3037974683544304,55,0,0,0.10848126232741617,0,777,0.0,0.33333333333333337,0.0 -matmul,1x20x5xf64,linalg_4_xdsl,191,967,964,2.81981981981982,0.0,100,111,313,0.581151832460733,0.9823008849557522,113,0,0,0.5916230366492147,0,0,5.947368421052632,5.947368421052632,1,0.0,19,0.37254901960784315,32,0,0,0.16753926701570682,0,777,0.0,0.7591623036649215,0.0 +matmul,1x20x5xf64,linalg_2_xdsl,541,1320,1317,2.8598130841121496,1.0,100,107,306,0.1977818853974122,0.8842975206611571,121,5,5,0.22365988909426987,0,5,1.0,1.0,1,0.0,121,0.28205128205128205,308,0,0,0.5693160813308688,0,780,0.0,0.7929759704251387,0.0 +matmul,1x20x5xf64,linalg_3_xdsl,507,1281,1278,2.8598130841121496,1.0,100,107,306,0.21104536489151873,0.8842975206611571,121,5,5,0.23865877712031558,0,5,4.321428571428571,4.321428571428571,1,0.0,28,0.22580645161290322,96,0,0,0.1893491124260355,0,775,0.0,0.42800788954635105,0.0 +matmul,1x20x5xf64,linalg_4_xdsl,507,1283,1280,2.7767857142857144,0.0,100,112,311,0.22090729783037474,0.9824561403508771,114,0,0,0.22485207100591717,0,0,4.750000000000001,4.75,1,0.0,24,0.3037974683544304,55,0,0,0.10848126232741617,0,777,0.0,0.33333333333333337,0.0 +matmul,1x20x5xf64,linalg_5_xdsl,191,967,964,2.81981981981982,0.0,100,111,313,0.581151832460733,0.9823008849557522,113,0,0,0.5916230366492147,0,0,5.947368421052632,5.947368421052632,1,0.0,19,0.37254901960784315,32,0,0,0.16753926701570682,0,777,0.0,0.7591623036649215,0.0 matmul,4x16x8xf64,baseline,2495,3293,3290,2.9941520467836256,1.4991334488734835,480,513,1536,0.20561122244488977,0.4568121104185218,1123,865,577,0.4501002004008016,0,33,1.0,1.0,1,0.0,1123,0.7975852272727273,285,0,0,0.11422845691382766,0,799,0.0,0.5643286573146293,0.0 matmul,4x16x8xf64,linalg_xdsl,708,1493,1490,2.811418685121107,0.0,512,578,1625,0.8163841807909604,0.996551724137931,580,0,0,0.8192090395480226,0,0,5.37037037037037,5.37037037037037,1,0.0,108,0.5869565217391305,76,0,0,0.10734463276836158,0,786,0.0,0.9265536723163842,0.0 matmul_transb,4x16x16xf32,baseline,3386,4184,4181,2.539660056657224,1.4921875,0,706,1793,0.20850561134081513,0.3935340022296544,1794,1528,1024,0.5298287064382753,0,64,1.0,1.0,1,0.0,1794,0.5561066336019839,1432,0,0,0.42291789722386297,0,799,0.0,0.9527466036621383,0.0 diff --git a/results/pipeline.fast.csv b/results/pipeline.fast.csv index 890added..ad2959e1 100644 --- a/results/pipeline.fast.csv +++ b/results/pipeline.fast.csv @@ -1,6 +1,7 @@ variant,F Registers,X Registers,FPU Occupancy [%] Baseline,3,14,2.47 + Streams,3,12,5.22 -+ Scalar Replacement,5,8,22.54 -+ Unroll and Jam,5,9,22.09 -+ FRep,8,8,58.12 ++ Scalar Replacement,3,11,19.78 ++ FRep,3,10,21.10 ++ Fuse Fill,5,9,22.09 ++ Unroll and Jam,8,8,58.12 diff --git a/scripts/pipeline.py b/scripts/pipeline.py index cbf0391e..ea6c073d 100644 --- a/scripts/pipeline.py +++ b/scripts/pipeline.py @@ -17,8 +17,9 @@ def merge_stats( "linalg_0_xdsl": "Baseline", "linalg_1_xdsl": "+ Streams", "linalg_2_xdsl": "+ Scalar Replacement", - "linalg_3_xdsl": "+ Unroll and Jam", - "linalg_4_xdsl": "+ FRep", + "linalg_3_xdsl": "+ FRep", + "linalg_4_xdsl": "+ Fuse Fill", + "linalg_5_xdsl": "+ Unroll and Jam", } col_names = { "allocated_float": "F Registers",