From de528ffb17ebce96e0bc4dde1749146c41ca1d0d Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Tue, 25 Jun 2024 15:42:02 +0100 Subject: [PATCH] [Flang] Extracting internal constants from scalar literals (#73829) Constants actual arguments in function/subroutine calls are currently lowered as allocas + store. This can sometimes inhibit LTO and the constant will not be propagated to the called function. Particularly in cases where the function/subroutine call happens inside a condition. This patch changes the lowering of these constant actual arguments to a global constant + fir.address_of_op. This lowering makes it easier for LTO to propagate the constant. The optimization must be enabled explicitly to run. Use -mmlir --enable-constant-argument-globalisation to enable. --------- Co-authored-by: Dmitriy Smirnov --- .../flang/Optimizer/Transforms/Passes.h | 2 + .../flang/Optimizer/Transforms/Passes.td | 9 + flang/include/flang/Tools/CLOptions.inc | 8 + flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../ConstantArgumentGlobalisation.cpp | 185 ++++++++++++++++++ flang/test/Fir/boxproc.fir | 4 +- .../test/Lower/character-local-variables.f90 | 5 +- .../constant-argument-globalisation-2.fir | 98 ++++++++++ .../constant-argument-globalisation.fir | 67 +++++++ 9 files changed, 375 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp create mode 100644 flang/test/Transforms/constant-argument-globalisation-2.fir create mode 100644 flang/test/Transforms/constant-argument-globalisation.fir diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 1ca1539e76fc63..df709645c01b0b 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -57,6 +57,8 @@ namespace fir { #define GEN_PASS_DECL_OMPFUNCTIONFILTERING #define GEN_PASS_DECL_VSCALEATTR #define GEN_PASS_DECL_FUNCTIONATTR +#define GEN_PASS_DECL_CONSTANTARGUMENTGLOBALISATIONOPT + #include "flang/Optimizer/Transforms/Passes.h.inc" std::unique_ptr createAffineDemotionPass(); diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 27aee5650e75d5..b3ed9acad36df4 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -251,6 +251,15 @@ def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> { ]; } +// This needs to be a "mlir::ModuleOp" pass, because it inserts global constants +def ConstantArgumentGlobalisationOpt : Pass<"constant-argument-globalisation-opt", "mlir::ModuleOp"> { + let summary = "Convert constant function arguments to global constants."; + let description = [{ + Convert scalar literals of function arguments to global constants. + }]; + let dependentDialects = [ "fir::FIROpsDialect" ]; +} + def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> { let summary = "Move local array allocations from heap memory into stack memory"; let description = [{ diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index bb4347d0e82c8f..7f2910c5cfd3c3 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -25,6 +25,10 @@ static llvm::cl::opt disable##DOName("disable-" DOOption, \ llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \ llvm::cl::Hidden) +#define EnableOption(EOName, EOOption, EODescription) \ + static llvm::cl::opt enable##EOName("enable-" EOOption, \ + llvm::cl::desc("enable " EODescription " pass"), llvm::cl::init(false), \ + llvm::cl::Hidden) /// Shared option in tools to control whether dynamically sized array /// allocations should always be on the heap. @@ -86,6 +90,8 @@ DisableOption(BoxedProcedureRewrite, "boxed-procedure-rewrite", DisableOption(ExternalNameConversion, "external-name-interop", "convert names with external convention"); +EnableOption(ConstantArgumentGlobalisation, "constant-argument-globalisation", + "the local constant argument to global constant conversion"); using PassConstructor = std::unique_ptr(); @@ -270,6 +276,8 @@ inline void createDefaultFIROptimizerPassPipeline( // These passes may increase code size. pm.addPass(fir::createSimplifyIntrinsics()); pm.addPass(fir::createAlgebraicSimplificationPass(config)); + if (enableConstantArgumentGlobalisation) + pm.addPass(fir::createConstantArgumentGlobalisationOpt()); } if (pc.LoopVersioning) diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 149afdf601c936..94d94398d696ad 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -6,6 +6,7 @@ add_flang_library(FIRTransforms AnnotateConstant.cpp AssumedRankOpConversion.cpp CharacterConversion.cpp + ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp ArrayValueCopy.cpp ExternalNameConversion.cpp diff --git a/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp b/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp new file mode 100644 index 00000000000000..f7074a79a8a8d7 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/ConstantArgumentGlobalisation.cpp @@ -0,0 +1,185 @@ +//===- ConstantArgumentGlobalisation.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace fir { +#define GEN_PASS_DEF_CONSTANTARGUMENTGLOBALISATIONOPT +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +#define DEBUG_TYPE "flang-constant-argument-globalisation-opt" + +namespace { +unsigned uniqueLitId = 1; + +class CallOpRewriter : public mlir::OpRewritePattern { +protected: + const mlir::DominanceInfo &di; + +public: + using OpRewritePattern::OpRewritePattern; + + CallOpRewriter(mlir::MLIRContext *ctx, const mlir::DominanceInfo &_di) + : OpRewritePattern(ctx), di(_di) {} + + mlir::LogicalResult + matchAndRewrite(fir::CallOp callOp, + mlir::PatternRewriter &rewriter) const override { + LLVM_DEBUG(llvm::dbgs() << "Processing call op: " << callOp << "\n"); + auto module = callOp->getParentOfType(); + bool needUpdate = false; + fir::FirOpBuilder builder(rewriter, module); + llvm::SmallVector newOperands; + llvm::SmallVector> allocas; + for (const mlir::Value &a : callOp.getArgs()) { + auto alloca = mlir::dyn_cast_or_null(a.getDefiningOp()); + // We can convert arguments that are alloca, and that has + // the value by reference attribute. All else is just added + // to the argument list. + if (!alloca || !alloca->hasAttr(fir::getAdaptToByRefAttrName())) { + newOperands.push_back(a); + continue; + } + + mlir::Type varTy = alloca.getInType(); + assert(!fir::hasDynamicSize(varTy) && + "only expect statically sized scalars to be by value"); + + // Find immediate store with const argument + mlir::Operation *store = nullptr; + for (mlir::Operation *s : alloca->getUsers()) { + if (mlir::isa(s) && di.dominates(s, callOp)) { + // We can only deal with ONE store - if already found one, + // set to nullptr and exit the loop. + if (store) { + store = nullptr; + break; + } + store = s; + } + } + + // If we didn't find any store, or multiple stores, add argument as is + // and move on. + if (!store) { + newOperands.push_back(a); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << " found store " << *store << "\n"); + + mlir::Operation *definingOp = store->getOperand(0).getDefiningOp(); + // If not a constant, add to operands and move on. + if (!mlir::isa(definingOp)) { + // Unable to remove alloca arg + newOperands.push_back(a); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << " found define " << *definingOp << "\n"); + + std::string globalName = + "_global_const_." + std::to_string(uniqueLitId++); + assert(!builder.getNamedGlobal(globalName) && + "We should have a unique name here"); + + if (std::find_if(allocas.begin(), allocas.end(), [alloca](auto x) { + return x.first == alloca; + }) == allocas.end()) { + allocas.push_back(std::make_pair(alloca, store)); + } + + auto loc = callOp.getLoc(); + fir::GlobalOp global = builder.createGlobalConstant( + loc, varTy, globalName, + [&](fir::FirOpBuilder &builder) { + mlir::Operation *cln = definingOp->clone(); + builder.insert(cln); + mlir::Value val = + builder.createConvert(loc, varTy, cln->getResult(0)); + builder.create(loc, val); + }, + builder.createInternalLinkage()); + mlir::Value addr = builder.create(loc, global.resultType(), + global.getSymbol()); + newOperands.push_back(addr); + needUpdate = true; + } + + if (needUpdate) { + auto loc = callOp.getLoc(); + llvm::SmallVector newResultTypes; + newResultTypes.append(callOp.getResultTypes().begin(), + callOp.getResultTypes().end()); + fir::CallOp newOp = builder.create( + loc, newResultTypes, + callOp.getCallee().has_value() ? callOp.getCallee().value() + : mlir::SymbolRefAttr{}, + newOperands); + // Copy all the attributes from the old to new op. + newOp->setAttrs(callOp->getAttrs()); + rewriter.replaceOp(callOp, newOp); + + for (auto a : allocas) { + if (a.first->hasOneUse()) { + // If the alloca is only used for a store and the call operand, the + // store is no longer required. + rewriter.eraseOp(a.second); + rewriter.eraseOp(a.first); + } + } + LLVM_DEBUG(llvm::dbgs() << "global constant for " << callOp << " as " + << newOp << '\n'); + return mlir::success(); + } + + // Failure here just means "we couldn't do the conversion", which is + // perfectly acceptable to the upper layers of this function. + return mlir::failure(); + } +}; + +// this pass attempts to convert immediate scalar literals in function calls +// to global constants to allow transformations such as Dead Argument +// Elimination +class ConstantArgumentGlobalisationOpt + : public fir::impl::ConstantArgumentGlobalisationOptBase< + ConstantArgumentGlobalisationOpt> { +public: + ConstantArgumentGlobalisationOpt() = default; + + void runOnOperation() override { + mlir::ModuleOp mod = getOperation(); + mlir::DominanceInfo *di = &getAnalysis(); + auto *context = &getContext(); + mlir::RewritePatternSet patterns(context); + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = + mlir::GreedySimplifyRegionLevel::Disabled; + config.strictMode = mlir::GreedyRewriteStrictness::ExistingOps; + + patterns.insert(context, *di); + if (mlir::failed(mlir::applyPatternsAndFoldGreedily( + mod, std::move(patterns), config))) { + mlir::emitError(mod.getLoc(), + "error in constant globalisation optimization\n"); + signalPassFailure(); + } + } +}; +} // namespace diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index 834017bff71aa3..9e4ea0bc210775 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -16,9 +16,7 @@ // CHECK-LABEL: define void @_QPtest_proc_dummy_other(ptr // CHECK-SAME: %[[VAL_0:.*]]) -// CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4 -// CHECK: store i32 4, ptr %[[VAL_1]], align 4 -// CHECK: call void %[[VAL_0]](ptr %[[VAL_1]]) +// CHECK: call void %[[VAL_0]](ptr %{{.*}}) func.func @_QPtest_proc_dummy() { %c0_i32 = arith.constant 0 : i32 diff --git a/flang/test/Lower/character-local-variables.f90 b/flang/test/Lower/character-local-variables.f90 index 0cf61a2623c4e7..d5b959eca1ff63 100644 --- a/flang/test/Lower/character-local-variables.f90 +++ b/flang/test/Lower/character-local-variables.f90 @@ -1,4 +1,6 @@ ! RUN: bbc -hlfir=false %s -o - | FileCheck %s +! RUN: bbc -hlfir=false --enable-constant-argument-globalisation %s -o - \ +! RUN: | FileCheck %s --check-prefix=CHECK-CONST ! Test lowering of local character variables @@ -118,7 +120,8 @@ subroutine assumed_length_param(n) integer :: n ! CHECK: %[[c4:.*]] = arith.constant 4 : i64 ! CHECK: fir.store %[[c4]] to %[[tmp:.*]] : !fir.ref - ! CHECK: fir.call @_QPtake_int(%[[tmp]]) {{.*}}: (!fir.ref) -> () + ! CHECK-CONST: %[[tmp:.*]] = fir.address_of(@_global_const_.{{.*}}) : !fir.ref + ! CHECK-CONST: fir.call @_QPtake_int(%[[tmp]]) {{.*}}: (!fir.ref) -> () call take_int(len(c(n), kind=8)) end diff --git a/flang/test/Transforms/constant-argument-globalisation-2.fir b/flang/test/Transforms/constant-argument-globalisation-2.fir new file mode 100644 index 00000000000000..03e08a0dcb914b --- /dev/null +++ b/flang/test/Transforms/constant-argument-globalisation-2.fir @@ -0,0 +1,98 @@ +// RUN: fir-opt --split-input-file --constant-argument-globalisation-opt < %s | FileCheck %s + +module { +// Test for "two conditional writes to the same alloca doesn't get replaced." + func.func @func(%arg0: i32, %arg1: i1) { + %c2_i32 = arith.constant 2 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.if %arg1 { + fir.store %c2_i32 to %addr : !fir.ref + } else { + fir.store %arg0 to %addr : !fir.ref + } + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK-SAME: [[ARG0:%.*]]: i32 +// CHECK-SAME: [[ARG1:%.*]]: i1) +// CHECK: [[CONST:%.*]] = arith.constant +// CHECK: [[ADDR:%.*]] = fir.alloca i32 +// CHECK: fir.if [[ARG1]] +// CHECK: fir.store [[CONST]] to [[ADDR]] +// CHECK: } else { +// CHECK: fir.store [[ARG0]] to [[ADDR]] +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return + +} + +// ----- + +module { +// Test for "two writes to the same alloca doesn't get replaced." + func.func @func() { + %c1_i32 = arith.constant 1 : i32 + %c2_i32 = arith.constant 2 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.store %c1_i32 to %addr : !fir.ref + fir.store %c2_i32 to %addr : !fir.ref + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK: [[CONST1:%.*]] = arith.constant +// CHECK: [[CONST2:%.*]] = arith.constant +// CHECK: [[ADDR:%.*]] = fir.alloca i32 +// CHECK: fir.store [[CONST1]] to [[ADDR]] +// CHECK: fir.store [[CONST2]] to [[ADDR]] +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return + +} + +// ----- + +module { +// Test for "one write to the the alloca gets replaced." + func.func @func() { + %c1_i32 = arith.constant 1 : i32 + %addr = fir.alloca i32 {adapt.valuebyref} + fir.store %c1_i32 to %addr : !fir.ref + fir.call @sub2(%addr) : (!fir.ref) -> () + return + } + func.func private @sub2(!fir.ref) + +// CHECK-LABEL: func.func @func +// CHECK: [[ADDR:%.*]] = fir.address_of([[EXTR:@.*]]) : !fir.ref +// CHECK: fir.call @sub2([[ADDR]]) +// CHECK: return +// CHECK: fir.global internal [[EXTR]] constant : i32 { +// CHECK: %{{.*}} = arith.constant 1 : i32 +// CHECK: fir.has_value %{{.*}} : i32 +// CHECK: } + +} + +// ----- +// Check that same argument used twice is converted. +module { + func.func @func(%arg0: !fir.ref, %arg1: i1) { + %c2_i32 = arith.constant 2 : i32 + %addr1 = fir.alloca i32 {adapt.valuebyref} + fir.store %c2_i32 to %addr1 : !fir.ref + fir.call @sub1(%addr1, %addr1) : (!fir.ref, !fir.ref) -> () + return + } +} + +// CHECK-LABEL: func.func @func +// CHECK-NEXT: %[[ARG1:.*]] = fir.address_of([[CONST1:@.*]]) : !fir.ref +// CHECK-NEXT: %[[ARG2:.*]] = fir.address_of([[CONST2:@.*]]) : !fir.ref +// CHECK-NEXT: fir.call @sub1(%[[ARG1]], %[[ARG2]]) +// CHECK-NEXT: return diff --git a/flang/test/Transforms/constant-argument-globalisation.fir b/flang/test/Transforms/constant-argument-globalisation.fir new file mode 100644 index 00000000000000..553c3c6c248458 --- /dev/null +++ b/flang/test/Transforms/constant-argument-globalisation.fir @@ -0,0 +1,67 @@ +// RUN: fir-opt --constant-argument-globalisation-opt < %s | FileCheck %s +// RUN: %flang_fc1 -emit-llvm -flang-deprecated-no-hlfir -O2 -o - %s | FileCheck --check-prefix=DISABLE %s +module { + func.func @sub1(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) { + %0 = fir.alloca i32 {adapt.valuebyref} + %1 = fir.alloca f64 {adapt.valuebyref} + %2 = fir.alloca f64 {adapt.valuebyref} + %c1_i32 = arith.constant 1 : i32 + %cst = arith.constant 1.000000e+00 : f64 + %cst_0 = arith.constant 0.000000e+00 : f64 + %3 = fir.declare %arg0 {uniq_name = "_QFsub1Ex"} : (!fir.ref) -> !fir.ref + %4 = fir.declare %arg1 {uniq_name = "_QFsub1Ey"} : (!fir.ref) -> !fir.ref + fir.store %cst_0 to %2 : !fir.ref + %false = arith.constant false + fir.store %cst to %1 : !fir.ref + %false_1 = arith.constant false + fir.store %c1_i32 to %0 : !fir.ref + %false_2 = arith.constant false + fir.call @sub2(%2, %1, %3, %4, %0) fastmath : (!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) -> () + return + } + func.func private @sub2(!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) + +// CHECK-LABEL: func.func @sub1( +// CHECK-SAME: [[ARG0:%.*]]: !fir.ref {{{.*}}}, +// CHECK-SAME: [[ARG1:%.*]]: !fir.ref {{{.*}}}) { +// CHECK: [[X:%.*]] = fir.declare [[ARG0]] {{.*}} +// CHECK: [[Y:%.*]] = fir.declare [[ARG1]] {{.*}} +// CHECK: [[CONST_R0:%.*]] = fir.address_of([[EXTR_0:@.*]]) : !fir.ref +// CHECK: [[CONST_R1:%.*]] = fir.address_of([[EXTR_1:@.*]]) : !fir.ref +// CHECK: [[CONST_I:%.*]] = fir.address_of([[EXTR_2:@.*]]) : !fir.ref +// CHECK: fir.call @sub2([[CONST_R0]], [[CONST_R1]], [[X]], [[Y]], [[CONST_I]]) +// CHECK-SAME: fastmath +// CHECK: return + +// CHECK: fir.global internal [[EXTR_0]] constant : f64 { +// CHECK: %{{.*}} = arith.constant 0.000000e+00 : f64 +// CHECK: fir.has_value %{{.*}} : f64 +// CHECK: } +// CHECK: fir.global internal [[EXTR_1]] constant : f64 { +// CHECK: %{{.*}} = arith.constant 1.000000e+00 : f64 +// CHECK: fir.has_value %{{.*}} : f64 +// CHECK: } +// CHECK: fir.global internal [[EXTR_2]] constant : i32 { +// CHECK: %{{.*}} = arith.constant 1 : i32 +// CHECK: fir.has_value %{{.*}} : i32 +// CHECK: } + +// DISABLE-LABEL: ; ModuleID = +// DISABLE-NOT: @_extruded +// DISABLE: define void @sub1( +// DISABLE-SAME: ptr [[ARG0:%.*]], +// DISABLE-SAME: ptr [[ARG1:%.*]]) +// DISABLE-SAME: { +// DISABLE: [[CONST_R0:%.*]] = alloca double +// DISABLE: [[CONST_R1:%.*]] = alloca double +// DISABLE: [[CONST_I:%.*]] = alloca i32 +// DISABLE: store double 0.0{{.*}}+00, ptr [[CONST_R0]] +// DISABLE: store double 1.0{{.*}}+00, ptr [[CONST_R1]] +// DISABLE: store i32 1, ptr [[CONST_I]] +// DISABLE: call void @sub2(ptr nonnull [[CONST_R0]], +// DISABLE-SAME: ptr nonnull [[CONST_R1]], +// DISABLE-SAME: ptr [[ARG0]], ptr [[ARG1]], +// DISABLE-SAME: ptr nonnull [[CONST_I]]) +// DISABLE: ret void +// DISABLE: } +}