Skip to content

Commit

Permalink
Revert "Revert "[AMDGPU] Reorganize GCN subtarget features for unalig…
Browse files Browse the repository at this point in the history
…ned access""

This reverts commit 8b08fa0.

The underlying problems were fixed by D90607.

Change-Id: I328857b7edad6d101951ad2489ff769a59e097bb
  • Loading branch information
jayfoad authored and searlmc1 committed Mar 11, 2021
1 parent 91c055b commit 1cc2ae4
Show file tree
Hide file tree
Showing 16 changed files with 77 additions and 62 deletions.
30 changes: 16 additions & 14 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"UnalignedBufferAccess",
"true",
"Support unaligned global loads and stores"
"Hardware supports unaligned global loads and stores"
>;

def FeatureTrapHandler: SubtargetFeature<"trap-handler",
Expand All @@ -105,18 +105,10 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"Support unaligned scratch loads and stores"
>;

// LDS alignment enforcement is controlled by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
"UnalignedAccessMode",
"true",
"Support unaligned local and region loads and stores"
>;

def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
"UnalignedDSAccess",
"true",
"Does not requires 16 byte alignment for certain local and region loads and stores"
"Hardware supports unaligned local and region loads and stores"
>;

def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
Expand Down Expand Up @@ -658,6 +650,15 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
"Requires use of fract on arguments to trig instructions"
>;

// Alignment enforcement is controlled by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
"UnalignedAccessMode",
"true",
"Enable unaligned global, local and region loads and stores if the hardware"
" supports it"
>;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
Expand All @@ -683,7 +684,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts]
FeatureDsSrc2Insts, FeatureUnalignedBufferAccess]
>;

def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
Expand All @@ -696,7 +697,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureFastDenormalF32]
FeatureDsSrc2Insts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess]
>;

def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
Expand All @@ -712,7 +713,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedDSAccess
FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess,
FeatureUnalignedDSAccess
]
>;

Expand All @@ -731,7 +733,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
FeatureUnalignedDSAccess
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
]
>;

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1070,9 +1070,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};

unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;

// TODO: Refine based on subtargets which support unaligned access or 128-bit
// LDS
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

Expand Down Expand Up @@ -187,7 +187,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
UnalignedAccessMode(false),

HasApertureRegs(false),
Expand Down Expand Up @@ -259,6 +258,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
HasMFMAInlineLiteralBug(false),
UnalignedBufferAccess(false),
UnalignedDSAccess(false),

ScalarizeGlobal(false),
Expand Down
18 changes: 13 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool UnalignedAccessMode;
bool HasApertureRegs;
bool SupportsXNACK;
Expand Down Expand Up @@ -400,6 +399,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool ScalarizeGlobal;

Expand Down Expand Up @@ -714,6 +714,18 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return UnalignedBufferAccess;
}

bool hasUnalignedBufferAccessEnabled() const {
return UnalignedBufferAccess && UnalignedAccessMode;
}

bool hasUnalignedDSAccess() const {
return UnalignedDSAccess;
}

bool hasUnalignedDSAccessEnabled() const {
return UnalignedDSAccess && UnalignedAccessMode;
}

bool hasUnalignedScratchAccess() const {
return UnalignedScratchAccess;
}
Expand All @@ -722,10 +734,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return UnalignedAccessMode;
}

bool hasUnalignedDSAccess() const {
return UnalignedDSAccess;
}

bool hasApertureRegs() const {
return HasApertureRegs;
}
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
AMDGPU::FeatureFlatForGlobal,
AMDGPU::FeaturePromoteAlloca,
AMDGPU::FeatureUnalignedBufferAccess,
AMDGPU::FeatureUnalignedScratchAccess,
AMDGPU::FeatureUnalignedAccessMode,

Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1433,8 +1433,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
if (Subtarget->hasUnalignedDSAccess() &&
Subtarget->hasUnalignedAccessMode() &&
if (Subtarget->hasUnalignedDSAccessEnabled() &&
!Subtarget->hasLDSMisalignedBug()) {
if (IsFast)
*IsFast = Alignment != Align(2);
Expand Down Expand Up @@ -1484,7 +1483,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return AlignedBy4;
}

if (Subtarget->hasUnalignedBufferAccess() &&
if (Subtarget->hasUnalignedBufferAccessEnabled() &&
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// If we have an uniform constant load, it still requires using a slow
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s

; FIXME:
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s

; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s

define <2 x half> @chain_hi_to_lo_private() {
; GFX900-LABEL: chain_hi_to_lo_private:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Should not merge this to a dword load
define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s

; SI-LABEL: {{^}}local_unaligned_load_store_i16:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s

# FIXME: This overrides attributes that already are present. It should probably
# only touch functions without an existing attribute.
Expand All @@ -10,8 +10,8 @@
# MCPU: attributes #0 = { "target-cpu"="fiji" }
# MCPU: attributes #1 = { "target-cpu"="hawaii" }

# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" }
# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" }
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" }
# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" }

--- |
define amdgpu_kernel void @with_cpu_attr() #0 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s

# The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function.

# MCPU: attributes #0 = { "target-cpu"="hawaii" }
# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" }
# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" }

---
name: no_ir
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s

target triple = "amdgcn--"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s
; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
Expand Down
Loading

0 comments on commit 1cc2ae4

Please sign in to comment.