Skip to content

Commit

Permalink
[AIE2P] Combine G_SHUFFLE_VECTOR into Extract+Broadcast
Browse files Browse the repository at this point in the history
  • Loading branch information
katerynamuts committed Jan 23, 2025
1 parent 293d8d8 commit 25ca974
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 10 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/AIE/AIECombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ def combine_vector_shuffle_vsel : GICombineRule<
[{ return matchShuffleToVSel(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyVSel(*${root}, MRI, B, ${matchinfo}); }])>;

// Rewrite a G_SHUFFLE_VECTOR when matchShuffleToExtractBroadcast accepts it.
// The matcher stores a build function in $matchinfo, and the generic
// Helper.applyBuildFn runs it to emit the replacement instructions.
def combine_shuffle_to_vextbcst : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR): $root,
[{ return matchShuffleToExtractBroadcast(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def AIE2PreLegalizerCombiner
: GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
all_combines, combine_S20NarrowingOpt,
Expand All @@ -100,7 +106,8 @@ def AIE2PPreLegalizerCombiner
combine_splat_vector, combine_vector_broadcast,
combine_concat_to_pad_vector,
combine_vector_shuffle_broadcast,
combine_vector_shuffle_vsel]> {
combine_vector_shuffle_vsel,
combine_shuffle_to_vextbcst]> {
let CombineAllMethodName = "tryCombineAllImpl";
}

Expand Down
69 changes: 64 additions & 5 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1086,12 +1086,9 @@ bool llvm::matchSplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}

bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
std::pair<Register, Register> &MatchInfo) {
B.setInstrAndDebugLoc(MI);
static void buildBroadcastVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register SrcReg, Register DstVecReg) {
const AIEBaseInstrInfo &AIETII = (const AIEBaseInstrInfo &)B.getTII();
auto [DstVecReg, SrcReg] = MatchInfo;
const LLT SrcTy = MRI.getType(SrcReg);
const LLT DstVecTy = MRI.getType(DstVecReg);
const unsigned DstVecSize = DstVecTy.getSizeInBits();
Expand Down Expand Up @@ -1146,6 +1143,14 @@ bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
DstVec512BitReg, DstVec512BitReg});
}
}
}

/// Replace \p MI with a broadcast built by buildBroadcastVector.
/// \p MatchInfo holds (destination vector register, scalar source register),
/// in that order. \p MI is erased once the replacement has been emitted.
/// \returns true unconditionally.
bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B,
                            std::pair<Register, Register> &MatchInfo) {
  const Register BroadcastDst = MatchInfo.first;
  const Register ScalarSrc = MatchInfo.second;

  // New instructions are inserted at MI and inherit its debug location.
  B.setInstrAndDebugLoc(MI);
  buildBroadcastVector(B, MRI, ScalarSrc, BroadcastDst);

  MI.eraseFromParent();
  return true;
}
Expand Down Expand Up @@ -1826,3 +1831,57 @@ bool llvm::matchShuffleToVSel(
MatchInfo = std::make_tuple(DstReg, Src1Reg, Src2Reg, DstMask);
return true;
}

/// Scan the shuffle mask \p Mask and report the single index that all of its
/// non-negative entries agree on. Negative entries are skipped.
/// \returns that index, or std::nullopt when two non-negative entries differ
/// or when the mask contains no non-negative entry at all.
static std::optional<int> getUniqueIndex(ArrayRef<int> Mask) {
  std::optional<int> Candidate;
  for (const int Lane : Mask) {
    // Negative lanes carry no index and never disqualify the mask.
    if (Lane < 0)
      continue;

    // A second, different index means the mask is not a single-element splat.
    if (Candidate && *Candidate != Lane)
      return std::nullopt;

    Candidate = Lane;
  }
  return Candidate;
}

/// Match a G_SHUFFLE_VECTOR whose defined (non-negative) mask lanes all select
/// the same source element, so that it can be rewritten as an extract of that
/// element followed by a broadcast. E.g.:
///   From : %X:_(<4 x s64>) = COPY $wl0
///          %1:_(<4 x s64>) = COPY $wl1
///          %2:_(<8 x s64>) = G_SHUFFLE_VECTOR %X(<4 x s64>), %1(<4 x s64>),
///                            shufflemask(3, 3, 3, 3, 3, 3, 3, 3)
///   To   : %C:_(s32) = G_CONSTANT i32 3
///          %E:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %X(<4 x s64>), %C(s32)
///          %2:_(<8 x s64>) = G_AIE_BROADCAST_VECTOR %E(s32)
///
/// Mask indices address the concatenation of both sources, so an index that is
/// not smaller than the number of elements of the first source selects from
/// the second source; the extract is then built on that operand with the index
/// rebased accordingly.
///
/// \param MI        the G_SHUFFLE_VECTOR to inspect.
/// \param MRI       used to query register types and passed on to the
///                  broadcast builder.
/// \param TII       provides the target's generic extract-element opcode.
/// \param MatchInfo on success, receives the function that emits the
///                  replacement instructions.
/// \returns true if the combine applies, false otherwise.
bool llvm::matchShuffleToExtractBroadcast(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          const AIEBaseInstrInfo &TII,
                                          BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  const std::optional<int> UniqOpIdx = getUniqueIndex(Mask);
  if (!UniqOpIdx)
    return false;

  assert(*UniqOpIdx >= 0 && "Couldn't find a unique operand to extract!");

  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  // An element extract only makes sense on a vector source.
  if (!SrcTy.isVector())
    return false;

  // Pick the source the index really refers to. Extracting from the first
  // source with an index past its end would read a non-existent lane.
  const int NumSrcElts = SrcTy.getNumElements();
  const bool FromSecondSrc = *UniqOpIdx >= NumSrcElts;
  const Register SrcVecReg =
      FromSecondSrc ? MI.getOperand(2).getReg() : Src1Reg;
  const int EltIdx = FromSecondSrc ? *UniqOpIdx - NumSrcElts : *UniqOpIdx;
  assert(EltIdx < NumSrcElts && "Shuffle mask index out of range!");

  const unsigned ExtractOpc = TII.getGenericExtractVectorEltOpcode(true);

  // Registers, index and opcode are captured by value so the build function
  // does not depend on MI beyond this point.
  // NOTE(review): the extracted scalar is always s32, whatever the source
  // element size is; confirm this is intended for elements wider than 32 bits
  // (e.g. <4 x s64>).
  MatchInfo = [=, &MRI](MachineIRBuilder &B) {
    auto Cst = B.buildConstant(LLT::scalar(32), EltIdx);
    auto Extr = B.buildInstr(ExtractOpc, {LLT::scalar(32)}, {SrcVecReg, Cst});
    buildBroadcastVector(B, MRI, Extr.getReg(0), DstReg);
  };
  return true;
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ bool matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
bool matchShuffleToVSel(
MachineInstr &MI, MachineRegisterInfo &MRI,
std::tuple<Register, Register, Register, uint64_t> &MatchInfo);
/// Combine a G_SHUFFLE_VECTOR whose defined (non-negative) mask lanes all
/// select the same source element into an extract of that element followed by
/// a broadcast.
/// \return true if the combine applies; \p MatchInfo then holds the function
/// that builds the replacement instructions.
bool matchShuffleToExtractBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
BuildFnTy &MatchInfo);
/// \return true if \a MemI can be moved just before \a Dest in order to allow
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,63 @@ body: |
$x0 = COPY %0(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...
# Every mask lane selects element 2 of the first source: the shuffle is
# expected to become a constant index, a sign-extending element extract and a
# broadcast (unpadded back to <8 x s32>).
---
name: shuffle_vector_to_extract_broadcast_src1
tracksRegLiveness: true
body: |
bb.1:
liveins: $wl0, $wl1
; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_src1
; CHECK: liveins: $wl0, $wl1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<8 x s32>), [[C]](s32)
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<8 x s32>)
%0:_(<8 x s32>) = COPY $wl0
%1:_(<8 x s32>) = COPY $wl1
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %1(<8 x s32>), shufflemask(2, 2, 2, 2, 2, 2, 2, 2)
PseudoRET implicit $lr, implicit %2
...
# Undef (-1) mask lanes are ignored: all remaining lanes select element 3 of
# the first source, so the combine still applies.
# NOTE(review): the expected output extracts an s32 from a <4 x s64> source;
# confirm that narrowing the element is intended here.
---
name: shuffle_vector_to_extract_broadcast_undef
tracksRegLiveness: true
body: |
bb.1:
liveins: $wl0, $wl1
; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_undef
; CHECK: liveins: $wl0, $wl1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $wl0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<4 x s64>), [[C]](s32)
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_BROADCAST_VECTOR]](<8 x s64>)
%0:_(<4 x s64>) = COPY $wl0
%1:_(<4 x s64>) = COPY $wl1
%2:_(<8 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1(<4 x s64>), shufflemask(3, 3, -1, -1, 3, 3, 3, 3)
PseudoRET implicit $lr, implicit %2
...
# Negative test: the defined mask lanes select two different elements (3 and
# 2), so the G_SHUFFLE_VECTOR is expected to be left untouched.
---
name: shuffle_vector_to_extract_broadcast_neg_test
tracksRegLiveness: true
body: |
bb.1:
liveins: $wl0, $wl1
; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_neg_test
; CHECK: liveins: $wl0, $wl1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $wl0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s64>) = COPY $wl1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s64>), [[COPY1]], shufflemask(3, 2, undef, undef, 3, 3, 3, 3)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s64>)
%0:_(<4 x s64>) = COPY $wl0
%1:_(<4 x s64>) = COPY $wl1
%2:_(<8 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1(<4 x s64>), shufflemask(3, 2, -1, -1, 3, 3, 3, 3)
PseudoRET implicit $lr, implicit %2
...
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2p -run-pass=aie2p-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s


Expand Down Expand Up @@ -102,10 +102,11 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[AIE_UNPAD_VECTOR]](<8 x s32>), [[C]](s32)
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR1]](<16 x s32>)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[AIE_UNPAD_VECTOR]](<8 x s32>), [[AIE_UNPAD_VECTOR1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
; CHECK-NEXT: $wl0 = COPY [[SHUF]](<8 x s32>)
; CHECK-NEXT: $wl0 = COPY [[AIE_UNPAD_VECTOR1]](<8 x s32>)
%1:_(s32) = COPY $r0
%2:_(s32) = COPY $r1
%3:_(<8 x s32>) = G_BUILD_VECTOR %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32)
Expand Down

0 comments on commit 25ca974

Please sign in to comment.