From 25ca974c672c015eaebc4aeecc5d9b1d30e6ee6c Mon Sep 17 00:00:00 2001 From: Kateryna Muts Date: Wed, 22 Jan 2025 14:04:34 +0000 Subject: [PATCH] [AIE2P] Combine G_SHUFFLE_VECTOR into Extract+Broadcast --- llvm/lib/Target/AIE/AIECombine.td | 9 ++- llvm/lib/Target/AIE/AIECombinerHelper.cpp | 69 +++++++++++++++++-- llvm/lib/Target/AIE/AIECombinerHelper.h | 5 ++ .../prelegalizercombiner-shuffle-vector.mir | 60 ++++++++++++++++ .../prelegalizercombiner-vector-broadcast.mir | 9 +-- 5 files changed, 142 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index a45d44e50930..376a8c070f30 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -83,6 +83,12 @@ def combine_vector_shuffle_vsel : GICombineRule< [{ return matchShuffleToVSel(*${root}, MRI, ${matchinfo}); }]), (apply [{ applyVSel(*${root}, MRI, B, ${matchinfo}); }])>; +def combine_shuffle_to_vextbcst : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR): $root, + [{ return matchShuffleToExtractBroadcast(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def AIE2PreLegalizerCombiner : GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector, all_combines, combine_S20NarrowingOpt, @@ -100,7 +106,8 @@ def AIE2PPreLegalizerCombiner combine_splat_vector, combine_vector_broadcast, combine_concat_to_pad_vector, combine_vector_shuffle_broadcast, - combine_vector_shuffle_vsel]> { + combine_vector_shuffle_vsel, + combine_shuffle_to_vextbcst]> { let CombineAllMethodName = "tryCombineAllImpl"; } diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index f342eaaba16e..2cf1ab48937f 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -1086,12 +1086,9 @@ bool 
llvm::matchSplatVector(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } -bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B, - std::pair &MatchInfo) { - B.setInstrAndDebugLoc(MI); +static void buildBroadcastVector(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register SrcReg, Register DstVecReg) { const AIEBaseInstrInfo &AIETII = (const AIEBaseInstrInfo &)B.getTII(); - auto [DstVecReg, SrcReg] = MatchInfo; const LLT SrcTy = MRI.getType(SrcReg); const LLT DstVecTy = MRI.getType(DstVecReg); const unsigned DstVecSize = DstVecTy.getSizeInBits(); @@ -1146,6 +1143,14 @@ bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI, DstVec512BitReg, DstVec512BitReg}); } } +} + +bool llvm::applySplatVector(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, + std::pair &MatchInfo) { + B.setInstrAndDebugLoc(MI); + auto [DstVecReg, SrcReg] = MatchInfo; + buildBroadcastVector(B, MRI, SrcReg, DstVecReg); MI.eraseFromParent(); return true; } @@ -1826,3 +1831,57 @@ bool llvm::matchShuffleToVSel( MatchInfo = std::make_tuple(DstReg, Src1Reg, Src2Reg, DstMask); return true; } + +/// This function returns the unique index in the shuffle mask \p Mask if the +/// unique index exists. +static std::optional getUniqueIndex(ArrayRef Mask) { + std::optional UniqOpIdx; + for (unsigned I = 0; I < Mask.size(); I++) { + int Idx = Mask[I]; + if (Idx < 0) + continue; + + if (!UniqOpIdx) { + UniqOpIdx = Idx; + continue; + } + + if (UniqOpIdx != Idx) { + return std::nullopt; + } + } + return UniqOpIdx; +} + +/// \returns true if it is possible to combine a shuffle vector with a mask +/// that extracts the only element from the first source vector and broadcasts +/// it. 
E.g.:
+/// From : %X:_(<4 x s64>) = COPY $wl0
+///        %1:_(<4 x s64>) = COPY $wl1
+///        %2:_(<8 x s64>) = G_SHUFFLE_VECTOR %X(<4 x s64>), %1(<4 x s64>),
+///                          shufflemask(3, 3, 3, 3, 3, 3, 3, 3)
+/// To   : %C:_(s32) = G_CONSTANT i32 3
+///        %E:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %X(<4 x s64>), %C(s32)
+///        %2:_(<8 x s64>) = G_AIE_BROADCAST_VECTOR %E(s32)
+bool llvm::matchShuffleToExtractBroadcast(MachineInstr &MI,
+                                          MachineRegisterInfo &MRI,
+                                          const AIEBaseInstrInfo &TII,
+                                          BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+  const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  std::optional<int> UniqOpIdx = getUniqueIndex(Mask);
+  // The lane is extracted from operand 1, so it must exist in that vector.
+  if (!UniqOpIdx || !SrcTy.isVector() ||
+      *UniqOpIdx >= static_cast<int>(SrcTy.getNumElements()))
+    return false;
+  const unsigned ExtractOpc = TII.getGenericExtractVectorEltOpcode(true);
+  MatchInfo = [=, &MI, &MRI](MachineIRBuilder &B) {
+    const Register DstReg = MI.getOperand(0).getReg();
+    const Register SrcVecReg = MI.getOperand(1).getReg();
+    auto Cst = B.buildConstant(LLT::scalar(32), UniqOpIdx.value());
+    auto Extr = B.buildInstr(ExtractOpc, {LLT::scalar(32)}, {SrcVecReg, Cst});
+    buildBroadcastVector(B, MRI, Extr.getReg(0), DstReg);
+  };
+  return true;
+}
diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h
index 799fa2866f83..dbfdabf4a6b1 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.h
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.h
@@ -69,6 +69,11 @@ bool matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
 bool matchShuffleToVSel(
     MachineInstr &MI, MachineRegisterInfo &MRI,
     std::tuple &MatchInfo);
+/// Combine a shuffle vector with a mask that extracts the only element from
+/// the first source vector and broadcasts it.
+bool matchShuffleToExtractBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI, + const AIEBaseInstrInfo &TII, + BuildFnTy &MatchInfo); /// \return true if \a MemI can be moved just before \a Dest in order to allow /// post-increment combining bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest, diff --git a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir index 844e900bdbb2..f41fe817200c 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir @@ -227,3 +227,63 @@ body: | $x0 = COPY %0(<16 x s32>) PseudoRET implicit $lr, implicit $x0 ... +--- +name: shuffle_vector_to_extract_broadcast_src1 +tracksRegLiveness: true +body: | + bb.1: + liveins: $wl0, $wl1 + + ; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_src1 + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<8 x s32>), [[C]](s32) + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<8 x s32>) + %0:_(<8 x s32>) = COPY $wl0 + %1:_(<8 x s32>) = COPY $wl1 + %2:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %1(<8 x s32>), shufflemask(2, 2, 2, 2, 2, 2, 2, 2) + PseudoRET implicit $lr, implicit %2 +... 
+--- +name: shuffle_vector_to_extract_broadcast_undef +tracksRegLiveness: true +body: | + bb.1: + liveins: $wl0, $wl1 + + ; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_undef + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $wl0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<4 x s64>), [[C]](s32) + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_BROADCAST_VECTOR]](<8 x s64>) + %0:_(<4 x s64>) = COPY $wl0 + %1:_(<4 x s64>) = COPY $wl1 + %2:_(<8 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1(<4 x s64>), shufflemask(3, 3, -1, -1, 3, 3, 3, 3) + PseudoRET implicit $lr, implicit %2 +... +--- +name: shuffle_vector_to_extract_broadcast_neg_test +tracksRegLiveness: true +body: | + bb.1: + liveins: $wl0, $wl1 + + ; CHECK-LABEL: name: shuffle_vector_to_extract_broadcast_neg_test + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s64>) = COPY $wl1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s64>), [[COPY1]], shufflemask(3, 2, undef, undef, 3, 3, 3, 3) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s64>) + %0:_(<4 x s64>) = COPY $wl0 + %1:_(<4 x s64>) = COPY $wl1 + %2:_(<8 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1(<4 x s64>), shufflemask(3, 2, -1, -1, 3, 3, 3, 3) + PseudoRET implicit $lr, implicit %2 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizercombiner-vector-broadcast.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizercombiner-vector-broadcast.mir index 47ef26af5d37..5e57d563aeba 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizercombiner-vector-broadcast.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/prelegalizercombiner-vector-broadcast.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2p -run-pass=aie2p-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s @@ -102,10 +102,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32) ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>) - ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[AIE_UNPAD_VECTOR]](<8 x s32>), [[C]](s32) + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) ; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR1]](<16 x s32>) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[AIE_UNPAD_VECTOR]](<8 x s32>), [[AIE_UNPAD_VECTOR1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0) - ; CHECK-NEXT: $wl0 = COPY [[SHUF]](<8 x s32>) + ; CHECK-NEXT: $wl0 = COPY [[AIE_UNPAD_VECTOR1]](<8 x s32>) %1:_(s32) = COPY $r0 %2:_(s32) = COPY $r1 %3:_(<8 x s32>) = G_BUILD_VECTOR %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), 
%1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32)