Skip to content

Commit

Permalink
[AIE2P] Support bfp16 scl2vec intrinsics - backend
Browse files Browse the repository at this point in the history
  • Loading branch information
niwinanto committed Jan 20, 2025
1 parent 8546389 commit c085f62
Show file tree
Hide file tree
Showing 6 changed files with 1,433 additions and 0 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,8 @@ unsigned AIE2PInstrInfo::getOpCode(MachineInstr &I) const {
return isSigned ? AIE2P::VUNPACK_mv_unpack_x_unpackSign1
: AIE2P::VUNPACK_mv_unpack_x_unpackSign0;
}
case Intrinsic::aie2p_vshuffle_576_bfp16:
return AIE2P::VSHUFFLE_vec_shuffle_ex;
default:
llvm_unreachable("Unexpected Intrinsic ID");
}
Expand Down
55 changes: 55 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector {
bool selectReadTM(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVUNPACK(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVPACK(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVSHUFFLE_BFP(MachineInstr &I, MachineRegisterInfo &MRI);

private:
bool selectImpl(MachineInstr &I,
Expand Down Expand Up @@ -254,6 +255,8 @@ bool AIE2PInstructionSelector::select(MachineInstr &I) {
case Intrinsic::aie2p_v16bf16_to_v16accfloat:
case Intrinsic::aie2p_v32bf16_to_v32accfloat:
return selectVCONV(I, MRI);
case Intrinsic::aie2p_vshuffle_576_bfp16:
return selectVSHUFFLE_BFP(I, MRI);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down Expand Up @@ -2973,6 +2976,58 @@ AIE2PInstructionSelector::getCombinedOpcodeSRSUPS(
return {};
}

/// Lower the bfp16 shuffle intrinsic \p I to the target opcode reported by
/// TII.getOpCode(I).
///
/// \p I carries every bfp16 value as two separate registers (called mantissa
/// and exponent here), whereas the selected instruction works on combined
/// registers. Both sources are therefore packed with a REG_SEQUENCE using the
/// sub_bfp16_x / sub_bfp16_e sub-register indices, and the combined result is
/// split back into the two original destination registers with sub-register
/// COPYs.
///
/// \param I   Instruction being selected; it is erased before returning.
/// \param MRI Register info used to create the combined virtual registers.
/// \returns the result of constrainSelectedInstRegOperands() on the newly
///          built shuffle instruction.
bool AIE2PInstructionSelector::selectVSHUFFLE_BFP(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  // Operands 0-1 are the results and 3-7 the inputs. Operand 2 is not read
  // here (presumably the intrinsic ID -- confirm against G_INTRINSIC layout).
  Register ResMantissa = I.getOperand(0).getReg();
  Register ResExponent = I.getOperand(1).getReg();
  Register LhsMantissa = I.getOperand(3).getReg();
  Register LhsExponent = I.getOperand(4).getReg();
  Register RhsMantissa = I.getOperand(5).getReg();
  Register RhsExponent = I.getOperand(6).getReg();
  Register ShuffleMode = I.getOperand(7).getReg();

  unsigned ShuffleOpc = TII.getOpCode(I);

  // Creates a fresh virtual register of class PairRC and emits the
  // REG_SEQUENCE that fills it from a mantissa/exponent register pair.
  auto PackPair = [&](const TargetRegisterClass *PairRC, Register Mantissa,
                      Register Exponent) -> MachineInstrBuilder {
    Register Packed = MRI.createVirtualRegister(PairRC);
    return MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {Packed}, {})
        .addReg(Mantissa)
        .addImm(AIE2P::sub_bfp16_x)
        .addReg(Exponent)
        .addImm(AIE2P::sub_bfp16_e);
  };

  // Constrains operand OpIdx of Inst to the register class RC.
  auto ConstrainOperand = [&](const MachineInstrBuilder &Inst,
                              const TargetRegisterClass &RC, unsigned OpIdx) {
    constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *Inst, RC,
                             Inst->getOperand(OpIdx));
  };

  // Both REG_SEQUENCEs are emitted before any operand is constrained.
  MachineInstrBuilder PackLhs =
      PackPair(&AIE2P::mEXmRegClass, LhsMantissa, LhsExponent);
  MachineInstrBuilder PackRhs =
      PackPair(&AIE2P::mEXnRegClass, RhsMantissa, RhsExponent);

  // In a REG_SEQUENCE, operand 1 is the mantissa input and operand 3 the
  // exponent input (operands 2 and 4 are the sub-register indices).
  ConstrainOperand(PackLhs, AIE2P::VEC512RegClass, 1);
  ConstrainOperand(PackLhs, AIE2P::EXPVEC64RegClass, 3);
  ConstrainOperand(PackRhs, AIE2P::VEC512RegClass, 1);
  ConstrainOperand(PackRhs, AIE2P::EXPVEC64RegClass, 3);

  // The shuffle itself, producing a combined result register.
  Register PackedRes = MRI.createVirtualRegister(&AIE2P::mEXmRegClass);
  Register PackedLhs = PackLhs->getOperand(0).getReg();
  Register PackedRhs = PackRhs->getOperand(0).getReg();
  MachineInstrBuilder Shuffle = MIB.buildInstr(
      ShuffleOpc, {PackedRes}, {PackedLhs, PackedRhs, ShuffleMode});

  // Split the combined result back into the registers I originally defined.
  MachineInstrBuilder MantissaCopy =
      MIB.buildInstr(TargetOpcode::COPY, {ResMantissa}, {})
          .addReg(PackedRes, 0, AIE2P::sub_bfp16_x);
  MachineInstrBuilder ExponentCopy =
      MIB.buildInstr(TargetOpcode::COPY, {ResExponent}, {})
          .addReg(PackedRes, 0, AIE2P::sub_bfp16_e);
  ConstrainOperand(MantissaCopy, AIE2P::VEC512RegClass, 0);
  ConstrainOperand(ExponentCopy, AIE2P::EXPVEC64RegClass, 0);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Shuffle, TII, TRI, RBI);
}

namespace llvm {
InstructionSelector *
createAIE2PInstructionSelector(const AIE2PTargetMachine &TM,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,9 @@ AIE2PRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AIE2P::eSRegClassID:
case AIE2P::mS2RegClassID:
case AIE2P::mS3RegClassID:
case AIE2P::EXPVEC64RegClassID:
case AIE2P::EXPVEC64_with_sub_hi_exp_in_eEheRegClassID:
case AIE2P::EXPVEC64_with_sub_hi_exp_in_eEhoRegClassID:
return GPRs;
case AIE2P::ePRegClassID:
case AIE2P::eSpecial20RegClassID:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterBanks.td
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
include "AIEBaseRegisterBanks.td"
def GPRRegBank : RegisterBank<"GPRRegBank", [eR, eL, eE, EXPVEC64]>;
def AccRegBank : RegisterBank<"AccRegBank", [ACC512, ACC1024, ACC2048]>;
def GPRRegBank : RegisterBank<"GPRRegBank", [eR, eL, eE, EXPVEC64]>;
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s

---
name: test_shuffle
alignment: 16
exposesReturnsTwice: false
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $e1, $e2, $r0, $x1, $x2
; CHECK-LABEL: name: test_shuffle
; CHECK: liveins: $e1, $e2, $r0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:expvec64 = COPY $e1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:expvec64 = COPY $e2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mexm = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:mexn = REG_SEQUENCE [[COPY2]], %subreg.sub_bfp16_x, [[COPY3]], %subreg.sub_bfp16_e
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_ex:%[0-9]+]]:mexm = VSHUFFLE_vec_shuffle_ex [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY4]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vec512 = COPY [[VSHUFFLE_vec_shuffle_ex]].sub_bfp16_x
; CHECK-NEXT: [[COPY6:%[0-9]+]]:expvec64 = COPY [[VSHUFFLE_vec_shuffle_ex]].sub_bfp16_e
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY5]], implicit [[COPY6]]
%2:vregbank(<64 x s8>) = COPY $x1
%3:gprregbank(<8 x s8>) = COPY $e1
%4:vregbank(<64 x s8>) = COPY $x2
%5:gprregbank(<8 x s8>) = COPY $e2
%6:gprregbank(s32) = COPY $r0
%7:vregbank(<64 x s8>), %8:gprregbank(<8 x s8>) = G_INTRINSIC intrinsic(@llvm.aie2p.vshuffle.576.bfp16), %2(<64 x s8>), %3(<8 x s8>), %4(<64 x s8>), %5(<8 x s8>), %6(s32)
PseudoRET implicit $lr, implicit %7, implicit %8
...
---
name: test_shuffle_imlicitdef
alignment: 16
exposesReturnsTwice: false
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $e1, $e2, $r0, $x1, $x2
; CHECK-LABEL: name: test_shuffle_imlicitdef
; CHECK: liveins: $e1, $e2, $r0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:expvec64 = COPY $e1
; CHECK-NEXT: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:expvec64 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mexm = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:mexn = REG_SEQUENCE [[DEF]], %subreg.sub_bfp16_x, [[DEF1]], %subreg.sub_bfp16_e
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_ex:%[0-9]+]]:mexm = VSHUFFLE_vec_shuffle_ex [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY2]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[VSHUFFLE_vec_shuffle_ex]].sub_bfp16_x
; CHECK-NEXT: [[COPY4:%[0-9]+]]:expvec64 = COPY [[VSHUFFLE_vec_shuffle_ex]].sub_bfp16_e
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY3]], implicit [[COPY4]]
%2:vregbank(<64 x s8>) = COPY $x1
%3:gprregbank(<8 x s8>) = COPY $e1
%4:vregbank(<64 x s8>) = G_IMPLICIT_DEF
%5:gprregbank(<8 x s8>) = G_IMPLICIT_DEF
%6:gprregbank(s32) = COPY $r0
%7:vregbank(<64 x s8>), %8:gprregbank(<8 x s8>) = G_INTRINSIC intrinsic(@llvm.aie2p.vshuffle.576.bfp16), %2(<64 x s8>), %3(<8 x s8>), %4(<64 x s8>), %5(<8 x s8>), %6(s32)
PseudoRET implicit $lr, implicit %7, implicit %8
...
Loading

0 comments on commit c085f62

Please sign in to comment.