[AArch64] Implement NEON FP8 vectors as VectorType #123603

momchil-velikov · 2025-01-20T12:29:57Z

Reimplement Neon FP8 vector types using attribute neon_vector_type instead of having them as builtin types.
This allows to implement FP8 Neon intrinsics without the need to add special cases for these types
when using __builtin_shufflevector or bitcast (using C-style cast operator) between vectors, both
extensively used in the generated code in arm_neon.h.

Co-Aurhored-By: Caroline Concatto <[email protected]>

llvmbot · 2025-01-20T12:30:32Z

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)

Changes

Reimplement Neon FP8 vector types using attribute neon_vector_type instead of having them as builtin types.
This allows to implement FP8 Neon intrinsics without the need to add special cases for these types
when using __builtin_shufflevector or bitcast (using C-style cast operator) between vectors, both
extensively used in the generated code in arm_neon.h.

Patch is 47.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123603.diff

19 Files Affected:

(modified) clang/include/clang/AST/Type.h (+5)
(modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (-2)
(modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1)
(modified) clang/lib/AST/ItaniumMangle.cpp (+5)
(modified) clang/lib/CodeGen/CGBuiltin.cpp (+1)
(modified) clang/lib/CodeGen/CGExpr.cpp (+9-2)
(modified) clang/lib/CodeGen/CodeGenTypes.cpp (+3-1)
(modified) clang/lib/CodeGen/Targets/AArch64.cpp (+1-6)
(modified) clang/lib/Sema/SemaARM.cpp (+2)
(modified) clang/lib/Sema/SemaExpr.cpp (+6-1)
(modified) clang/lib/Sema/SemaType.cpp (+2-1)
(added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
(added) clang/test/CodeGen/AArch64/fp8-cast.c (+193)
(modified) clang/test/CodeGen/arm-mfp8.c (+53-35)
(modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7)
(modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11)
(added) clang/test/Sema/aarch64-fp8-cast.c (+104)
(modified) clang/test/Sema/arm-mfp8.cpp (+22-12)
(modified) clang/utils/TableGen/NeonEmitter.cpp (+3-8)

diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const;         // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..2dd2754e778d60 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -201,8 +201,6 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T
 SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
 
 AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1)
 
 #undef SVE_VECTOR_TYPE
 #undef SVE_VECTOR_TYPE_BFLOAT
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4dc8b24ed8ae6c..83ef015018f1a1 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -208,7 +208,8 @@ namespace clang {
       Float16,
       Float32,
       Float64,
-      BFloat16
+      BFloat16,
+      MFloat8
     };
 
     NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -230,6 +231,7 @@ namespace clang {
       switch (getEltType()) {
       case Int8:
       case Poly8:
+      case MFloat8:
         return 8;
       case Int16:
       case Float16:
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 1dd936cf4fb518..9948963d7f44b3 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3919,6 +3919,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
     case BuiltinType::Float:     EltName = "float32_t"; break;
     case BuiltinType::Half:      EltName = "float16_t"; break;
     case BuiltinType::BFloat16:  EltName = "bfloat16_t"; break;
+    case BuiltinType::MFloat8:
+      EltName = "mfloat8_t";
+      break;
     default:
       llvm_unreachable("unexpected Neon vector element type");
     }
@@ -3972,6 +3975,8 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) {
     return "Float64";
   case BuiltinType::BFloat16:
     return "Bfloat16";
+  case BuiltinType::MFloat8:
+    return "Mfloat8";
   default:
     llvm_unreachable("Unexpected vector element base type");
   }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b80833fd91884d..5fd4c3e34ebeba 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6819,6 +6819,7 @@ static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
   switch (TypeFlags.getEltType()) {
   case NeonTypeFlags::Int8:
   case NeonTypeFlags::Poly8:
+  case NeonTypeFlags::MFloat8:
     return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9a9a8c7f6eae09..eeff77db2d2684 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2414,8 +2414,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
         Vec = Builder.CreateBitCast(Vec, IRVecTy);
         // iN --> <N x i1>.
       }
-      Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
-                                        Dst.getVectorIdx(), "vecins");
+      llvm::Value *SrcVal = Src.getScalarVal();
+      // Allow inserting `<1 x T>` into an `<N x T>`. It can happen with scalar
+      // types which are mapped to vector LLVM IR types (e.g. for implementing
+      // an ABI).
+      if (auto *EltTy = dyn_cast<llvm::FixedVectorType>(SrcVal->getType());
+          EltTy && EltTy->getNumElements() == 1)
+        SrcVal = Builder.CreateBitCast(SrcVal, EltTy->getElementType());
+      Vec = Builder.CreateInsertElement(Vec, SrcVal, Dst.getVectorIdx(),
+                                        "vecins");
       if (IRStoreTy) {
         // <N x i1> --> <iN>.
         Vec = Builder.CreateBitCast(Vec, IRStoreTy);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 09191a4901f493..950b23f4e13b99 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -650,7 +650,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
     // An ext_vector_type of Bool is really a vector of bits.
     llvm::Type *IRElemTy = VT->isExtVectorBoolType()
                                ? llvm::Type::getInt1Ty(getLLVMContext())
-                               : ConvertType(VT->getElementType());
+                               : (VT->getElementType()->isMFloat8Type()
+                                      ? llvm::Type::getInt8Ty(getLLVMContext())
+                                      : ConvertType(VT->getElementType()));
     ResultType = llvm::FixedVectorType::get(IRElemTy, VT->getNumElements());
     break;
   }
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 7db67ecba07c8f..c702e79ff8eb98 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -383,10 +383,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
         NSRN = std::min(NSRN + 1, 8u);
       else {
         switch (BT->getKind()) {
-        case BuiltinType::MFloat8x8:
-        case BuiltinType::MFloat8x16:
-          NSRN = std::min(NSRN + 1, 8u);
-          break;
         case BuiltinType::SveBool:
         case BuiltinType::SveCount:
           NPRN = std::min(NPRN + 1, 4u);
@@ -629,8 +625,7 @@ bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // but with the difference that any floating-point type is allowed,
   // including __fp16.
   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->isFloatingPoint() || BT->getKind() == BuiltinType::MFloat8x16 ||
-        BT->getKind() == BuiltinType::MFloat8x8)
+    if (BT->isFloatingPoint())
       return true;
   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
     if (auto Kind = VT->getVectorKind();
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index db418d80e0e09c..2620bbc97ba02a 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -352,6 +352,8 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
     return Context.DoubleTy;
   case NeonTypeFlags::BFloat16:
     return Context.BFloat16Ty;
+  case NeonTypeFlags::MFloat8:
+    return Context.MFloat8Ty;
   }
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ae40895980d90a..2087e8b251d8c6 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7502,7 +7502,7 @@ static bool breakDownVectorType(QualType type, uint64_t &len,
   if (const VectorType *vecType = type->getAs<VectorType>()) {
     len = vecType->getNumElements();
     eltType = vecType->getElementType();
-    assert(eltType->isScalarType());
+    assert(eltType->isScalarType() || eltType->isMFloat8Type());
     return true;
   }
 
@@ -10168,6 +10168,11 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
     return HLSL().handleVectorBinOpConversion(LHS, RHS, LHSType, RHSType,
                                               IsCompAssign);
 
+  // Any operation with MFloat8 type is only possible with C intrinsics
+  if ((LHSVecType && LHSVecType->getElementType()->isMFloat8Type()) ||
+      (RHSVecType && RHSVecType->getElementType()->isMFloat8Type()))
+    return InvalidOperands(Loc, LHS, RHS);
+
   // AltiVec-style "vector bool op vector bool" combinations are allowed
   // for some operators but not others.
   if (!AllowBothBool && LHSVecType &&
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 2ccf5a8e1d6f31..33d5378944ddbf 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8306,7 +8306,8 @@ static bool isPermittedNeonBaseType(QualType &Ty, VectorKind VecKind, Sema &S) {
          BTy->getKind() == BuiltinType::ULongLong ||
          BTy->getKind() == BuiltinType::Float ||
          BTy->getKind() == BuiltinType::Half ||
-         BTy->getKind() == BuiltinType::BFloat16;
+         BTy->getKind() == BuiltinType::BFloat16 ||
+         BTy->getKind() == BuiltinType::MFloat8;
 }
 
 static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
diff --git a/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c
new file mode 100644
index 00000000000000..147ca1d1becc15
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c
@@ -0,0 +1,123 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-linux -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
+typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
+
+typedef __attribute__((neon_vector_type(8))) __mfp8 mfloat8x8_t;
+typedef __attribute__((neon_vector_type(16))) __mfp8 mfloat8x16_t;
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_8x8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> [[X]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <8 x i8> [[SHUFFLE]]
+//
+mfloat8x8_t test_8x8(mfloat8x8_t x) {
+  return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_8x8_v(
+// CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <8 x i8> [[P]], splat (i8 7)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <8 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <8 x i8> poison, i8 [[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <8 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <8 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <8 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <8 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <8 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <8 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <8 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <8 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <8 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <8 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <8 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <8 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <8 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <8 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    ret <8 x i8> [[SHUF_INS21]]
+//
+mfloat8x8_t test_8x8_v(mfloat8x8_t x, int8x8_t p) {
+  return __builtin_shufflevector(x, p);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_8x16(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[X]], <16 x i8> [[X]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE]]
+//
+mfloat8x16_t test_8x16(mfloat8x16_t x) {
+  return __builtin_shufflevector(x, x, 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2,
+                                 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_8x16_v(
+// CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <16 x i8> [[P]], splat (i8 15)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <16 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <16 x i8> poison, i8 [[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <16 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <16 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <16 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <16 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <16 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <16 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <16 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <16 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <16 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <16 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <16 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <16 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <16 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <16 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    [[SHUF_IDX22:%.*]] = extractelement <16 x i8> [[MASK]], i64 8
+// CHECK-NEXT:    [[SHUF_ELT23:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX22]]
+// CHECK-NEXT:    [[SHUF_INS24:%.*]] = insertelement <16 x i8> [[SHUF_INS21]], i8 [[SHUF_ELT23]], i64 8
+// CHECK-NEXT:    [[SHUF_IDX25:%.*]] = extractelement <16 x i8> [[MASK]], i64 9
+// CHECK-NEXT:    [[SHUF_ELT26:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX25]]
+// CHECK-NEXT:    [[SHUF_INS27:%.*]] = insertelement <16 x i8> [[SHUF_INS24]], i8 [[SHUF_ELT26]], i64 9
+// CHECK-NEXT:    [[SHUF_IDX28:%.*]] = extractelement <16 x i8> [[MASK]], i64 10
+// CHECK-NEXT:    [[SHUF_ELT29:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX28]]
+// CHECK-NEXT:    [[SHUF_INS30:%.*]] = insertelement <16 x i8> [[SHUF_INS27]], i8 [[SHUF_ELT29]], i64 10
+// CHECK-NEXT:    [[SHUF_IDX31:%.*]] = extractelement <16 x i8> [[MASK]], i64 11
+// CHECK-NEXT:    [[SHUF_ELT32:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX31]]
+// CHECK-NEXT:    [[SHUF_INS33:%.*]] = insertelement <16 x i8> [[SHUF_INS30]], i8 [[SHUF_ELT32]], i64 11
+// CHECK-NEXT:    [[SHUF_IDX34:%.*]] = extractelement <16 x i8> [[MASK]], i64 12
+// CHECK-NEXT:    [[SHUF_ELT35:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX34]]
+// CHECK-NEXT:    [[SHUF_INS36:%.*]] = insertelement <16 x i8> [[SHUF_INS33]], i8 [[SHUF_ELT35]], i64 12
+// CHECK-NEXT:    [[SHUF_IDX37:%.*]] = extractelement <16 x i8> [[MASK]], i64 13
+// CHECK-NEXT:    [[SHUF_ELT38:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX37]]
+// CHECK-NEXT:    [[SHUF_INS39:%.*]] = insertelement <16 x i8> [[SHUF_INS36]], i8 [[SHUF_ELT38]], i64 13
+// CHECK-NEXT:    [[SHUF_IDX40:%.*]] = extractelement <16 x i8> [[MASK]], i64 14
+// CHECK-NEXT:    [[SHUF_ELT41:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX40]]
+// CHECK-NEXT:    [[SHUF_INS42:%.*]] = insertelement <16 x i8> [[SHUF_INS39]], i8 [[SHUF_ELT41]], i64 14
+// CHECK-NEXT:    [[SHUF_IDX43:%.*]] = extractelement <16 x i8> [[MASK]], i64 15
+// CHECK-NEXT:    [[SHUF_ELT44:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX43]]
+// CHECK-NEXT:    [[SHUF_INS45:%.*]] = insertelement <16 x i8> [[SHUF_INS42]], i8 [[SHUF_ELT44]], i64 15
+// CHECK-NEXT:    ret <16 x i8> [[SHUF_INS45]]
+//
+mfloat8x16_t test_8x16_v(mfloat8x16_t x, int8x16_t p) {
+  return __builtin_shufflevector(x, p);
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-cast.c b/clang/test/CodeGen/AArch64/fp8-cast.c
new file mode 100644
index 00000000000000..a9ce31b9e6beab
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-cast.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// Bitcast between FP8 Neon vectors
+// CHECK-LABEL: define dso_local <8 x i8> @test_f8_f8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <8 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z10test_f8_f813__Mfloat8x8_t(
+// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <8 x i8> [[X]]
+//
+mfloat8x8_t test_f8_f8(mfloat8x8_t x) {
+    return (mfloat8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <16 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z11testq_f8_f814__Mfloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <16 x i8> [[X]]
+//
+mfloat8x16_t testq_f8_f8(mfloat8x16_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// Bitcast between FP8 and int8 Neon vectors
+// CHECK-LABEL: define dso_local <...
[truncated]

jthackray

LGTM

Implement NEON FP8 vectors as VectorType

bb3a440

Co-Aurhored-By: Caroline Concatto <[email protected]>

momchil-velikov requested review from jthackray, rgwott, Lukacma, CarolineConcatto, efriedma-quic and tmatheson-arm January 20, 2025 12:29

llvmbot added clang Clang issues not falling into any other category backend:ARM backend:AArch64 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen labels Jan 20, 2025

jthackray approved these changes Jan 20, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64] Implement NEON FP8 vectors as VectorType #123603

[AArch64] Implement NEON FP8 vectors as VectorType #123603

momchil-velikov commented Jan 20, 2025

llvmbot commented Jan 20, 2025 •

edited

Loading

jthackray left a comment

[AArch64] Implement NEON FP8 vectors as VectorType #123603

Are you sure you want to change the base?

[AArch64] Implement NEON FP8 vectors as VectorType #123603

Conversation

momchil-velikov commented Jan 20, 2025

llvmbot commented Jan 20, 2025 • edited Loading

jthackray left a comment

Choose a reason for hiding this comment

llvmbot commented Jan 20, 2025 •

edited

Loading