Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,40 @@ static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
mlir::ValueRange{op0, op1, amt});
}

static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
bool isSigned,
SmallVectorImpl<mlir::Value> &ops,
unsigned opTypePrimitiveSizeInBits) {
mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
opTypePrimitiveSizeInBits / 64);
mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
if (isSigned) {
cir::ConstantOp shiftAmt =
builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
cir::VecSplatOp shiftSplatVecOp =
cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
// In CIR, right-shift operations are automatically lowered to either an
// arithmetic or logical shift depending on the operand type. The purpose
// of the shifts here is to propagate the sign bit of the 32-bit input
// into the upper bits of each vector lane.
lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
} else {
cir::ConstantOp maskScalar = builder.getConstant(
loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
cir::VecSplatOp mask =
cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
// Clear the upper bits
lhs = builder.createAnd(loc, lhs, mask);
rhs = builder.createAnd(loc, rhs, mask);
}
return builder.createMul(loc, lhs, rhs);
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
Expand Down Expand Up @@ -1212,12 +1246,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
case X86::BI__builtin_ia32_pmuludq512:
case X86::BI__builtin_ia32_pmuludq512: {
unsigned opTypePrimitiveSizeInBits =
cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
ops, opTypePrimitiveSizeInBits);
}
case X86::BI__builtin_ia32_pmuldq128:
case X86::BI__builtin_ia32_pmuldq256:
case X86::BI__builtin_ia32_pmuldq512:
case X86::BI__builtin_ia32_pmuldq512: {
unsigned opTypePrimitiveSizeInBits =
cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
ops, opTypePrimitiveSizeInBits);
}
case X86::BI__builtin_ia32_pternlogd512_mask:
case X86::BI__builtin_ia32_pternlogq512_mask:
case X86::BI__builtin_ia32_pternlogd128_mask:
Expand Down
52 changes: 52 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,55 @@ __m256i test_mm256_shufflehi_epi16(__m256i a) {
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
return _mm256_shufflehi_epi16(a, 107);
}

__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CIR-LABEL: _mm256_mul_epu32
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<4 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm256_mul_epu32
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm256_mul_epu32
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}

return _mm256_mul_epu32(a, b);
}

__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CIR-LABEL: _mm256_mul_epi32
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm256_mul_epi32
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm256_mul_epi32
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}

return _mm256_mul_epi32(a, b);
}
52 changes: 52 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,3 +695,55 @@ void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __i
// OGCG: @llvm.x86.avx512.mask.scatter.qpi.512
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
}

__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
// CIR-LABEL: _mm512_mul_epi32
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<8 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm512_mul_epi32
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm512_mul_epi32
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}

return _mm512_mul_epi32(__A, __B);
}

__m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
// CIR-LABEL: _mm512_mul_epu32
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<8 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm512_mul_epu32
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm512_mul_epu32
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}

return _mm512_mul_epu32(__A, __B);
}
23 changes: 23 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,26 @@ __m128i test_mm_shuffle_epi32(__m128i A) {
// OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
return _mm_shuffle_epi32(A, 0x4E);
}

__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CIR-LABEL: _mm_mul_epu32
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<2 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm_mul_epu32
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm_mul_epu32
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}

return _mm_mul_epu32(A, B);
}
45 changes: 45 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CIR-LABEL: _mm_mul_epi32
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm_mul_epi32
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm_mul_epi32
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}

return _mm_mul_epi32(x, y);
}