-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX512 mask predicate intrinsics to be used in constexpr #165054
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still missing most of header changes + corresponding test coverage
| TEST_CONSTEXPR(_mm_movepi8_mask( | ||
| ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, | ||
| 8, 9, 10, 11, 12, 13, 14, 15}) | ||
| ) == (__mmask16)0x0004); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just put the whole test on a single line - 80col isn't helping us here
|
@llvm/pr-subscribers-clang Author: None (sskzakaria) ChangesEnables constexpr evaluation for the following AVX512 Instrinsics: Part 1 Patch is 20.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165054.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 500aa85fe5356..4501c15c375f8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2533,24 +2533,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512dq",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
}
@@ -2570,11 +2574,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
}
@@ -2594,11 +2600,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
}
@@ -3392,15 +3400,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b3ab82da5e01a..3576b37b44b18 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3377,6 +3377,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned ID) {
+ assert(Call->getNumArgs() == 1);
+
+ const Pointer &Vec = S.Stk.pop<Pointer>();
+ APInt RetMask(Vec.getNumElems(), 0);
+ unsigned VectorLen = Vec.getNumElems();
+ PrimType ElemT = Vec.getFieldDesc()->getPrimType();
+
+ for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+ APSInt A;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
+ unsigned MSB = A[A.getBitWidth() - 1];
+ RetMask.setBitVal(ElemNum, MSB);
+ }
+ pushInteger(S, RetMask, Call->getType());
+ return true;
+}
+
static bool interp__builtin_x86_byteshift(
InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
@@ -4542,6 +4561,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);
+
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d0404b957ab03..46b3baa42e2fe 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15767,6 +15767,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(Vec.getVectorElt(Idx).getInt(), E);
}
+ case clang::X86::BI__builtin_ia32_cvtb2mask128:
+ case clang::X86::BI__builtin_ia32_cvtb2mask256:
+ case clang::X86::BI__builtin_ia32_cvtb2mask512:
+ case clang::X86::BI__builtin_ia32_cvtw2mask128:
+ case clang::X86::BI__builtin_ia32_cvtw2mask256:
+ case clang::X86::BI__builtin_ia32_cvtw2mask512:
+ case clang::X86::BI__builtin_ia32_cvtd2mask128:
+ case clang::X86::BI__builtin_ia32_cvtd2mask256:
+ case clang::X86::BI__builtin_ia32_cvtd2mask512:
+ case clang::X86::BI__builtin_ia32_cvtq2mask128:
+ case clang::X86::BI__builtin_ia32_cvtq2mask256:
+ case clang::X86::BI__builtin_ia32_cvtq2mask512: {
+ assert(E->getNumArgs() == 1);
+ APValue Vec;
+ if (!EvaluateVector(E->getArg(0), Vec, Info))
+ return false;
+
+ unsigned VectorLen = Vec.getVectorLength();
+ APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true);
+ for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+ const APSInt &A = Vec.getVectorElt(ElemNum).getInt();
+ unsigned MSB = A[A.getBitWidth() - 1];
+ RetMask.setBitVal(ElemNum, MSB);
+ }
+ return Success(APValue(RetMask), E);
+ }
+
case clang::X86::BI__builtin_ia32_cmpb128_mask:
case clang::X86::BI__builtin_ia32_cmpw128_mask:
case clang::X86::BI__builtin_ia32_cmpd128_mask:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index ac75b6ccde735..87687fcb0bf7f 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1776,15 +1776,13 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
_mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
-_mm512_movepi8_mask (__m512i __A)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi8_mask(__m512i __A) {
return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
-_mm512_movepi16_mask (__m512i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi16_mask(__m512i __A) {
return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
}
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fef1a2d64d538..bcdaea5a75ddd 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1052,9 +1052,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(C), (int)(R)))
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
-_mm512_movepi32_mask (__m512i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi32_mask(__m512i __A) {
return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}
@@ -1070,9 +1069,8 @@ _mm512_movm_epi64 (__mmask8 __A)
return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
-_mm512_movepi64_mask (__m512i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi64_mask(__m512i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 263a1079b26d5..c46b1d49515b5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2488,27 +2488,23 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_movepi8_mask (__m128i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi8_mask(__m128i __A) {
return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_movepi8_mask (__m256i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi8_mask(__m256i __A) {
return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi16_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi16_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
-_mm256_movepi16_mask (__m256i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi16_mask(__m256i __A) {
return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
}
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 707d039cf4c07..8014ee4bf555a 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -914,15 +914,13 @@ _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U)))
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi32_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi32_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi32_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi32_mask(__m256i __A) {
return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}
@@ -950,15 +948,13 @@ _mm256_movm_epi64 (__mmask8 __A)
return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi64_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi64_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi64_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi64_mask(__m256i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index be2cd480f7558..a6671451c4527 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2537,6 +2537,18 @@ __mmask64 test_mm512_movepi8_mask(__m512i __A) {
return _mm512_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi8_mask(
+ ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, char(255)})
+) == (__mmask64)0x8000000000000004);
+
+
__m512i test_mm512_movm_epi8(__mmask64 __A) {
// CHECK-LABEL: test_mm512_movm_epi8
// CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
@@ -2544,6 +2556,18 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) {
return _mm512_movm_epi8(__A);
}
+TEST_CONSTEXPR(_mm512_movepi8_mask(
+ ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, char(255)})
+) == (__mmask64)0x8000000000000004);
+
+
__m512i test_mm512_movm_epi16(__mmask32 __A) {
// CHECK-LABEL: test_mm512_movm_epi16
// CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
@@ -2741,6 +2765,13 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
return _mm512_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
// CHECK-LABEL: test_mm512_mask_cvtepi16_storeu_epi8
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 9c4ada3a2b7b8..ef34326a9ef91 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1251,6 +1251,8 @@ __mmask16 test_mm512_movepi32_mask(__m512i __A) {
return _mm512_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi32_mask(((__m512i)(__v16si){0, 1, -1, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, -1})) == (__mmask16)0x8004);
+
__m512i test_mm512_movm_epi32(__mmask16 __A) {
// CHECK-LABEL: test_mm512_movm_epi32
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -1271,6 +1273,8 @@ __mmask8 test_mm512_movepi64_mask(__m512i __A) {
return _mm512_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi64_mask(((__m512i)(__v8di){0, 1, -1, 3, 4, 5, 6, -1})) == (__mmask8)0x84);
+
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: test_mm512_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index febef46458ae9..ab6288d72ee44 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3051,12 +3051,22 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
return _mm_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi8_mask(
+ ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})) == (__mmask16)0x0004);
+
__mmask32 test_mm256_movepi8_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi8_mask
// CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer
return _mm256_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi8_mask(
+ ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, char(128)})
+) == (__mmask32)0x80000004);
+
__m128i test_mm_movm_epi8(__mmask16 __A) {
// CHECK-LABEL: test_mm_movm_epi8
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -3442,6 +3452,7 @@ __mmask8 test_mm_movepi16_mask(__m128i __A) {
// CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer
return _mm_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi16_mask(((__m128i)(__v8hi){0, 1, -1, 3, 4, 5, 6, 7})) == (__mmask8)0x04);
__mmask16 test_mm256_movepi16_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi16_mask
@@ -3449,6 +3460,9 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) {
return _mm256_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi16_mask(
+ ((__m256i)(__v16hi){0, 1, short(32769), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, short(65535)})) == (__mmask16)0x8004);
+
__m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_shufflehi_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 1bfc0229eeb26..92d8e1aa0879a 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -921,12 +921,21 @@ __mmask8 test_mm_movepi32_mask(__m128i __A) {
return _mm_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
__mmask8 test_mm256_movepi32_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi32_mask
// CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
return _mm256_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi32_mask(((__m256i)(__v8si){0, 1, -1, 3, 4, 5, 6, -2147483648})) == (__mmask8)0x84);
+
__m128i test_mm_movm_epi32(__mmask8 __A) {
// CHECK-LABEL: test_mm_movm_epi32
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
@@ -965,6 +974,8 @@ __mmask8 test_mm_movepi64_mask(__m128i __A) {
return _mm_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi64_mask(((__m128i)(__v2di){0, -1})) == (__mmask8)0x02);
+
__mmask8 test_mm256_movepi64_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi64_mask
// CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
@@ -972,6 +983,7 @@ __mmask8 test_mm256_movepi64_mask(__m256i __A) {
return _mm256_movepi64_mask(__A);
}
...
[truncated]
|
|
@llvm/pr-subscribers-backend-x86 Author: None (sskzakaria) ChangesEnables constexpr evaluation for the following AVX512 Instrinsics: Part 1 Patch is 20.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165054.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 500aa85fe5356..4501c15c375f8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2533,24 +2533,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512dq",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
}
@@ -2570,11 +2574,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
}
@@ -2594,11 +2600,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
}
@@ -3392,15 +3400,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b3ab82da5e01a..3576b37b44b18 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3377,6 +3377,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned ID) {
+ assert(Call->getNumArgs() == 1);
+
+ const Pointer &Vec = S.Stk.pop<Pointer>();
+ APInt RetMask(Vec.getNumElems(), 0);
+ unsigned VectorLen = Vec.getNumElems();
+ PrimType ElemT = Vec.getFieldDesc()->getPrimType();
+
+ for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+ APSInt A;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
+ unsigned MSB = A[A.getBitWidth() - 1];
+ RetMask.setBitVal(ElemNum, MSB);
+ }
+ pushInteger(S, RetMask, Call->getType());
+ return true;
+}
+
static bool interp__builtin_x86_byteshift(
InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
@@ -4542,6 +4561,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);
+
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d0404b957ab03..46b3baa42e2fe 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15767,6 +15767,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(Vec.getVectorElt(Idx).getInt(), E);
}
+ case clang::X86::BI__builtin_ia32_cvtb2mask128:
+ case clang::X86::BI__builtin_ia32_cvtb2mask256:
+ case clang::X86::BI__builtin_ia32_cvtb2mask512:
+ case clang::X86::BI__builtin_ia32_cvtw2mask128:
+ case clang::X86::BI__builtin_ia32_cvtw2mask256:
+ case clang::X86::BI__builtin_ia32_cvtw2mask512:
+ case clang::X86::BI__builtin_ia32_cvtd2mask128:
+ case clang::X86::BI__builtin_ia32_cvtd2mask256:
+ case clang::X86::BI__builtin_ia32_cvtd2mask512:
+ case clang::X86::BI__builtin_ia32_cvtq2mask128:
+ case clang::X86::BI__builtin_ia32_cvtq2mask256:
+ case clang::X86::BI__builtin_ia32_cvtq2mask512: {
+ assert(E->getNumArgs() == 1);
+ APValue Vec;
+ if (!EvaluateVector(E->getArg(0), Vec, Info))
+ return false;
+
+ unsigned VectorLen = Vec.getVectorLength();
+ APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true);
+ for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+ const APSInt &A = Vec.getVectorElt(ElemNum).getInt();
+ unsigned MSB = A[A.getBitWidth() - 1];
+ RetMask.setBitVal(ElemNum, MSB);
+ }
+ return Success(APValue(RetMask), E);
+ }
+
case clang::X86::BI__builtin_ia32_cmpb128_mask:
case clang::X86::BI__builtin_ia32_cmpw128_mask:
case clang::X86::BI__builtin_ia32_cmpd128_mask:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index ac75b6ccde735..87687fcb0bf7f 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1776,15 +1776,13 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
_mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
-_mm512_movepi8_mask (__m512i __A)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi8_mask(__m512i __A) {
return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
-_mm512_movepi16_mask (__m512i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi16_mask(__m512i __A) {
return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
}
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fef1a2d64d538..bcdaea5a75ddd 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1052,9 +1052,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(C), (int)(R)))
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
-_mm512_movepi32_mask (__m512i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi32_mask(__m512i __A) {
return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}
@@ -1070,9 +1069,8 @@ _mm512_movm_epi64 (__mmask8 __A)
return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
-_mm512_movepi64_mask (__m512i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi64_mask(__m512i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 263a1079b26d5..c46b1d49515b5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2488,27 +2488,23 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_movepi8_mask (__m128i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi8_mask(__m128i __A) {
return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_movepi8_mask (__m256i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi8_mask(__m256i __A) {
return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi16_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi16_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
-_mm256_movepi16_mask (__m256i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi16_mask(__m256i __A) {
return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
}
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 707d039cf4c07..8014ee4bf555a 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -914,15 +914,13 @@ _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U)))
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi32_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi32_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi32_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi32_mask(__m256i __A) {
return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}
@@ -950,15 +948,13 @@ _mm256_movm_epi64 (__mmask8 __A)
return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi64_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi64_mask(__m128i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi64_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi64_mask(__m256i __A) {
return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index be2cd480f7558..a6671451c4527 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2537,6 +2537,18 @@ __mmask64 test_mm512_movepi8_mask(__m512i __A) {
return _mm512_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi8_mask(
+ ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, char(255)})
+) == (__mmask64)0x8000000000000004);
+
+
__m512i test_mm512_movm_epi8(__mmask64 __A) {
// CHECK-LABEL: test_mm512_movm_epi8
// CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
@@ -2544,6 +2556,18 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) {
return _mm512_movm_epi8(__A);
}
+TEST_CONSTEXPR(_mm512_movepi8_mask(
+ ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, char(255)})
+) == (__mmask64)0x8000000000000004);
+
+
__m512i test_mm512_movm_epi16(__mmask32 __A) {
// CHECK-LABEL: test_mm512_movm_epi16
// CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
@@ -2741,6 +2765,13 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
return _mm512_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
// CHECK-LABEL: test_mm512_mask_cvtepi16_storeu_epi8
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 9c4ada3a2b7b8..ef34326a9ef91 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1251,6 +1251,8 @@ __mmask16 test_mm512_movepi32_mask(__m512i __A) {
return _mm512_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi32_mask(((__m512i)(__v16si){0, 1, -1, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, -1})) == (__mmask16)0x8004);
+
__m512i test_mm512_movm_epi32(__mmask16 __A) {
// CHECK-LABEL: test_mm512_movm_epi32
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -1271,6 +1273,8 @@ __mmask8 test_mm512_movepi64_mask(__m512i __A) {
return _mm512_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi64_mask(((__m512i)(__v8di){0, 1, -1, 3, 4, 5, 6, -1})) == (__mmask8)0x84);
+
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: test_mm512_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index febef46458ae9..ab6288d72ee44 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3051,12 +3051,22 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
return _mm_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi8_mask(
+ ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})) == (__mmask16)0x0004);
+
__mmask32 test_mm256_movepi8_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi8_mask
// CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer
return _mm256_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi8_mask(
+ ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, char(128)})
+) == (__mmask32)0x80000004);
+
__m128i test_mm_movm_epi8(__mmask16 __A) {
// CHECK-LABEL: test_mm_movm_epi8
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -3442,6 +3452,7 @@ __mmask8 test_mm_movepi16_mask(__m128i __A) {
// CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer
return _mm_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi16_mask(((__m128i)(__v8hi){0, 1, -1, 3, 4, 5, 6, 7})) == (__mmask8)0x04);
__mmask16 test_mm256_movepi16_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi16_mask
@@ -3449,6 +3460,9 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) {
return _mm256_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi16_mask(
+ ((__m256i)(__v16hi){0, 1, short(32769), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, short(65535)})) == (__mmask16)0x8004);
+
__m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_shufflehi_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 1bfc0229eeb26..92d8e1aa0879a 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -921,12 +921,21 @@ __mmask8 test_mm_movepi32_mask(__m128i __A) {
return _mm_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
__mmask8 test_mm256_movepi32_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi32_mask
// CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
return _mm256_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi32_mask(((__m256i)(__v8si){0, 1, -1, 3, 4, 5, 6, -2147483648})) == (__mmask8)0x84);
+
__m128i test_mm_movm_epi32(__mmask8 __A) {
// CHECK-LABEL: test_mm_movm_epi32
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
@@ -965,6 +974,8 @@ __mmask8 test_mm_movepi64_mask(__m128i __A) {
return _mm_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi64_mask(((__m128i)(__v2di){0, -1})) == (__mmask8)0x02);
+
__mmask8 test_mm256_movepi64_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi64_mask
// CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
@@ -972,6 +983,7 @@ __mmask8 test_mm256_movepi64_mask(__m256i __A) {
return _mm256_movepi64_mask(__A);
}
...
[truncated]
|
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please fix the merge failures
🐧 Linux x64 Test Results
|
|
@RKSimon PTAL when you can, I changed the name of the builtin so its not the same as the builtin for movm intrinsics |
| assert(Call->getNumArgs() == 1); | ||
|
|
||
| const Pointer &Vec = S.Stk.pop<Pointer>(); | ||
| APInt RetMask(Vec.getNumElems(), 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RetWidth ?
what name change? |
|
style fix Co-authored-by: Timm Baeder <[email protected]>
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Enables constexpr evaluation for the following AVX512 Instrinsics:
Part 1
FIXES: #162072