From 00115fd82621a909b6ee8bf049159fd09da3cba1 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Fri, 24 Oct 2025 18:01:57 -0400 Subject: [PATCH 01/13] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX512 mask predicate intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 33 +++++++++++++------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 33 ++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 27 ++++++++++++++++ clang/lib/Headers/avx512vlbwintrin.h | 10 +++--- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 5 +++ 5 files changed, 91 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 62c70fba946be..c962b28668562 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2502,24 +2502,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">; def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">; def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512dq", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">; def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">; def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">; def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">; } @@ -2539,11 +2543,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">; } @@ -2563,11 +2569,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">; } @@ -3361,15 +3369,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ee18be166845..f4c61b0ae8d06 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3128,6 +3128,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID) { + assert(Call->getNumArgs() == 1); + + const Pointer &Vec = S.Stk.pop(); + APInt RetMask(Vec.getNumElems(), 0); + unsigned VectorLen = Vec.getNumElems(); + PrimType ElemT = Vec.getFieldDesc()->getPrimType(); + + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + APSInt A; + INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem(ElemNum).toAPSInt(); }); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + pushInteger(S, RetMask, Call->getType()); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4141,6 +4160,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 16141b27f4ce8..75a633a5c6232 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15449,6 +15449,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned Idx = static_cast(IdxAPS.getZExtValue() & (N - 1)); return Success(Vec.getVectorElt(Idx).getInt(), E); } + + case clang::X86::BI__builtin_ia32_cvtb2mask128: + case clang::X86::BI__builtin_ia32_cvtb2mask256: + case clang::X86::BI__builtin_ia32_cvtb2mask512: + case clang::X86::BI__builtin_ia32_cvtw2mask128: + case clang::X86::BI__builtin_ia32_cvtw2mask256: + case clang::X86::BI__builtin_ia32_cvtw2mask512: + case clang::X86::BI__builtin_ia32_cvtd2mask128: + case clang::X86::BI__builtin_ia32_cvtd2mask256: + case clang::X86::BI__builtin_ia32_cvtd2mask512: + case clang::X86::BI__builtin_ia32_cvtq2mask128: + case clang::X86::BI__builtin_ia32_cvtq2mask256: + case clang::X86::BI__builtin_ia32_cvtq2mask512: { + assert(E->getNumArgs() == 1); + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + + unsigned VectorLen = Vec.getVectorLength(); + APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true); + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + const APSInt &A = Vec.getVectorElt(ElemNum).getInt(); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + return Success(APValue(RetMask), E); + } } } diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 639fb60f476c6..d15e4fdc2ffc1 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2492,15 +2492,13 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_movepi8_mask (__m128i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi8_mask(__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 -_mm256_movepi8_mask (__m256i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi8_mask(__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d569283928a0a..c38e9a26c3609 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3009,6 +3009,11 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { return _mm_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm_movepi8_mask( + ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}) +) == (__mmask16)0x0000); + __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi8_mask // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer From 6df958d73c16e75129b2137dd1f5878950a6ce24 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Fri, 24 Oct 2025 18:57:35 -0400 Subject: [PATCH 02/13] _mm256_movepi8_mask --- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index c38e9a26c3609..45ebc1d819ce7 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3010,9 +3010,9 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { } TEST_CONSTEXPR(_mm_movepi8_mask( - ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, + ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) -) == (__mmask16)0x0000); +) == (__mmask16)0x0004); __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi8_mask @@ -3020,6 +3020,13 @@ __mmask32 test_mm256_movepi8_mask(__m256i __A) { return _mm256_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi8_mask( + ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, char(128)}) +) == (__mmask32)0x80000004); + __m128i test_mm_movm_epi8(__mmask16 __A) { // CHECK-LABEL: test_mm_movm_epi8 // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> From 01d277598e6a660bd3c4c63d7761233c19d6d277 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Fri, 24 Oct 2025 19:14:54 -0400 Subject: [PATCH 03/13] fixing merge conflict --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 7c3c2fdd7c591..3837764fab96b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3336,6 +3336,9 @@ static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, RetMask.setBitVal(ElemNum, MSB); } pushInteger(S, RetMask, Call->getType()); + return true; +} + static bool interp__builtin_x86_byteshift( InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, llvm::function_ref Date: Wed, 29 Oct 2025 17:26:19 -0400 Subject: [PATCH 04/13] minor --- clang/lib/AST/ExprConstant.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3a77567189244..46b3baa42e2fe 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15791,6 +15791,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned MSB = A[A.getBitWidth() - 1]; RetMask.setBitVal(ElemNum, MSB); } + return Success(APValue(RetMask), E); + } + case clang::X86::BI__builtin_ia32_cmpb128_mask: case clang::X86::BI__builtin_ia32_cmpw128_mask: case clang::X86::BI__builtin_ia32_cmpd128_mask: From 7a4694021a6679c495310299d9e8aa6ac04de214 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Mon, 17 Nov 2025 20:31:18 -0500 Subject: [PATCH 05/13] movepi test coverage --- clang/lib/Headers/avx512bwintrin.h | 10 +++---- clang/lib/Headers/avx512dqintrin.h | 10 +++---- clang/lib/Headers/avx512vlbwintrin.h | 10 +++---- clang/lib/Headers/avx512vldqintrin.h | 20 +++++-------- clang/test/CodeGen/X86/avx512bw-builtins.c | 31 ++++++++++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 4 +++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 8 +++-- clang/test/CodeGen/X86/avx512vldq-builtins.c | 12 ++++++++ 8 files changed, 72 insertions(+), 33 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index ac75b6ccde735..87687fcb0bf7f 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1776,15 +1776,13 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 -_mm512_movepi8_mask (__m512i __A) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi8_mask(__m512i __A) { return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 -_mm512_movepi16_mask (__m512i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi16_mask(__m512i __A) { return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fef1a2d64d538..bcdaea5a75ddd 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1052,9 +1052,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), (int)(R))) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 -_mm512_movepi32_mask (__m512i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi32_mask(__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } @@ -1070,9 +1069,8 @@ _mm512_movm_epi64 (__mmask8 __A) return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 -_mm512_movepi64_mask (__m512i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi64_mask(__m512i __A) { return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); } diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 65392c070dcde..c46b1d49515b5 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2498,15 +2498,13 @@ _mm256_movepi8_mask(__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi16_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi16_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 -_mm256_movepi16_mask (__m256i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi16_mask(__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 707d039cf4c07..8014ee4bf555a 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -914,15 +914,13 @@ _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi32_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi32_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi32_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi32_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } @@ -950,15 +948,13 @@ _mm256_movm_epi64 (__mmask8 __A) return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi64_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi64_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi64_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi64_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index be2cd480f7558..a6671451c4527 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2537,6 +2537,18 @@ __mmask64 test_mm512_movepi8_mask(__m512i __A) { return _mm512_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi8_mask( + ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, char(255)}) +) == (__mmask64)0x8000000000000004); + + __m512i test_mm512_movm_epi8(__mmask64 __A) { // CHECK-LABEL: test_mm512_movm_epi8 // CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1> @@ -2544,6 +2556,18 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) { return _mm512_movm_epi8(__A); } +TEST_CONSTEXPR(_mm512_movepi8_mask( + ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, char(255)}) +) == (__mmask64)0x8000000000000004); + + __m512i test_mm512_movm_epi16(__mmask32 __A) { // CHECK-LABEL: test_mm512_movm_epi16 // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> @@ -2741,6 +2765,13 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) { return _mm512_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi16_mask( + ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, short(32768)}) +) == (__mmask32)0x80000004); + void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi16_storeu_epi8 diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 9c4ada3a2b7b8..ef34326a9ef91 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -1251,6 +1251,8 @@ __mmask16 test_mm512_movepi32_mask(__m512i __A) { return _mm512_movepi32_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi32_mask(((__m512i)(__v16si){0, 1, -1, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, -1})) == (__mmask16)0x8004); + __m512i test_mm512_movm_epi32(__mmask16 __A) { // CHECK-LABEL: test_mm512_movm_epi32 // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> @@ -1271,6 +1273,8 @@ __mmask8 test_mm512_movepi64_mask(__m512i __A) { return _mm512_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi64_mask(((__m512i)(__v8di){0, 1, -1, 3, 4, 5, 6, -1})) == (__mmask8)0x84); + __m512 test_mm512_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: test_mm512_broadcast_f32x2 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index f3c65325c84ac..ab6288d72ee44 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3052,9 +3052,7 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { } TEST_CONSTEXPR(_mm_movepi8_mask( - ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15}) -) == (__mmask16)0x0004); + ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})) == (__mmask16)0x0004); __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi8_mask @@ -3454,6 +3452,7 @@ __mmask8 test_mm_movepi16_mask(__m128i __A) { // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer return _mm_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm_movepi16_mask(((__m128i)(__v8hi){0, 1, -1, 3, 4, 5, 6, 7})) == (__mmask8)0x04); __mmask16 test_mm256_movepi16_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi16_mask @@ -3461,6 +3460,9 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) { return _mm256_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi16_mask( + ((__m256i)(__v16hi){0, 1, short(32769), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, short(65535)})) == (__mmask16)0x8004); + __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_shufflehi_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 1bfc0229eeb26..92d8e1aa0879a 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -921,12 +921,21 @@ __mmask8 test_mm_movepi32_mask(__m128i __A) { return _mm_movepi32_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi16_mask( + ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, short(32768)}) +) == (__mmask32)0x80000004); + __mmask8 test_mm256_movepi32_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi32_mask // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer return _mm256_movepi32_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi32_mask(((__m256i)(__v8si){0, 1, -1, 3, 4, 5, 6, -2147483648})) == (__mmask8)0x84); + __m128i test_mm_movm_epi32(__mmask8 __A) { // CHECK-LABEL: test_mm_movm_epi32 // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> @@ -965,6 +974,8 @@ __mmask8 test_mm_movepi64_mask(__m128i __A) { return _mm_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm_movepi64_mask(((__m128i)(__v2di){0, -1})) == (__mmask8)0x02); + __mmask8 test_mm256_movepi64_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi64_mask // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer @@ -972,6 +983,7 @@ __mmask8 test_mm256_movepi64_mask(__m256i __A) { return _mm256_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi64_mask(((__m256i)(__v4di){0, 1, -1, 3})) == (__mmask8)0x04); __m256 test_mm256_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: test_mm256_broadcast_f32x2 From 3cd8a093e6df30cf8b26f28d8337e6cb7f9f4edc Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Tue, 18 Nov 2025 14:35:23 -0500 Subject: [PATCH 06/13] removing redundant interp__builtin_x86_byteshift --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 31 ------------------------ clang/lib/Headers/avx512dqintrin.h | 4 +-- clang/lib/Headers/avx512vldqintrin.h | 4 +-- 3 files changed, 4 insertions(+), 35 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 79eeed1560c1e..b780a911f748b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3380,37 +3380,6 @@ static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_x86_byteshift( - InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, - llvm::function_ref - Fn) { - assert(Call->getNumArgs() == 2); - - APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); - uint64_t Shift = ImmAPS.getZExtValue() & 0xff; - - const Pointer &Src = S.Stk.pop(); - if (!Src.getFieldDesc()->isPrimitiveArray()) - return false; - - unsigned NumElems = Src.getNumElems(); - const Pointer &Dst = S.Stk.peek(); - PrimType ElemT = Src.getFieldDesc()->getPrimType(); - - for (unsigned Lane = 0; Lane != NumElems; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - unsigned Base = Lane + I; - APSInt Result = APSInt(Fn(Src, Lane, I, Shift)); - INT_TYPE_SWITCH_NO_BOOL(ElemT, - { Dst.elem(Base) = static_cast(Result); }); - } - } - - Dst.initializeAllElements(); - - return true; -} static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 6fdbf511b14da..f200b22f27ee1 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1046,8 +1046,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), (int)(R))) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_movepi32_mask(__m512i __A) { +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movepi32_mask(__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 8014ee4bf555a..c956aeb7d03a4 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -914,8 +914,8 @@ _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_movepi32_mask(__m128i __A) { +static __inline__ __mmask8 + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_movepi32_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } From b835f2aa438cbf98fe6bf94602735f98e628e9e9 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Tue, 18 Nov 2025 14:38:47 -0500 Subject: [PATCH 07/13] mend --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b780a911f748b..54264d2826304 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3380,7 +3380,6 @@ static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, return true; } - static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref(unsigned, unsigned)> From a7d36a3c9b681110ad8ee494b2a94ea9407620d9 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Tue, 18 Nov 2025 15:13:14 -0500 Subject: [PATCH 08/13] fixing build error --- clang/lib/AST/ExprConstant.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5469461669ba4..a627932266763 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16588,12 +16588,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; unsigned VectorLen = Vec.getVectorLength(); - APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true); + unsigned RetWidth = Info.Ctx.getIntWidth(E->getType()); + llvm::APInt Bits(RetWidth, 0); + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { const APSInt &A = Vec.getVectorElt(ElemNum).getInt(); unsigned MSB = A[A.getBitWidth() - 1]; - RetMask.setBitVal(ElemNum, MSB); + Bits.setBitVal(ElemNum, MSB); } + + APSInt RetMask(Bits, /*isUnsigned=*/true); return Success(APValue(RetMask), E); } From 519d31fbf2bb17ed7039fc556c8fc62d208f4f3d Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Tue, 18 Nov 2025 17:07:50 -0500 Subject: [PATCH 09/13] minior change --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 54264d2826304..09f55ed3a4a62 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3361,7 +3361,7 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, +static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { assert(Call->getNumArgs() == 1); @@ -4790,7 +4790,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cvtq2mask128: case X86::BI__builtin_ia32_cvtq2mask256: case X86::BI__builtin_ia32_cvtq2mask512: - return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID); + return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpw128_mask: From 409da23d594ab29dbeba322389ff25acd886c140 Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Tue, 18 Nov 2025 17:08:26 -0500 Subject: [PATCH 10/13] mend --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 09f55ed3a4a62..cef444ea247c9 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3362,7 +3362,8 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, } static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, - const CallExpr *Call, unsigned ID) { + const CallExpr *Call, + unsigned ID) { assert(Call->getNumArgs() == 1); const Pointer &Vec = S.Stk.pop(); From c9bd48b18cd297c7c65d4baff2c6575d750d57ba Mon Sep 17 00:00:00 2001 From: sskzakaria Date: Wed, 19 Nov 2025 19:00:05 -0500 Subject: [PATCH 11/13] added RetWidth --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cef444ea247c9..db66bc2409dd0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3367,7 +3367,9 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, assert(Call->getNumArgs() == 1); const Pointer &Vec = S.Stk.pop(); - APInt RetMask(Vec.getNumElems(), 0); + unsigned RetWidth = S.getASTContext().getIntWidth(Call->getType()); + APInt RetMask(RetWidth, 0); + unsigned VectorLen = Vec.getNumElems(); PrimType ElemT = Vec.getFieldDesc()->getPrimType(); From 32a7fb23c67089dcfc6af8ff1cfc120acba09291 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 20 Nov 2025 09:52:41 +0000 Subject: [PATCH 12/13] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp style fix Co-authored-by: Timm Baeder --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 57a8e57bda36c..eba71d66bc4d6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3374,7 +3374,7 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, unsigned VectorLen = Vec.getNumElems(); PrimType ElemT = Vec.getFieldDesc()->getPrimType(); - for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) { APSInt A; INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem(ElemNum).toAPSInt(); }); unsigned MSB = A[A.getBitWidth() - 1]; From 2266db0c92300573fed9a27a9d74bab9b69cec40 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 20 Nov 2025 10:52:25 +0000 Subject: [PATCH 13/13] Apply suggestion from @RKSimon style fix --- clang/lib/AST/ExprConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f26124feabe00..ce5301f17b3e7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16614,7 +16614,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned RetWidth = Info.Ctx.getIntWidth(E->getType()); llvm::APInt Bits(RetWidth, 0); - for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) { const APSInt &A = Vec.getVectorElt(ElemNum).getInt(); unsigned MSB = A[A.getBitWidth() - 1]; Bits.setBitVal(ElemNum, MSB);