@@ -93,6 +93,243 @@ SIMDE_BEGIN_DECLS_
9393 #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
9494#endif
9595
96+ SIMDE_FUNCTION_ATTRIBUTES
97+ int
98+ simde_mm_cmpestra_8_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
99+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
100+ const int cmp_op = imm8 & 0x0c ;
101+ const int polarity = imm8 & 0x30 ;
102+ simde__m128i_private
103+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
104+ a_ = simde__m128i_to_private (a ),
105+ b_ = simde__m128i_to_private (b );
106+ const int upper_bound = (128 / 8 ) - 1 ;
107+ int a_invalid = 0 ;
108+ int b_invalid = 0 ;
109+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
110+ for (int j = 0 ; j <= upper_bound ; j ++ ){
111+ int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
112+ if (i == la )
113+ a_invalid = 1 ;
114+ if (j == lb )
115+ b_invalid = 1 ;
116+ switch (cmp_op ){
117+ case SIMDE_SIDD_CMP_EQUAL_ANY :
118+ case SIMDE_SIDD_CMP_RANGES :
119+ if (!a_invalid && !b_invalid );
120+ else
121+ bitvalue = 0 ;
122+ break ;
123+ case SIMDE_SIDD_CMP_EQUAL_EACH :
124+ if (!a_invalid && !b_invalid );
125+ else if (a_invalid && b_invalid )
126+ bitvalue = 1 ;
127+ else
128+ bitvalue = 0 ;
129+ break ;
130+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
131+ if (!a_invalid && !b_invalid );
132+ else if (a_invalid && !b_invalid )
133+ bitvalue = 1 ;
134+ else if (a_invalid && b_invalid )
135+ bitvalue = 1 ;
136+ else
137+ bitvalue = 0 ;
138+ break ;
139+ }
140+ bool_res_ .i8 [i ] |= (bitvalue << j );
141+ }
142+ }
143+ int32_t int_res_1 = 0 ;
144+ int32_t int_res_2 = 0 ;
145+ switch (cmp_op ) {
146+ case SIMDE_SIDD_CMP_EQUAL_ANY :
147+ for (int i = 0 ; i <= upper_bound ; i ++ ){
148+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
149+ for (int j = 0 ; j <= upper_bound ; j ++ ){
150+ int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
151+ }
152+ }
153+ break ;
154+ case SIMDE_SIDD_CMP_RANGES :
155+ for (int i = 0 ; i <= upper_bound ; i ++ ){
156+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
157+ for (int j = 0 ; j <= upper_bound ; j ++ ){
158+ int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
159+ j += 2 ;
160+ }
161+ }
162+ break ;
163+ case SIMDE_SIDD_CMP_EQUAL_EACH :
164+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
165+ for (int i = 0 ; i <= upper_bound ; i ++ ){
166+ int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
167+ }
168+ break ;
169+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
170+ int_res_1 = 0xff ;
171+ for (int i = 0 ; i <= upper_bound ; i ++ ){
172+ int k = i ;
173+ HEDLEY_DIAGNOSTIC_PUSH
174+ #if defined(SIMDE_BUG_CLANG_45959 )
175+ #pragma clang diagnostic ignored "-Wsign-conversion"
176+ #endif
177+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
178+ for (int j = 0 ; j <= (upper_bound - i ) ; j ++ ){
179+ int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
180+ k += 1 ;
181+ }
182+ HEDLEY_DIAGNOSTIC_POP
183+ }
184+ break ;
185+ }
186+ for (int i = 0 ; i <= upper_bound ; i ++ ){
187+ if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
188+ if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
189+ if (i >= lb ) {
190+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
191+ }
192+ else {
193+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
194+ }
195+ }
196+ else {
197+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
198+ }
199+ }
200+ else {
201+ int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
202+ }
203+ }
204+ return !int_res_2 & (lb > upper_bound );
205+ }
206+
207+ SIMDE_FUNCTION_ATTRIBUTES
208+ int
209+ simde_mm_cmpestra_16_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
210+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
211+ const int cmp_op = imm8 & 0x0c ;
212+ const int polarity = imm8 & 0x30 ;
213+ simde__m128i_private
214+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
215+ a_ = simde__m128i_to_private (a ),
216+ b_ = simde__m128i_to_private (b );
217+ const int upper_bound = (128 / 16 ) - 1 ;
218+ int a_invalid = 0 ;
219+ int b_invalid = 0 ;
220+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
221+ for (int j = 0 ; j <= upper_bound ; j ++ )
222+ {
223+ int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
224+ a_invalid = 0 ;
225+ b_invalid = 0 ;
226+ if (i >= la )
227+ a_invalid = 1 ;
228+ if (j >= lb )
229+ b_invalid = 1 ;
230+ switch (cmp_op ){
231+ case SIMDE_SIDD_CMP_EQUAL_ANY :
232+ case SIMDE_SIDD_CMP_RANGES :
233+ if (!a_invalid && !b_invalid );
234+ else
235+ bitvalue = 0 ;
236+ break ;
237+ case SIMDE_SIDD_CMP_EQUAL_EACH :
238+ if (!a_invalid && !b_invalid );
239+ else if (a_invalid && b_invalid )
240+ bitvalue = 1 ;
241+ else
242+ bitvalue = 0 ;
243+ break ;
244+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
245+ if (!a_invalid && !b_invalid );
246+ else if (a_invalid && !b_invalid )
247+ bitvalue = 1 ;
248+ else if (a_invalid && b_invalid )
249+ bitvalue = 1 ;
250+ else
251+ bitvalue = 0 ;
252+ break ;
253+ }
254+ bool_res_ .i16 [i ] |= (bitvalue << j );
255+ }
256+ }
257+ int32_t int_res_1 = 0 ;
258+ int32_t int_res_2 = 0 ;
259+ switch (cmp_op ) {
260+ case SIMDE_SIDD_CMP_EQUAL_ANY :
261+ for (int i = 0 ; i <= upper_bound ; i ++ ){
262+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
263+ for (int j = 0 ; j <= upper_bound ; j ++ ){
264+ int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
265+ }
266+ }
267+ break ;
268+ case SIMDE_SIDD_CMP_RANGES :
269+ for (int i = 0 ; i <= upper_bound ; i ++ ){
270+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
271+ for (int j = 0 ; j <= upper_bound ; j ++ ){
272+ int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
273+ j += 2 ;
274+ }
275+ }
276+ break ;
277+ case SIMDE_SIDD_CMP_EQUAL_EACH :
278+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
279+ for (int i = 0 ; i <= upper_bound ; i ++ ){
280+ int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
281+ }
282+ break ;
283+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
284+ int_res_1 = 0xffff ;
285+ for (int i = 0 ; i <= upper_bound ; i ++ ){
286+ int k = i ;
287+ HEDLEY_DIAGNOSTIC_PUSH
288+ #if defined(SIMDE_BUG_CLANG_45959 )
289+ #pragma clang diagnostic ignored "-Wsign-conversion"
290+ #endif
291+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
292+ for (int j = 0 ; j <= (upper_bound - i ) ; j ++ ){
293+ int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
294+ k += 1 ;
295+ }
296+ HEDLEY_DIAGNOSTIC_POP
297+ }
298+ break ;
299+ }
300+ for (int i = 0 ; i <= upper_bound ; i ++ ){
301+ if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
302+ if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
303+ if (i >= lb ) {
304+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
305+ }
306+ else {
307+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
308+ }
309+ }
310+ else {
311+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
312+ }
313+ }
314+ else {
315+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
316+ }
317+ }
318+ return !int_res_2 & (lb > upper_bound );
319+ }
320+
/* simde_mm_cmpestra: forwards to the native intrinsic when SSE4.2 is
 * available; otherwise dispatches to the 16-bit helper when the
 * SIMDE_SIDD_UWORD_OPS bit of imm8 is set and to the 8-bit helper when it
 * is not.  The parameter list must directly follow the macro name (no
 * intervening space) for these to be function-like macros. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
     (((imm8) & SIMDE_SIDD_UWORD_OPS) \
       ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
       : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif
332+
96333SIMDE_FUNCTION_ATTRIBUTES
97334int simde_mm_cmpestrs (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
98335 SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 127 ) {
0 commit comments