@@ -92,6 +92,231 @@ SIMDE_BEGIN_DECLS_
9292 #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
9393#endif
9494
95+ SIMDE_FUNCTION_ATTRIBUTES
96+ int
97+ simde_mm_cmpestra_8_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
98+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
99+ const int cmp_op = imm8 & 0x06 ;
100+ const int polarity = imm8 & 0x30 ;
101+ simde__m128i_private
102+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
103+ a_ = simde__m128i_to_private (a ),
104+ b_ = simde__m128i_to_private (b );
105+ const int upper_bound = (128 / 8 ) - 1 ;
106+ int a_invalid = 0 ;
107+ int b_invalid = 0 ;
108+ for (int i = 0 ; i < upper_bound ; i ++ ) {
109+ for (int j = 0 ; j < upper_bound ; j ++ ){
110+ int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
111+ if (i == la )
112+ a_invalid = 1 ;
113+ if (j == lb )
114+ b_invalid = 1 ;
115+ switch (cmp_op ){
116+ case SIMDE_SIDD_CMP_EQUAL_ANY :
117+ bitvalue = 0 ;
118+ break ;
119+ case SIMDE_SIDD_CMP_RANGES :
120+ bitvalue = 0 ;
121+ break ;
122+ case SIMDE_SIDD_CMP_EQUAL_EACH :
123+ if (a_invalid && b_invalid )
124+ bitvalue = 1 ;
125+ else
126+ bitvalue = 0 ;
127+ break ;
128+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
129+ if (a_invalid && !b_invalid )
130+ bitvalue = 1 ;
131+ else if (a_invalid && b_invalid )
132+ bitvalue = 1 ;
133+ else
134+ bitvalue = 0 ;
135+ break ;
136+ }
137+ bool_res_ .i8 [i ] |= (bitvalue << j );
138+ }
139+ }
140+ int32_t int_res_1 = 0 ;
141+ int32_t int_res_2 = 0 ;
142+ switch (cmp_op ) {
143+ case SIMDE_SIDD_CMP_EQUAL_ANY :
144+ for (int i = 0 ; i < upper_bound ; i ++ ){
145+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
146+ for (int j = 0 ; j < upper_bound ; j ++ ){
147+ int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
148+ }
149+ }
150+ break ;
151+ case SIMDE_SIDD_CMP_RANGES :
152+ for (int i = 0 ; i < upper_bound ; i ++ ){
153+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
154+ for (int j = 0 ; j < upper_bound ; j ++ ){
155+ int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
156+ j += 2 ;
157+ }
158+ }
159+ break ;
160+ case SIMDE_SIDD_CMP_EQUAL_EACH :
161+ for (int i = 0 ; i < upper_bound ; i ++ ){
162+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
163+ for (int j = 0 ; j < upper_bound ; j ++ ){
164+ int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
165+ }
166+ }
167+ break ;
168+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
169+ int_res_1 = 0xff ;
170+ for (int i = 0 ; i < upper_bound ; i ++ ){
171+ int k = i ;
172+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
173+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
174+ int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
175+ k += 1 ;
176+ }
177+ }
178+ break ;
179+ }
180+ for (int i = 0 ; i < upper_bound ; i ++ ){
181+ if (polarity & 1 ){
182+ if ((polarity >> 1 ) & 1 ) {
183+ if (i >= lb ) {
184+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
185+ }
186+ else {
187+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
188+ }
189+ }
190+ else {
191+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
192+ }
193+ }
194+ else {
195+ int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
196+ }
197+ }
198+ return !int_res_2 & (lb > upper_bound );
199+ }
200+
201+ SIMDE_FUNCTION_ATTRIBUTES
202+ int
203+ simde_mm_cmpestra_16_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
204+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
205+ const int cmp_op = imm8 & 0x06 ;
206+ const int polarity = imm8 & 0x30 ;
207+ simde__m128i_private
208+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
209+ a_ = simde__m128i_to_private (a ),
210+ b_ = simde__m128i_to_private (b );
211+ const int upper_bound = (128 / 16 ) - 1 ;
212+ int a_invalid = 0 ;
213+ int b_invalid = 0 ;
214+ for (int i = 0 ; i < upper_bound ; i ++ ) {
215+ for (int j = 0 ; j < upper_bound ; j ++ )
216+ {
217+ int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
218+ if (i == la )
219+ a_invalid = 1 ;
220+ if (j == lb )
221+ b_invalid = 1 ;
222+ switch (cmp_op ){
223+ case SIMDE_SIDD_CMP_EQUAL_ANY :
224+ bitvalue = 0 ;
225+ break ;
226+ case SIMDE_SIDD_CMP_RANGES :
227+ bitvalue = 0 ;
228+ break ;
229+ case SIMDE_SIDD_CMP_EQUAL_EACH :
230+ if (a_invalid && b_invalid )
231+ bitvalue = 1 ;
232+ else
233+ bitvalue = 0 ;
234+ break ;
235+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
236+ if (a_invalid && !b_invalid )
237+ bitvalue = 1 ;
238+ else if (a_invalid && b_invalid )
239+ bitvalue = 1 ;
240+ else
241+ bitvalue = 0 ;
242+ break ;
243+ }
244+ bool_res_ .i16 [i ] |= (bitvalue << j );
245+ }
246+ }
247+ int32_t int_res_1 = 0 ;
248+ int32_t int_res_2 = 0 ;
249+ switch (cmp_op ) {
250+ case SIMDE_SIDD_CMP_EQUAL_ANY :
251+ for (int i = 0 ; i < upper_bound ; i ++ ){
252+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
253+ for (int j = 0 ; j < upper_bound ; j ++ ){
254+ int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
255+ }
256+ }
257+ break ;
258+ case SIMDE_SIDD_CMP_RANGES :
259+ for (int i = 0 ; i < upper_bound ; i ++ ){
260+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
261+ for (int j = 0 ; j < upper_bound ; j ++ ){
262+ int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
263+ j += 2 ;
264+ }
265+ }
266+ break ;
267+ case SIMDE_SIDD_CMP_EQUAL_EACH :
268+ for (int i = 0 ; i < upper_bound ; i ++ ){
269+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
270+ for (int j = 0 ; j < upper_bound ; j ++ ){
271+ int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
272+ }
273+ }
274+ break ;
275+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
276+ int_res_1 = 0xffff ;
277+ for (int i = 0 ; i < upper_bound ; i ++ ){
278+ int k = i ;
279+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
280+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
281+ int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
282+ k += 1 ;
283+ }
284+ }
285+ break ;
286+ }
287+ for (int i = 0 ; i < upper_bound ; i ++ ){
288+ if (polarity & 1 ){
289+ if ((polarity >> 1 ) & 1 ) {
290+ if (i >= lb ) {
291+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
292+ }
293+ else {
294+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
295+ }
296+ }
297+ else {
298+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
299+ }
300+ }
301+ else {
302+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
303+ }
304+ }
305+ return !int_res_2 & (lb > upper_bound );
306+ }
307+
/* simde_mm_cmpestra(a, la, b, lb, imm8)
 *
 * Uses the native intrinsic when SSE4.2 is available; otherwise dispatches
 * on the element-width bit of imm8 (SIMDE_SIDD_UWORD_OPS) to the 16-bit or
 * 8-bit portable implementation.
 *
 * There must be no whitespace between a macro name and its parameter list,
 * otherwise the preprocessor defines an object-like macro instead. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    (((imm8) & SIMDE_SIDD_UWORD_OPS) \
      ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif
319+
95320SIMDE_FUNCTION_ATTRIBUTES
96321int simde_mm_cmpestrs (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
97322 SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 127 ) {
0 commit comments