@@ -92,6 +92,227 @@ SIMDE_BEGIN_DECLS_
9292 #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
9393#endif
9494
95+ SIMDE_FUNCTION_ATTRIBUTES
96+ int
97+ simde_mm_cmpestra_8_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
98+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
99+ const int cmp_op = imm8 & 0x0c ;
100+ const int polarity = imm8 & 0x30 ;
101+ simde__m128i_private
102+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
103+ a_ = simde__m128i_to_private (a ),
104+ b_ = simde__m128i_to_private (b );
105+ const int upper_bound = (128 / 8 ) - 1 ;
106+ int a_invalid = 0 ;
107+ int b_invalid = 0 ;
108+ for (int i = 0 ; i < upper_bound ; i ++ ) {
109+ for (int j = 0 ; j < upper_bound ; j ++ ){
110+ int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
111+ if (i == la )
112+ a_invalid = 1 ;
113+ if (j == lb )
114+ b_invalid = 1 ;
115+ switch (cmp_op ){
116+ case SIMDE_SIDD_CMP_EQUAL_ANY :
117+ case SIMDE_SIDD_CMP_RANGES :
118+ bitvalue = 0 ;
119+ break ;
120+ case SIMDE_SIDD_CMP_EQUAL_EACH :
121+ if (a_invalid && b_invalid )
122+ bitvalue = 1 ;
123+ else
124+ bitvalue = 0 ;
125+ break ;
126+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
127+ if (a_invalid && !b_invalid )
128+ bitvalue = 1 ;
129+ else if (a_invalid && b_invalid )
130+ bitvalue = 1 ;
131+ else
132+ bitvalue = 0 ;
133+ break ;
134+ }
135+ bool_res_ .i8 [i ] |= (bitvalue << j );
136+ }
137+ }
138+ int32_t int_res_1 = 0 ;
139+ int32_t int_res_2 = 0 ;
140+ switch (cmp_op ) {
141+ case SIMDE_SIDD_CMP_EQUAL_ANY :
142+ for (int i = 0 ; i < upper_bound ; i ++ ){
143+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
144+ for (int j = 0 ; j < upper_bound ; j ++ ){
145+ int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
146+ }
147+ }
148+ break ;
149+ case SIMDE_SIDD_CMP_RANGES :
150+ for (int i = 0 ; i < upper_bound ; i ++ ){
151+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
152+ for (int j = 0 ; j < upper_bound ; j ++ ){
153+ int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
154+ j += 2 ;
155+ }
156+ }
157+ break ;
158+ case SIMDE_SIDD_CMP_EQUAL_EACH :
159+ for (int i = 0 ; i < upper_bound ; i ++ ){
160+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
161+ for (int j = 0 ; j < upper_bound ; j ++ ){
162+ int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
163+ }
164+ }
165+ break ;
166+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
167+ int_res_1 = 0xff ;
168+ for (int i = 0 ; i < upper_bound ; i ++ ){
169+ int k = i ;
170+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
171+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
172+ int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
173+ k += 1 ;
174+ }
175+ }
176+ break ;
177+ }
178+ for (int i = 0 ; i < upper_bound ; i ++ ){
179+ if (polarity & 1 ){
180+ if ((polarity >> 1 ) & 1 ) {
181+ if (i >= lb ) {
182+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
183+ }
184+ else {
185+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
186+ }
187+ }
188+ else {
189+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
190+ }
191+ }
192+ else {
193+ int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
194+ }
195+ }
196+ return !int_res_2 & (lb > upper_bound );
197+ }
198+
199+ SIMDE_FUNCTION_ATTRIBUTES
200+ int
201+ simde_mm_cmpestra_16_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
202+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
203+ const int cmp_op = imm8 & 0x0c ;
204+ const int polarity = imm8 & 0x30 ;
205+ simde__m128i_private
206+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
207+ a_ = simde__m128i_to_private (a ),
208+ b_ = simde__m128i_to_private (b );
209+ const int upper_bound = (128 / 16 ) - 1 ;
210+ int a_invalid = 0 ;
211+ int b_invalid = 0 ;
212+ for (int i = 0 ; i < upper_bound ; i ++ ) {
213+ for (int j = 0 ; j < upper_bound ; j ++ )
214+ {
215+ int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
216+ if (i == la )
217+ a_invalid = 1 ;
218+ if (j == lb )
219+ b_invalid = 1 ;
220+ switch (cmp_op ){
221+ case SIMDE_SIDD_CMP_EQUAL_ANY :
222+ case SIMDE_SIDD_CMP_RANGES :
223+ bitvalue = 0 ;
224+ break ;
225+ case SIMDE_SIDD_CMP_EQUAL_EACH :
226+ if (a_invalid && b_invalid )
227+ bitvalue = 1 ;
228+ else
229+ bitvalue = 0 ;
230+ break ;
231+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
232+ if (a_invalid && !b_invalid )
233+ bitvalue = 1 ;
234+ else if (a_invalid && b_invalid )
235+ bitvalue = 1 ;
236+ else
237+ bitvalue = 0 ;
238+ break ;
239+ }
240+ bool_res_ .i16 [i ] |= (bitvalue << j );
241+ }
242+ }
243+ int32_t int_res_1 = 0 ;
244+ int32_t int_res_2 = 0 ;
245+ switch (cmp_op ) {
246+ case SIMDE_SIDD_CMP_EQUAL_ANY :
247+ for (int i = 0 ; i < upper_bound ; i ++ ){
248+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
249+ for (int j = 0 ; j < upper_bound ; j ++ ){
250+ int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
251+ }
252+ }
253+ break ;
254+ case SIMDE_SIDD_CMP_RANGES :
255+ for (int i = 0 ; i < upper_bound ; i ++ ){
256+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
257+ for (int j = 0 ; j < upper_bound ; j ++ ){
258+ int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
259+ j += 2 ;
260+ }
261+ }
262+ break ;
263+ case SIMDE_SIDD_CMP_EQUAL_EACH :
264+ for (int i = 0 ; i < upper_bound ; i ++ ){
265+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
266+ for (int j = 0 ; j < upper_bound ; j ++ ){
267+ int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
268+ }
269+ }
270+ break ;
271+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
272+ int_res_1 = 0xffff ;
273+ for (int i = 0 ; i < upper_bound ; i ++ ){
274+ int k = i ;
275+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
276+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
277+ int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
278+ k += 1 ;
279+ }
280+ }
281+ break ;
282+ }
283+ for (int i = 0 ; i < upper_bound ; i ++ ){
284+ if (polarity & 1 ){
285+ if ((polarity >> 1 ) & 1 ) {
286+ if (i >= lb ) {
287+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
288+ }
289+ else {
290+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
291+ }
292+ }
293+ else {
294+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
295+ }
296+ }
297+ else {
298+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
299+ }
300+ }
301+ return !int_res_2 & (lb > upper_bound );
302+ }
303+
/* simde_mm_cmpestra(a, la, b, lb, imm8)
 *
 * Expands to the native _mm_cmpestra when SIMDE_X86_SSE4_2_NATIVE is defined.
 * Otherwise dispatches on the SIMDE_SIDD_UWORD_OPS bit of imm8 to the 16-bit
 * or 8-bit portable implementation. The macro name must be immediately
 * followed by '(' so that this is a function-like macro. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    (((imm8) & SIMDE_SIDD_UWORD_OPS) \
      ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
/* Expose the native intrinsic name when native aliases are requested. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif
315+
95316SIMDE_FUNCTION_ATTRIBUTES
96317int simde_mm_cmpestrs (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
97318 SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 127 ) {
0 commit comments