@@ -39,15 +39,15 @@ void avx2_cmpeq8(
3939#if defined(AVX2_ENABLED )
4040 uint32_t * target32 = (uint32_t * )target ;
4141
42- __m256 v_comparand = _mm256_set1_epi8 (byte );
42+ __m256i v_comparand = _mm256_set1_epi8 (byte );
4343
4444 uint32_t * out_mask = (uint32_t * )target ;
4545
4646 size_t i ;
4747
4848 for (i = 0 ; i < target_length * 2 ; ++ i ) {
49- __m256 v_data_a = * (__m256 * )(source + (i * 32 ));
50- __m256 v_results_a = _mm256_cmpeq_epi8 (v_data_a , v_comparand );
49+ __m256i v_data_a = * (__m256i * )(source + (i * 32 ));
50+ __m256i v_results_a = _mm256_cmpeq_epi8 (v_data_a , v_comparand );
5151 uint32_t mask = (uint32_t )_mm256_movemask_epi8 (v_results_a );
5252 target32 [i ] = mask ;
5353 }
@@ -63,10 +63,10 @@ void avx2_and_bits(
6363 size_t i ;
6464
6565 for (i = 0 ; i < target_length ; i += 32 ) {
66- __m256 v_data_a = * (__m256 * )(source_a + i );
67- __m256 v_data_b = * (__m256 * )(source_b + i );
68- __m256 v_results = _mm256_and_si256 (v_data_a , v_data_b );
69- * (__m256 * )(target + i ) = v_results ;
66+ __m256i v_data_a = * (__m256i * )(source_a + i );
67+ __m256i v_data_b = * (__m256i * )(source_b + i );
68+ __m256i v_results = _mm256_and_si256 (v_data_a , v_data_b );
69+ * (__m256i * )(target + i ) = v_results ;
7070 }
7171#endif
7272}
@@ -80,10 +80,10 @@ void avx2_and_not_bits(
8080 size_t i ;
8181
8282 for (i = 0 ; i < target_length ; i += 32 ) {
83- __m256 v_data_a = * (__m256 * )(source_a + i );
84- __m256 v_data_b = * (__m256 * )(source_b + i );
85- __m256 v_results = _mm256_andnot_si256 (v_data_a , v_data_b );
86- * (__m256 * )(target + i ) = v_results ;
83+ __m256i v_data_a = * (__m256i * )(source_a + i );
84+ __m256i v_data_b = * (__m256i * )(source_b + i );
85+ __m256i v_results = _mm256_andnot_si256 (v_data_a , v_data_b );
86+ * (__m256i * )(target + i ) = v_results ;
8787 }
8888#endif
8989}
@@ -93,14 +93,14 @@ void avx2_not_bits(
9393 size_t target_length ,
9494 uint8_t * source ) {
9595#if defined(AVX2_ENABLED )
96- __m256 ones = _mm256_set1_epi8 (0xff );
96+ __m256i ones = _mm256_set1_epi8 (0xff );
9797
9898 size_t i ;
9999
100100 for (i = 0 ; i < target_length ; i += 32 ) {
101- __m256 v_data = * (__m256 * )(source + i );
102- __m256 v_results = _mm256_xor_si256 (v_data , ones );
103- * (__m256 * )(target + i ) = v_results ;
101+ __m256i v_data = * (__m256i * )(source + i );
102+ __m256i v_results = _mm256_xor_si256 (v_data , ones );
103+ * (__m256i * )(target + i ) = v_results ;
104104 }
105105#endif
106106}
@@ -114,10 +114,10 @@ void avx2_or_bits(
114114 size_t i ;
115115
116116 for (i = 0 ; i < target_length ; i += 32 ) {
117- __m256 v_data_a = * (__m256 * )(source_a + i );
118- __m256 v_data_b = * (__m256 * )(source_b + i );
119- __m256 v_results = _mm256_or_si256 (v_data_a , v_data_b );
120- * (__m256 * )(target + i ) = v_results ;
117+ __m256i v_data_a = * (__m256i * )(source_a + i );
118+ __m256i v_data_b = * (__m256i * )(source_b + i );
119+ __m256i v_results = _mm256_or_si256 (v_data_a , v_data_b );
120+ * (__m256i * )(target + i ) = v_results ;
121121 }
122122#endif
123123}
@@ -131,10 +131,10 @@ void avx2_xor_bits(
131131 size_t i ;
132132
133133 for (i = 0 ; i < target_length ; i += 32 ) {
134- __m256 v_data_a = * (__m256 * )(source_a + i );
135- __m256 v_data_b = * (__m256 * )(source_b + i );
136- __m256 v_results = _mm256_xor_si256 (v_data_a , v_data_b );
137- * (__m256 * )(target + i ) = v_results ;
134+ __m256i v_data_a = * (__m256i * )(source_a + i );
135+ __m256i v_data_b = * (__m256i * )(source_b + i );
136+ __m256i v_results = _mm256_xor_si256 (v_data_a , v_data_b );
137+ * (__m256i * )(target + i ) = v_results ;
138138 }
139139#endif
140140}
0 commit comments