@@ -379,23 +379,21 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce52) {
379379
380380 uint64_t modulus = 5 ;
381381 int r = 3 ;
382- uint64_t R = (1ULL << r);
383382 uint64_t prod_rs = (1ULL << (52 - r));
384383 uint64_t inv_mod = HenselLemma2adicRoot (r, modulus);
385384
386385 // mod_R_mask[63:r] all zeros & mod_R_mask[r-1:0] all ones
387- __m512i v_mod_R_mask = _mm512_set1_epi64 (R - 1 );
388386 __m512i v_modulus = _mm512_set1_epi64 (modulus);
389387 __m512i v_inv_mod = _mm512_set1_epi64 (inv_mod);
390388 __m512i v_prod_rs = _mm512_set1_epi64 (prod_rs);
391389
392- __m512i _c = _mm512_hexl_montgomery_reduce<52 >(
393- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
390+ __m512i _c = _mm512_hexl_montgomery_reduce<52 , 3 >(T_hi, T_lo, v_modulus,
391+ v_inv_mod, v_prod_rs);
394392 AssertEqual (_c, expected_out);
395393
396394 // Out of Montgomery form
397- _c = _mm512_hexl_montgomery_reduce<52 >(T_hi, _c, v_modulus, r, v_mod_R_mask ,
398- v_inv_mod, v_prod_rs);
395+ _c = _mm512_hexl_montgomery_reduce<52 , 3 >(T_hi, _c, v_modulus, v_inv_mod ,
396+ v_prod_rs);
399397
400398 AssertEqual (_c, expected_c_out);
401399 }
@@ -419,16 +417,13 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce52) {
419417 // Also, for r = 46 and N = 67280421310725 then N' = 62463730494515
420418 __m512i T_hi = _mm512_set_epi64 (559639348720ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
421419 __m512i T_lo = _mm512_set_epi64 (1832906312477596ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
422-
423- int r = 46 ;
424420 __m512i v_modulus = _mm512_set1_epi64 (67280421310725 );
425421 __m512i v_inv_mod = _mm512_set1_epi64 (62463730494515 );
426- __m512i v_mod_R_mask = _mm512_set1_epi64 (70368744177663 );
427422 __m512i v_prod_rs = _mm512_set1_epi64 (64 );
428423
429424 // 52 bits
430- __m512i c = _mm512_hexl_montgomery_reduce<52 >(
431- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
425+ __m512i c = _mm512_hexl_montgomery_reduce<52 , 46 >(T_hi, T_lo, v_modulus,
426+ v_inv_mod, v_prod_rs);
432427 AssertEqual (c, expected_out);
433428 }
434429
@@ -437,18 +432,16 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce52) {
437432 int r = 51 ;
438433 uint64_t modulus = 2251799813684809 ;
439434 uint64_t inv_mod = HenselLemma2adicRoot (r, modulus);
440- uint64_t mod_R_mask = (1ULL << r) - 1 ;
441435 uint64_t prod_rs = (1ULL << (52 - r));
442436 __m512i expected_out =
443437 _mm512_set_epi64 (1832909426971103 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
444438 __m512i T_hi = _mm512_set_epi64 (5446ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
445439 __m512i T_lo = _mm512_set_epi64 (3006504763740625ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
446440 __m512i v_modulus = _mm512_set1_epi64 (modulus);
447441 __m512i v_inv_mod = _mm512_set1_epi64 (inv_mod);
448- __m512i v_mod_R_mask = _mm512_set1_epi64 (mod_R_mask);
449442 __m512i v_prod_rs = _mm512_set1_epi64 (prod_rs);
450- __m512i c = _mm512_hexl_montgomery_reduce<52 >(
451- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
443+ __m512i c = _mm512_hexl_montgomery_reduce<52 , 51 >(T_hi, T_lo, v_modulus,
444+ v_inv_mod, v_prod_rs);
452445 AssertEqual (c, expected_out);
453446 }
454447}
@@ -465,11 +458,8 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce64) {
465458 __m512i expected_out = _mm512_set_epi64 (1546598034044 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
466459 __m512i T_hi = _mm512_set_epi64 (559639348720ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
467460 __m512i T_lo = _mm512_set_epi64 (1832906312477596ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
468-
469- int r = 46 ;
470461 __m512i v_modulus = _mm512_set1_epi64 (67280421310725 );
471462 __m512i v_inv_mod = _mm512_set1_epi64 (62463730494515 );
472- __m512i v_mod_R_mask = _mm512_set1_epi64 (70368744177663 );
473463
474464 // 64 bits
475465 uint64_t prod_rs = (1ULL << 63 ) - 1 ;
@@ -478,8 +468,8 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce64) {
478468 T_hi = _mm512_set_epi64 (273261400 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
479469 T_lo = _mm512_set_epi64 (6847304339915631516 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
480470
481- __m512i c = _mm512_hexl_montgomery_reduce<64 >(
482- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
471+ __m512i c = _mm512_hexl_montgomery_reduce<64 , 46 >(T_hi, T_lo, v_modulus,
472+ v_inv_mod, v_prod_rs);
483473 AssertEqual (c, expected_out);
484474 }
485475
@@ -488,7 +478,6 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce64) {
488478 int r = 61 ;
489479 uint64_t modulus = 2305843009213693487 ;
490480 uint64_t inv_mod = HenselLemma2adicRoot (r, modulus);
491- uint64_t mod_R_mask = (1ULL << r) - 1ULL ;
492481 uint64_t prod_rs = (1ULL << 63 ) - 1 ;
493482 __m512i expected_out =
494483 _mm512_set_epi64 (59185395909485265 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
@@ -497,10 +486,9 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce64) {
497486 _mm512_set_epi64 (9074465024201096609ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
498487 __m512i v_modulus = _mm512_set1_epi64 (modulus);
499488 __m512i v_inv_mod = _mm512_set1_epi64 (inv_mod);
500- __m512i v_mod_R_mask = _mm512_set1_epi64 (mod_R_mask);
501489 __m512i v_prod_rs = _mm512_set1_epi64 (prod_rs);
502- __m512i c = _mm512_hexl_montgomery_reduce<64 >(
503- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
490+ __m512i c = _mm512_hexl_montgomery_reduce<64 , 61 >(T_hi, T_lo, v_modulus,
491+ v_inv_mod, v_prod_rs);
504492 AssertEqual (c, expected_out);
505493 }
506494
@@ -509,18 +497,16 @@ TEST(AVX512, _mm512_hexl_montgomery_reduce64) {
509497 int r = 62 ;
510498 uint64_t modulus = 4611686018427387631 ;
511499 uint64_t inv_mod = HenselLemma2adicRoot (r, modulus);
512- uint64_t mod_R_mask = (1ULL << r) - 1 ;
513500 uint64_t prod_rs = (1ULL << 63 ) - 1 ;
514501 __m512i expected_out =
515502 _mm512_set_epi64 (34747555017826833 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
516503 __m512i T_hi = _mm512_set_epi64 (1ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
517504 __m512i T_lo = _mm512_set_epi64 (262710483011949601ULL , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
518505 __m512i v_modulus = _mm512_set1_epi64 (modulus);
519506 __m512i v_inv_mod = _mm512_set1_epi64 (inv_mod);
520- __m512i v_mod_R_mask = _mm512_set1_epi64 (mod_R_mask);
521507 __m512i v_prod_rs = _mm512_set1_epi64 (prod_rs);
522- __m512i c = _mm512_hexl_montgomery_reduce<64 >(
523- T_hi, T_lo, v_modulus, r, v_mod_R_mask, v_inv_mod, v_prod_rs);
508+ __m512i c = _mm512_hexl_montgomery_reduce<64 , 62 >(T_hi, T_lo, v_modulus,
509+ v_inv_mod, v_prod_rs);
524510 AssertEqual (c, expected_out);
525511 }
526512}
0 commit comments