@@ -1128,6 +1128,64 @@ delete_worst( fd_pack_t * pack,
11281128 return delete_transaction ( pack , worst , 1 , 1 );
11291129}
11301130
1131+ static inline
1132+ void fd_pack_copy_txnp ( fd_txn_p_t * out , fd_pack_ord_txn_t const * cur ) {
1133+ fd_txn_t const * txn = TXN (cur -> txn );
1134+ if (
1135+ #if FD_HAS_AVX512 && FD_PACK_USE_NON_TEMPORAL_MEMCPY
1136+ FD_LIKELY ( cur -> txn -> payload_sz >=1024UL )
1137+ #else
1138+ 0
1139+ #endif
1140+ ) {
1141+ #if FD_HAS_AVX512 && FD_PACK_USE_NON_TEMPORAL_MEMCPY
1142+ _mm512_stream_si512 ( (void * )(out -> payload + 0UL ), _mm512_load_epi64 ( cur -> txn -> payload + 0UL ) );
1143+ _mm512_stream_si512 ( (void * )(out -> payload + 64UL ), _mm512_load_epi64 ( cur -> txn -> payload + 64UL ) );
1144+ _mm512_stream_si512 ( (void * )(out -> payload + 128UL ), _mm512_load_epi64 ( cur -> txn -> payload + 128UL ) );
1145+ _mm512_stream_si512 ( (void * )(out -> payload + 192UL ), _mm512_load_epi64 ( cur -> txn -> payload + 192UL ) );
1146+ _mm512_stream_si512 ( (void * )(out -> payload + 256UL ), _mm512_load_epi64 ( cur -> txn -> payload + 256UL ) );
1147+ _mm512_stream_si512 ( (void * )(out -> payload + 320UL ), _mm512_load_epi64 ( cur -> txn -> payload + 320UL ) );
1148+ _mm512_stream_si512 ( (void * )(out -> payload + 384UL ), _mm512_load_epi64 ( cur -> txn -> payload + 384UL ) );
1149+ _mm512_stream_si512 ( (void * )(out -> payload + 448UL ), _mm512_load_epi64 ( cur -> txn -> payload + 448UL ) );
1150+ _mm512_stream_si512 ( (void * )(out -> payload + 512UL ), _mm512_load_epi64 ( cur -> txn -> payload + 512UL ) );
1151+ _mm512_stream_si512 ( (void * )(out -> payload + 576UL ), _mm512_load_epi64 ( cur -> txn -> payload + 576UL ) );
1152+ _mm512_stream_si512 ( (void * )(out -> payload + 640UL ), _mm512_load_epi64 ( cur -> txn -> payload + 640UL ) );
1153+ _mm512_stream_si512 ( (void * )(out -> payload + 704UL ), _mm512_load_epi64 ( cur -> txn -> payload + 704UL ) );
1154+ _mm512_stream_si512 ( (void * )(out -> payload + 768UL ), _mm512_load_epi64 ( cur -> txn -> payload + 768UL ) );
1155+ _mm512_stream_si512 ( (void * )(out -> payload + 832UL ), _mm512_load_epi64 ( cur -> txn -> payload + 832UL ) );
1156+ _mm512_stream_si512 ( (void * )(out -> payload + 896UL ), _mm512_load_epi64 ( cur -> txn -> payload + 896UL ) );
1157+ _mm512_stream_si512 ( (void * )(out -> payload + 960UL ), _mm512_load_epi64 ( cur -> txn -> payload + 960UL ) );
1158+ _mm512_stream_si512 ( (void * )(out -> payload + 1024UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1024UL ) );
1159+ _mm512_stream_si512 ( (void * )(out -> payload + 1088UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1088UL ) );
1160+ _mm512_stream_si512 ( (void * )(out -> payload + 1152UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1152UL ) );
1161+ _mm512_stream_si512 ( (void * )(out -> payload + 1216UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1216UL ) );
1162+ /* Copied out to 1280 bytes, which copies some other fields we needed to
1163+ copy anyway. */
1164+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , payload_sz )+ sizeof (((fd_txn_p_t * )NULL )-> payload_sz )<=1280UL , nt_memcpy );
1165+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , blockhash_slot )+ sizeof (((fd_txn_p_t * )NULL )-> blockhash_slot )<=1280UL , nt_memcpy );
1166+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , scheduler_arrival_time_nanos )+ sizeof (((fd_txn_p_t * )NULL )-> scheduler_arrival_time_nanos )<=1280UL , nt_memcpy );
1167+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , source_tpu )+ sizeof (((fd_txn_p_t * )NULL )-> source_tpu )<=1280UL , nt_memcpy );
1168+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , source_ipv4 )+ sizeof (((fd_txn_p_t * )NULL )-> source_ipv4 )<=1280UL , nt_memcpy );
1169+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , flags )+ sizeof (((fd_txn_p_t * )NULL )-> flags )<=1280UL , nt_memcpy );
1170+ FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , _ ) <=1280UL , nt_memcpy );
1171+ const ulong offset_into_txn = 1280UL - offsetof(fd_txn_p_t , _ );
1172+ fd_memcpy ( offset_into_txn + (uchar * )TXN (out ), offset_into_txn + (uchar const * )txn ,
1173+ fd_ulong_max ( offset_into_txn , fd_txn_footprint ( txn -> instr_cnt , txn -> addr_table_lookup_cnt ) )- offset_into_txn );
1174+ #endif
1175+ } else {
1176+ fd_memcpy ( out -> payload , cur -> txn -> payload , cur -> txn -> payload_sz );
1177+ fd_memcpy ( TXN (out ), txn , fd_txn_footprint ( txn -> instr_cnt , txn -> addr_table_lookup_cnt ) );
1178+ out -> payload_sz = cur -> txn -> payload_sz ;
1179+ out -> pack_cu .requested_exec_plus_acct_data_cus = cur -> txn -> pack_cu .requested_exec_plus_acct_data_cus ;
1180+ out -> pack_cu .non_execution_cus = cur -> txn -> pack_cu .non_execution_cus ;
1181+ out -> scheduler_arrival_time_nanos = cur -> txn -> scheduler_arrival_time_nanos ;
1182+ out -> source_tpu = cur -> txn -> source_tpu ;
1183+ out -> source_ipv4 = cur -> txn -> source_ipv4 ;
1184+ out -> flags = cur -> txn -> flags ;
1185+ }
1186+ }
1187+
1188+
11311189static inline int
11321190validate_transaction ( fd_pack_t * pack ,
11331191 fd_pack_ord_txn_t const * ord ,
@@ -1925,58 +1983,7 @@ fd_pack_schedule_impl( fd_pack_t * pack,
19251983 FD_PACK_BITSET_OR ( bitset_rw_in_use , cur -> rw_bitset );
19261984 FD_PACK_BITSET_OR ( bitset_w_in_use , cur -> w_bitset );
19271985
1928- if (
1929- #if FD_HAS_AVX512 && FD_PACK_USE_NON_TEMPORAL_MEMCPY
1930- FD_LIKELY ( cur -> txn -> payload_sz >=1024UL )
1931- #else
1932- 0
1933- #endif
1934- ) {
1935- #if FD_HAS_AVX512 && FD_PACK_USE_NON_TEMPORAL_MEMCPY
1936- _mm512_stream_si512 ( (void * )(out -> payload + 0UL ), _mm512_load_epi64 ( cur -> txn -> payload + 0UL ) );
1937- _mm512_stream_si512 ( (void * )(out -> payload + 64UL ), _mm512_load_epi64 ( cur -> txn -> payload + 64UL ) );
1938- _mm512_stream_si512 ( (void * )(out -> payload + 128UL ), _mm512_load_epi64 ( cur -> txn -> payload + 128UL ) );
1939- _mm512_stream_si512 ( (void * )(out -> payload + 192UL ), _mm512_load_epi64 ( cur -> txn -> payload + 192UL ) );
1940- _mm512_stream_si512 ( (void * )(out -> payload + 256UL ), _mm512_load_epi64 ( cur -> txn -> payload + 256UL ) );
1941- _mm512_stream_si512 ( (void * )(out -> payload + 320UL ), _mm512_load_epi64 ( cur -> txn -> payload + 320UL ) );
1942- _mm512_stream_si512 ( (void * )(out -> payload + 384UL ), _mm512_load_epi64 ( cur -> txn -> payload + 384UL ) );
1943- _mm512_stream_si512 ( (void * )(out -> payload + 448UL ), _mm512_load_epi64 ( cur -> txn -> payload + 448UL ) );
1944- _mm512_stream_si512 ( (void * )(out -> payload + 512UL ), _mm512_load_epi64 ( cur -> txn -> payload + 512UL ) );
1945- _mm512_stream_si512 ( (void * )(out -> payload + 576UL ), _mm512_load_epi64 ( cur -> txn -> payload + 576UL ) );
1946- _mm512_stream_si512 ( (void * )(out -> payload + 640UL ), _mm512_load_epi64 ( cur -> txn -> payload + 640UL ) );
1947- _mm512_stream_si512 ( (void * )(out -> payload + 704UL ), _mm512_load_epi64 ( cur -> txn -> payload + 704UL ) );
1948- _mm512_stream_si512 ( (void * )(out -> payload + 768UL ), _mm512_load_epi64 ( cur -> txn -> payload + 768UL ) );
1949- _mm512_stream_si512 ( (void * )(out -> payload + 832UL ), _mm512_load_epi64 ( cur -> txn -> payload + 832UL ) );
1950- _mm512_stream_si512 ( (void * )(out -> payload + 896UL ), _mm512_load_epi64 ( cur -> txn -> payload + 896UL ) );
1951- _mm512_stream_si512 ( (void * )(out -> payload + 960UL ), _mm512_load_epi64 ( cur -> txn -> payload + 960UL ) );
1952- _mm512_stream_si512 ( (void * )(out -> payload + 1024UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1024UL ) );
1953- _mm512_stream_si512 ( (void * )(out -> payload + 1088UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1088UL ) );
1954- _mm512_stream_si512 ( (void * )(out -> payload + 1152UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1152UL ) );
1955- _mm512_stream_si512 ( (void * )(out -> payload + 1216UL ), _mm512_load_epi64 ( cur -> txn -> payload + 1216UL ) );
1956- /* Copied out to 1280 bytes, which copies some other fields we needed to
1957- copy anyway. */
1958- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , payload_sz )+ sizeof (((fd_txn_p_t * )NULL )-> payload_sz )<=1280UL , nt_memcpy );
1959- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , blockhash_slot )+ sizeof (((fd_txn_p_t * )NULL )-> blockhash_slot )<=1280UL , nt_memcpy );
1960- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , scheduler_arrival_time_nanos )+ sizeof (((fd_txn_p_t * )NULL )-> scheduler_arrival_time_nanos )<=1280UL , nt_memcpy );
1961- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , source_tpu )+ sizeof (((fd_txn_p_t * )NULL )-> source_tpu )<=1280UL , nt_memcpy );
1962- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , source_ipv4 )+ sizeof (((fd_txn_p_t * )NULL )-> source_ipv4 )<=1280UL , nt_memcpy );
1963- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , flags )+ sizeof (((fd_txn_p_t * )NULL )-> flags )<=1280UL , nt_memcpy );
1964- FD_STATIC_ASSERT ( offsetof(fd_txn_p_t , _ ) <=1280UL , nt_memcpy );
1965- const ulong offset_into_txn = 1280UL - offsetof(fd_txn_p_t , _ );
1966- fd_memcpy ( offset_into_txn + (uchar * )TXN (out ), offset_into_txn + (uchar const * )txn ,
1967- fd_ulong_max ( offset_into_txn , fd_txn_footprint ( txn -> instr_cnt , txn -> addr_table_lookup_cnt ) )- offset_into_txn );
1968- #endif
1969- } else {
1970- fd_memcpy ( out -> payload , cur -> txn -> payload , cur -> txn -> payload_sz );
1971- fd_memcpy ( TXN (out ), txn , fd_txn_footprint ( txn -> instr_cnt , txn -> addr_table_lookup_cnt ) );
1972- out -> payload_sz = cur -> txn -> payload_sz ;
1973- out -> pack_cu .requested_exec_plus_acct_data_cus = cur -> txn -> pack_cu .requested_exec_plus_acct_data_cus ;
1974- out -> pack_cu .non_execution_cus = cur -> txn -> pack_cu .non_execution_cus ;
1975- out -> scheduler_arrival_time_nanos = cur -> txn -> scheduler_arrival_time_nanos ;
1976- out -> source_tpu = cur -> txn -> source_tpu ;
1977- out -> source_ipv4 = cur -> txn -> source_ipv4 ;
1978- out -> flags = cur -> txn -> flags ;
1979- }
1986+ fd_pack_copy_txnp ( out , cur );
19801987 out ++ ;
19811988
19821989 for ( fd_txn_acct_iter_t iter = fd_txn_acct_iter_init ( txn , FD_TXN_ACCT_CAT_WRITABLE );
@@ -2401,16 +2408,7 @@ fd_pack_try_schedule_bundle( fd_pack_t * pack,
24012408 _next = treap_rev_iter_next ( _cur , pool );
24022409
24032410 fd_pack_ord_txn_t * cur = treap_rev_iter_ele ( _cur , pool );
2404- fd_txn_t const * txn = TXN (cur -> txn );
2405- fd_memcpy ( out -> payload , cur -> txn -> payload , cur -> txn -> payload_sz );
2406- fd_memcpy ( TXN (out ), txn , fd_txn_footprint ( txn -> instr_cnt , txn -> addr_table_lookup_cnt ) );
2407- out -> payload_sz = cur -> txn -> payload_sz ;
2408- out -> pack_cu .requested_exec_plus_acct_data_cus = cur -> txn -> pack_cu .requested_exec_plus_acct_data_cus ;
2409- out -> pack_cu .non_execution_cus = cur -> txn -> pack_cu .non_execution_cus ;
2410- out -> scheduler_arrival_time_nanos = cur -> txn -> scheduler_arrival_time_nanos ;
2411- out -> source_tpu = cur -> txn -> source_tpu ;
2412- out -> source_ipv4 = cur -> txn -> source_ipv4 ;
2413- out -> flags = cur -> txn -> flags ;
2411+ fd_pack_copy_txnp ( out , cur );
24142412 out ++ ;
24152413
24162414 pack -> cumulative_block_cost += cur -> compute_est ;
@@ -3225,4 +3223,4 @@ fd_pack_verify( fd_pack_t * pack,
32253223}
32263224
32273225void * fd_pack_leave ( fd_pack_t * pack ) { FD_COMPILER_MFENCE (); return (void * )pack ; }
3228- void * fd_pack_delete ( void * mem ) { FD_COMPILER_MFENCE (); return mem ; }
3226+ void * fd_pack_delete ( void * mem ) { FD_COMPILER_MFENCE (); return mem ; }
0 commit comments