diff --git a/Makefile.in b/Makefile.in
index 7365a9231..5fd7845ce 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -231,9 +231,10 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/hook.c \
 	$(srcroot)test/unit/hpa.c \
+	$(srcroot)test/unit/hpa_background_thread.c \
+	$(srcroot)test/unit/hpa_central_pool.c \
 	$(srcroot)test/unit/hpa_thp_always.c \
 	$(srcroot)test/unit/hpa_vectorized_madvise.c \
 	$(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \
-	$(srcroot)test/unit/hpa_background_thread.c \
 	$(srcroot)test/unit/hpdata.c \
 	$(srcroot)test/unit/huge.c \
 	$(srcroot)test/unit/inspect.c \
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index cf191aebd..da55e646c 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -47,7 +47,11 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
     bin_stats_data_t *bstats, arena_stats_large_t *lstats,
     pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
-void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
+void arena_stats_global_central_read(tsdn_t *tsdn, hpa_central_stats_t *stats);
+void arena_stats_global_central_mutex_read(
+    tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data);
+
+void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
 edata_t *arena_extent_alloc_large(
     tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);
 void arena_extent_dalloc_large_prep(
@@ -125,7 +129,10 @@ void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
 void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
 void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
 void arena_prefork8(tsdn_t *tsdn, arena_t *arena);
+void arena_global_prefork(tsdn_t *tsdn, bool use_hpa);
 void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void arena_global_postfork_parent(tsdn_t *tsdn, bool use_hpa);
 void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
+void arena_global_postfork_child(tsdn_t *tsdn, bool use_hpa);
 
 #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index b290411b4..148bfa1d6 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -5,6 +5,7 @@
 #include "jemalloc/internal/arena_stats.h"
 #include "jemalloc/internal/background_thread_structs.h"
 #include "jemalloc/internal/bin_stats.h"
+#include "jemalloc/internal/hpa_central.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/mutex_prof.h"
@@ -66,6 +67,7 @@ typedef struct ctl_stats_s {
 	size_t retained;
 	background_thread_stats_t background_thread;
+	hpa_central_stats_t hpa_central;
 	mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes];
 } ctl_stats_t;
diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h
index 065677409..661be366c 100644
--- a/include/jemalloc/internal/hpa.h
+++ b/include/jemalloc/internal/hpa.h
@@ -50,6 +50,20 @@ struct hpa_shard_nonderived_stats_s {
 	 * Guarded by mtx.
 	 */
 	uint64_t ndehugifies;
+
+	/*
+	 * The number of times we donated a pageslab to the central pool.
+	 *
+	 * Guarded by mtx.
+	 */
+	uint64_t ndonated_ps;
+
+	/*
+	 * The number of times we borrowed a pageslab from the central pool.
+	 *
+	 * Guarded by mtx.
+	 */
+	uint64_t nborrowed_ps;
 };
 
 /* Completely derived; only used by CTL. */
diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h
index 3e0ff7dae..947c5463e 100644
--- a/include/jemalloc/internal/hpa_central.h
+++ b/include/jemalloc/internal/hpa_central.h
@@ -8,8 +8,35 @@
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd_types.h"
 
+typedef struct hpa_pool_s hpa_pool_t;
+struct hpa_pool_s {
+	/*
+	 * Pool of empty huge pages shared between the shards that are
+	 * participating.
+	 *
+	 * A page is owned by the pool if it lives in one of these two lists.
+	 * This means that it should not be part of any hpa_shard's psset at
+	 * the same time.
+	 */
+	hpdata_empty_list_t nonpurged;
+	hpdata_empty_list_t purged;
+};
+
+typedef struct hpa_central_stats_s hpa_central_stats_t;
+struct hpa_central_stats_s {
+	/* Number of pages purged while they were in the central pool. */
+	uint64_t npurged_pool;
+
+	/* Total number of dirty base pages in the pool. */
+	size_t ndirty_pool;
+};
+
 typedef struct hpa_central_s hpa_central_t;
 struct hpa_central_s {
+	/* Guards access to the central pool of empty hugepages. */
+	malloc_mutex_t pool_mtx;
+	hpa_pool_t pool;
+
 	/*
 	 * Guards expansion of eden. We separate this from the regular mutex so
 	 * that cheaper operations can still continue while we're doing the OS
@@ -30,6 +57,9 @@ struct hpa_central_s {
 
 	/* The HPA hooks. */
 	hpa_hooks_t hooks;
+
+	/* Stats */
+	hpa_central_stats_t stats;
 };
 
 bool hpa_central_init(
@@ -38,4 +68,21 @@ bool hpa_central_init(
 hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central,
     size_t size, uint64_t age, bool hugify_eager, bool *oom);
 
+/* Donate empty pages to the central pool. */
+void hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central,
+    hpdata_empty_list_t *pages, const nstime_t *now);
+/* Get an empty page from the central pool, without growing it. */
+hpdata_t *hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central);
+
+/* Purge up to max_ps empty pages in the central pool. */
+size_t hpa_central_purge(
+    tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, size_t max_ps);
+
+void hpa_central_prefork(tsdn_t *tsdn, hpa_central_t *central);
+void hpa_central_postfork_parent(tsdn_t *tsdn, hpa_central_t *central);
+void hpa_central_postfork_child(tsdn_t *tsdn, hpa_central_t *central);
+
+void hpa_central_stats_read(
+    tsdn_t *tsdn, hpa_central_t *central, hpa_central_stats_t *stats);
+
 #endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */
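To make the intended lifecycle of this API concrete, here is a minimal sketch of how a participating shard would drive it: donate empty slabs, let a background pass purge the ones whose delay expired, and borrow on allocation. This is illustrative only and not part of the patch; `tsdn`, `central`, `ps` (an empty slab already removed from the donating shard's psset) and `now` are assumed to already exist.

    /* Donate: pages become purgeable at now + opt_hpa_pool_purge_delay_ms. */
    hpdata_empty_list_t to_donate;
    hpdata_empty_list_init(&to_donate);
    hpdata_empty_list_append(&to_donate, ps);
    hpa_central_ps_insert(tsdn, central, &to_donate, now);

    /* Background pass: purge up to 8 pool pages whose delay has expired. */
    hpa_central_purge(tsdn, central, now, /* max_ps */ 8);

    /* Borrow: prefers nonpurged pages; returns NULL if the pool is empty. */
    hpdata_t *borrowed = hpa_central_ps_pop(tsdn, central);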
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h
index 6747c2db8..e5c32f2af 100644
--- a/include/jemalloc/internal/hpa_opts.h
+++ b/include/jemalloc/internal/hpa_opts.h
@@ -152,6 +152,12 @@ struct hpa_shard_opts_s {
 	 * hpa_hugify_style_t for options).
 	 */
 	hpa_hugify_style_t hugify_style;
+
+	/*
+	 * If use_pool is true, this shard will donate empty pages to the pool
+	 * and borrow from the pool before using the central allocator.
+	 */
+	bool use_pool;
 };
 
 /* clang-format off */
@@ -183,7 +189,9 @@ struct hpa_shard_opts_s {
 	/* min_purge_delay_ms */ \
 	0, \
 	/* hugify_style */ \
-	hpa_hugify_style_lazy \
+	hpa_hugify_style_lazy, \
+	/* use_pool */ \
+	false \
 }
 /* clang-format on */
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index ea739ea88..e0d23352e 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -17,6 +17,7 @@ extern bool opt_abort_conf;
 extern bool opt_trust_madvise;
 extern bool opt_experimental_hpa_start_huge_if_thp_always;
 extern bool opt_experimental_hpa_enforce_hugify;
+extern uint64_t opt_hpa_pool_purge_delay_ms;
 extern bool opt_confirm_conf;
 extern bool opt_hpa;
 extern hpa_shard_opts_t opt_hpa_opts;
diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h
index 572200f35..b61d9d4aa 100644
--- a/include/jemalloc/internal/mutex_prof.h
+++ b/include/jemalloc/internal/mutex_prof.h
@@ -36,7 +36,8 @@ typedef enum {
 	OP(tcache_list) \
 	OP(hpa_shard) \
 	OP(hpa_shard_grow) \
-	OP(hpa_sec)
+	OP(hpa_sec) \
+	OP(hpa_central_pool)
 
 typedef enum {
 #define OP(mtx) arena_prof_mutex_##mtx,
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index 7ca3c3470..2e4b43fcb 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -56,6 +56,7 @@ enum witness_rank_e {
 	WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS,
 
 	WITNESS_RANK_HPA_CENTRAL_GROW,
+	WITNESS_RANK_HPA_CENTRAL_POOL,
 	WITNESS_RANK_HPA_CENTRAL,
 
 	WITNESS_RANK_EDATA_CACHE,
diff --git a/src/arena.c b/src/arena.c
index 664ed6a3d..9f12fc19d 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -213,6 +213,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	}
 }
 
+void
+arena_stats_global_central_read(tsdn_t *tsdn, hpa_central_stats_t *stats) {
+	hpa_central_stats_read(tsdn, &arena_pa_central_global.hpa, stats);
+}
+
+void
+arena_stats_global_central_mutex_read(
+    tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data) {
+	malloc_mutex_lock(tsdn, &arena_pa_central_global.hpa.pool_mtx);
+	malloc_mutex_prof_read(
+	    tsdn, mutex_prof_data, &arena_pa_central_global.hpa.pool_mtx);
+	malloc_mutex_unlock(tsdn, &arena_pa_central_global.hpa.pool_mtx);
+}
+
 static void
 arena_background_thread_inactivity_check(
     tsdn_t *tsdn, arena_t *arena, bool is_background_thread) {
@@ -2321,6 +2336,13 @@ arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
 	}
 }
 
+void
+arena_global_prefork(tsdn_t *tsdn, bool use_hpa) {
+	if (use_hpa) {
+		hpa_central_prefork(tsdn, &arena_pa_central_global.hpa);
+	}
+}
+
 void
 arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
 	for (unsigned i = 0; i < nbins_total; i++) {
@@ -2336,6 +2358,13 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
 	}
 }
 
+void
+arena_global_postfork_parent(tsdn_t *tsdn, bool use_hpa) {
+	if (use_hpa) {
+		hpa_central_postfork_parent(tsdn, &arena_pa_central_global.hpa);
+	}
+}
+
 void
 arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
 	atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
@@ -2374,3 +2403,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
 		malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx);
 	}
 }
+
+void
+arena_global_postfork_child(tsdn_t *tsdn, bool use_hpa) {
+	if (use_hpa) {
+		hpa_central_postfork_child(tsdn, &arena_pa_central_global.hpa);
+	}
+}
diff --git a/src/ctl.c b/src/ctl.c
index 553c58ada..89f13ab3e 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -111,6 +111,8 @@ CTL_PROTO(opt_experimental_hpa_max_purge_nhp)
 CTL_PROTO(opt_hpa_purge_threshold)
 CTL_PROTO(opt_hpa_min_purge_delay_ms)
 CTL_PROTO(opt_hpa_hugify_style)
+CTL_PROTO(opt_hpa_use_pool)
+CTL_PROTO(opt_hpa_pool_purge_delay_ms)
 CTL_PROTO(opt_hpa_dirty_mult)
 CTL_PROTO(opt_hpa_sec_nshards)
 CTL_PROTO(opt_hpa_sec_max_alloc)
@@ -274,6 +276,8 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurges)
 CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies)
 CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures)
 CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies)
+CTL_PROTO(stats_arenas_i_hpa_shard_ndonated_ps)
+CTL_PROTO(stats_arenas_i_hpa_shard_nborrowed_ps)
 
 /* Set of stats for non-hugified and hugified slabs. */
 CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)
@@ -345,6 +349,8 @@ CTL_PROTO(stats_active)
 CTL_PROTO(stats_background_thread_num_threads)
 CTL_PROTO(stats_background_thread_num_runs)
 CTL_PROTO(stats_background_thread_run_interval)
+CTL_PROTO(stats_central_pool_ndirty)
+CTL_PROTO(stats_central_pool_npurged)
 CTL_PROTO(stats_metadata)
 CTL_PROTO(stats_metadata_edata)
 CTL_PROTO(stats_metadata_rtree)
@@ -482,6 +488,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
     {NAME("hpa_purge_threshold"), CTL(opt_hpa_purge_threshold)},
     {NAME("hpa_min_purge_delay_ms"), CTL(opt_hpa_min_purge_delay_ms)},
     {NAME("hpa_hugify_style"), CTL(opt_hpa_hugify_style)},
+    {NAME("hpa_use_pool"), CTL(opt_hpa_use_pool)},
+    {NAME("hpa_pool_purge_delay_ms"), CTL(opt_hpa_pool_purge_delay_ms)},
     {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
     {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
     {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@@ -792,6 +800,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = {
     {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)},
     {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)},
     {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)},
+    {NAME("ndonated_ps"), CTL(stats_arenas_i_hpa_shard_ndonated_ps)},
+    {NAME("nborrowed_ps"), CTL(stats_arenas_i_hpa_shard_nborrowed_ps)},
 
     {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)},
     {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)},
@@ -843,6 +853,10 @@ static const ctl_named_node_t stats_background_thread_node[] = {
     {NAME("num_runs"), CTL(stats_background_thread_num_runs)},
     {NAME("run_interval"), CTL(stats_background_thread_run_interval)}};
 
+static const ctl_named_node_t stats_central_pool_node[] = {
+    {NAME("ndirty"), CTL(stats_central_pool_ndirty)},
+    {NAME("npurged"), CTL(stats_central_pool_npurged)}};
+
 #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx)
 MUTEX_PROF_GLOBAL_MUTEXES
 #undef OP
@@ -872,6 +886,7 @@ static const ctl_named_node_t stats_node[] = {
     {NAME("mutexes"), CHILD(named, stats_mutexes)},
     {NAME("arenas"), CHILD(indexed, stats_arenas)},
     {NAME("zero_reallocs"), CTL(stats_zero_reallocs)},
+    {NAME("central_pool"), CHILD(named, stats_central_pool)},
 };
 
 static const ctl_named_node_t experimental_hooks_node[] = {
@@ -1055,6 +1070,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) {
 	}
 }
 
+static bool ctl_ever_used_central_pool(const hpa_shard_stats_t *hpastats);
+
 static void
 ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
 	unsigned i;
@@ -1068,6 +1085,14 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
 		    ctl_arena->astats->lstats, ctl_arena->astats->estats,
 		    &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats);
 
+		/* Read central pool mutex stats for arena 0 only. */
+		if (ctl_arena->arena_ind == 0 &&
+		    ctl_ever_used_central_pool(&ctl_arena->astats->hpastats)) {
+			arena_stats_global_central_mutex_read(tsdn,
+			    &ctl_arena->astats->astats
+				.mutex_prof_data[arena_prof_mutex_hpa_central_pool]);
+		}
+
 		for (i = 0; i < SC_NBINS; i++) {
 			bin_stats_t *bstats =
 			    &ctl_arena->astats->bstats[i].stats_data;
@@ -1303,6 +1328,17 @@ ctl_arena_init(tsd_t *tsd, const arena_config_t *config) {
 	return arena_ind;
 }
 
+static bool
+ctl_ever_used_central_pool(const hpa_shard_stats_t *hpastats) {
+	return hpastats->nonderived_stats.ndonated_ps > 0
+	    || hpastats->nonderived_stats.nborrowed_ps > 0;
+}
+
+static void
+ctl_hpa_central_stats_read(tsdn_t *tsdn, hpa_central_stats_t *central_stats) {
+	arena_stats_global_central_read(tsdn, central_stats);
+}
+
 static void
 ctl_background_thread_stats_read(tsdn_t *tsdn) {
 	background_thread_stats_t *stats = &ctl_stats->background_thread;
@@ -1353,6 +1389,13 @@ ctl_refresh(tsdn_t *tsdn) {
 	}
 
 	if (config_stats) {
+		if (ctl_ever_used_central_pool(&ctl_sarena->astats->hpastats)) {
+			ctl_hpa_central_stats_read(
+			    tsdn, &ctl_stats->hpa_central);
+		} else {
+			ctl_stats->hpa_central.npurged_pool = 0;
+			ctl_stats->hpa_central.ndirty_pool = 0;
+		}
 		ctl_stats->allocated = ctl_sarena->astats->allocated_small
 		    + ctl_sarena->astats->astats.allocated_large;
 		ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
@@ -1363,7 +1406,8 @@ ctl_refresh(tsdn_t *tsdn) {
 		    ctl_sarena->astats->astats.metadata_edata;
 		ctl_stats->metadata_rtree =
 		    ctl_sarena->astats->astats.metadata_rtree;
-		ctl_stats->resident = ctl_sarena->astats->astats.resident;
+		ctl_stats->resident = ctl_sarena->astats->astats.resident
+		    + ctl_stats->hpa_central.ndirty_pool;
 		ctl_stats->metadata_thp =
 		    ctl_sarena->astats->astats.metadata_thp;
 		ctl_stats->mapped = ctl_sarena->astats->astats.mapped;
@@ -2164,6 +2208,9 @@ CTL_RO_NL_GEN(
     opt_hpa_min_purge_delay_ms, opt_hpa_opts.min_purge_delay_ms, uint64_t)
 CTL_RO_NL_GEN(opt_hpa_hugify_style,
     hpa_hugify_style_names[opt_hpa_opts.hugify_style], const char *)
+CTL_RO_NL_GEN(opt_hpa_use_pool, opt_hpa_opts.use_pool, bool)
+CTL_RO_NL_GEN(
+    opt_hpa_pool_purge_delay_ms, opt_hpa_pool_purge_delay_ms, uint64_t)
 
 /*
  * This will have to change before we publicly document this option; fxp_t and
 * its representation are internal implementation details.
 */
@@ -3800,6 +3847,10 @@ approximate_stats_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
 label_return:
 	return ret;
 }
+CTL_RO_CGEN(config_stats, stats_central_pool_ndirty,
+    ctl_stats->hpa_central.ndirty_pool, size_t)
+CTL_RO_CGEN(config_stats, stats_central_pool_npurged,
+    ctl_stats->hpa_central.npurged_pool, uint64_t)
 
 CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *)
 CTL_RO_GEN(
@@ -4105,6 +4156,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures,
     uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies,
     arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndonated_ps,
+    arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndonated_ps, uint64_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nborrowed_ps,
+    arenas_i(mib[2])->astats->hpastats.nonderived_stats.nborrowed_ps, uint64_t);
 
 /* Full, nonhuge */
 CTL_RO_CGEN(config_stats,
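For reference, the new read-only CTL nodes would be consumed roughly as follows. This is an illustrative fragment, not part of the patch; it assumes a stats-enabled build and uses the standard mallctl() epoch idiom to refresh the cached values first.

    /* Refresh cached stats, then read the global pool counters. */
    uint64_t epoch = 1;
    size_t esz = sizeof(epoch);
    mallctl("epoch", &epoch, &esz, &epoch, sizeof(epoch));

    size_t ndirty;
    uint64_t npurged;
    size_t ssz = sizeof(ndirty);
    size_t usz = sizeof(npurged);
    mallctl("stats.central_pool.ndirty", &ndirty, &ssz, NULL, 0);
    mallctl("stats.central_pool.npurged", &npurged, &usz, NULL, 0);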
diff --git a/src/hpa.c b/src/hpa.c
index cc330379c..ee1035824 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -111,6 +111,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 	shard->stats.nhugifies = 0;
 	shard->stats.nhugify_failures = 0;
 	shard->stats.ndehugifies = 0;
+	shard->stats.ndonated_ps = 0;
+	shard->stats.nborrowed_ps = 0;
 
 	/*
 	 * Fill these in last, so that if an hpa_shard gets used despite
@@ -144,6 +146,8 @@ hpa_shard_nonderived_stats_accum(
 	dst->nhugifies += src->nhugifies;
 	dst->nhugify_failures += src->nhugify_failures;
 	dst->ndehugifies += src->ndehugifies;
+	dst->ndonated_ps += src->ndonated_ps;
+	dst->nborrowed_ps += src->nborrowed_ps;
 }
 
 void
@@ -281,6 +285,18 @@ hpa_assume_huge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
 	}
 }
 
+static void
+hpa_update_purgable_time(hpa_shard_t *shard, hpdata_t *ps) {
+	if (shard->opts.min_purge_delay_ms == 0) {
+		return;
+	}
+	nstime_t now;
+	uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000;
+	shard->central->hooks.curtime(&now, /* first_reading */ true);
+	nstime_iadd(&now, delayns);
+	hpdata_time_purge_allowed_set(ps, &now);
+}
+
 static void
 hpa_update_purge_hugify_eligibility(
     tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
@@ -324,13 +340,8 @@ hpa_update_purge_hugify_eligibility(
 		hpdata_allow_hugify(ps, now);
 	}
 	bool purgable = hpa_good_purge_candidate(shard, ps);
-	if (purgable && !hpdata_purge_allowed_get(ps)
-	    && (shard->opts.min_purge_delay_ms > 0)) {
-		nstime_t now;
-		uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000;
-		shard->central->hooks.curtime(&now, /* first_reading */ true);
-		nstime_iadd(&now, delayns);
-		hpdata_time_purge_allowed_set(ps, &now);
+	if (purgable && !hpdata_purge_allowed_get(ps)) {
+		hpa_update_purgable_time(shard, ps);
 	}
 	hpdata_purge_allowed_set(ps, purgable);
 
@@ -445,6 +456,42 @@ hpa_purge_finish_hp(
 	psset_update_end(&shard->psset, hp_item->hp);
 }
 
+static void
+hpa_donate_empty_ps(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	if (!shard->opts.use_pool) {
+		return;
+	}
+
+	hpdata_empty_list_t to_donate;
+	hpdata_empty_list_init(&to_donate);
+	do {
+		hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0)
+		    ? psset_pick_purge(
+			  &shard->psset, &shard->last_time_work_attempted)
+		    : psset_pick_purge(&shard->psset, NULL);
+
+		if (to_purge == NULL || !hpdata_empty(to_purge)) {
+			break;
+		}
+		assert(hpdata_ndirty_get(to_purge) > 0);
+
+		/* Donate the page to the pool. */
+		psset_remove(&shard->psset, to_purge);
+		hpdata_empty_list_append(&to_donate, to_purge);
+		shard->stats.ndonated_ps++;
+	} while (true);
+
+	if (!hpdata_empty_list_empty(&to_donate)) {
+		/* Snapshot the timestamp while we still hold the lock. */
+		nstime_t now;
+		nstime_copy(&now, &shard->last_time_work_attempted);
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		hpa_central_ps_insert(
+		    tsdn, shard->central, &to_donate, &now);
+		malloc_mutex_lock(tsdn, &shard->mtx);
+	}
+}
+
 /* Returns number of huge pages purged. */
 static inline size_t
 hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
@@ -464,6 +511,8 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
 	};
 	assert(batch.range_watermark > 0);
 
+	hpa_donate_empty_ps(tsdn, shard);
+
 	while (1) {
 		hpa_batch_pass_start(&batch);
 		assert(hpa_batch_empty(&batch));
@@ -631,6 +680,17 @@ hpa_shard_maybe_do_deferred_work(
 		max_purges = max_purge_nhp;
 	}
 
+	if (shard->opts.use_pool) {
+		size_t max_pool_ops = (forced ? (size_t)-1 : 8);
+		hpa_central_t *central = shard->central;
+		nstime_t now;
+		nstime_copy(&now, &shard->last_time_work_attempted);
+		/* No need to hold the shard lock while purging the pool. */
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		hpa_central_purge(tsdn, central, &now, max_pool_ops);
+		malloc_mutex_lock(tsdn, &shard->mtx);
+	}
+
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	nops += hpa_purge(tsdn, shard, max_purges);
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
@@ -646,6 +706,19 @@ hpa_shard_maybe_do_deferred_work(
 	}
 }
 
+static void
+hpa_add_pool_page_to_psset(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
+	assert(hpdata_alloc_allowed_get(ps) && hpdata_empty(ps)
+	    && hpdata_consistent(ps));
+	if (hpdata_purge_allowed_get(ps)) {
+		hpa_update_purgable_time(shard, ps);
+		if (hpdata_huge_get(ps)) {
+			shard->stats.nborrowed_ps++;
+		}
+	}
+	psset_insert(&shard->psset, ps);
+}
+
 static edata_t *
 hpa_try_alloc_one_no_grow(
     tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
@@ -659,6 +732,12 @@ hpa_try_alloc_one_no_grow(
 	}
 
 	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
+	if (ps == NULL && shard->opts.use_pool) {
+		ps = hpa_central_ps_pop(tsdn, shard->central);
+		if (ps != NULL) {
+			hpa_add_pool_page_to_psset(tsdn, shard, ps);
+		}
+	}
 	if (ps == NULL) {
 		edata_cache_fast_put(tsdn, &shard->ecf, edata);
 		return NULL;
diff --git a/src/hpa_central.c b/src/hpa_central.c
index b4f770c2c..281e265ea 100644
--- a/src/hpa_central.c
+++ b/src/hpa_central.c
@@ -2,10 +2,138 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/hpa_central.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/hpa_utils.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/witness.h"
 
 #define HPA_EDEN_SIZE (128 * HUGEPAGE)
+#define MILLION UINT64_C(1000000)
+
+uint64_t opt_hpa_pool_purge_delay_ms = 10000; /* 10s */
+
+void
+hpa_central_pool_init(hpa_pool_t *pool) {
+	hpdata_empty_list_init(&pool->nonpurged);
+	hpdata_empty_list_init(&pool->purged);
+}
+
+void
+hpa_central_stats_read(
+    tsdn_t *tsdn, hpa_central_t *central, hpa_central_stats_t *stats) {
+	malloc_mutex_lock(tsdn, &central->pool_mtx);
+	stats->ndirty_pool = central->stats.ndirty_pool;
+	stats->npurged_pool = central->stats.npurged_pool;
+	malloc_mutex_unlock(tsdn, &central->pool_mtx);
+}
+
+static inline void
+hpa_central_pool_concat_nonpurged(tsdn_t *tsdn, hpa_central_t *central,
+    hpdata_empty_list_t *pages, size_t new_dirty) {
+	malloc_mutex_lock(tsdn, &central->pool_mtx);
+	hpdata_empty_list_concat(&central->pool.nonpurged, pages);
+	central->stats.ndirty_pool += new_dirty;
+	malloc_mutex_unlock(tsdn, &central->pool_mtx);
+}
+
+static void
+hpa_central_get_nonpurged(tsdn_t *tsdn, hpa_central_t *central,
+    const nstime_t *now, hpa_purge_batch_t *batch) {
+	malloc_mutex_lock(tsdn, &central->pool_mtx);
+	while (!hpa_batch_full(batch)
+	    && !hpdata_empty_list_empty(&central->pool.nonpurged)) {
+		hpdata_t *ps = hpdata_empty_list_first(
+		    &central->pool.nonpurged);
+		assert(hpdata_empty(ps) && hpdata_purge_allowed_get(ps));
+
+		const nstime_t *allowed = hpdata_time_purge_allowed_get(ps);
+		if (nstime_compare(now, allowed) < 0) {
+			break;
+		}
+		hpdata_empty_list_remove(&central->pool.nonpurged, ps);
+		assert(batch->item_cnt < batch->items_capacity);
+		hpa_purge_item_t *hp_item = &batch->items[batch->item_cnt];
+		batch->item_cnt++;
+		hp_item->hp = ps;
+		hp_item->dehugify = hpdata_huge_get(hp_item->hp);
+		size_t nranges;
+		hpdata_alloc_allowed_set(hp_item->hp, false);
+		size_t ndirty = hpdata_purge_begin(
+		    hp_item->hp, &hp_item->state, &nranges);
+		assert(ndirty > 0 && nranges > 0);
+		batch->ndirty_in_batch += ndirty;
+		batch->nranges += nranges;
+		batch->npurged_hp_total++;
+	}
+	malloc_mutex_unlock(tsdn, &central->pool_mtx);
+}
+
+static void
+hpa_central_put_purged(
+    tsdn_t *tsdn, hpa_central_t *central, const hpa_purge_batch_t *batch) {
+	assert(batch->item_cnt > 0);
+	hpdata_empty_list_t newly_purged;
+	hpdata_empty_list_init(&newly_purged);
+
+	for (size_t i = 0; i < batch->item_cnt; ++i) {
+		hpa_purge_item_t *hp_item = &batch->items[i];
+		if (hp_item->dehugify) {
+			hpdata_dehugify(hp_item->hp);
+		}
+		hpdata_purge_end(hp_item->hp, &hp_item->state);
+		hpdata_alloc_allowed_set(hp_item->hp, true);
+		hpdata_purge_allowed_set(hp_item->hp, false);
+		hpdata_empty_list_append(&newly_purged, hp_item->hp);
+	}
+
+	malloc_mutex_lock(tsdn, &central->pool_mtx);
+	hpdata_empty_list_concat(&central->pool.purged, &newly_purged);
+	central->stats.npurged_pool += batch->npurged_hp_total;
+	assert(central->stats.ndirty_pool >= batch->ndirty_in_batch);
+	central->stats.ndirty_pool -= batch->ndirty_in_batch;
+	malloc_mutex_unlock(tsdn, &central->pool_mtx);
+}
+
+void
+hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central,
+    hpdata_empty_list_t *pages, const nstime_t *now) {
+	assert(!hpdata_empty_list_empty(pages));
+
+	assert(now != NULL);
+	nstime_t purge_time;
+	nstime_copy(&purge_time, now);
+	uint64_t purge_delay_ns = opt_hpa_pool_purge_delay_ms * MILLION;
+	nstime_iadd(&purge_time, purge_delay_ns);
+
+	hpdata_t *ps;
+	size_t new_dirty = 0;
+	ql_foreach (ps, &pages->head, ql_link_empty) {
+		assert(hpdata_empty(ps));
+		assert(hpdata_ndirty_get(ps) > 0);
+		hpdata_time_purge_allowed_set(ps, &purge_time);
+		new_dirty += hpdata_ndirty_get(ps);
+	}
+	hpa_central_pool_concat_nonpurged(tsdn, central, pages, new_dirty);
+}
+
+hpdata_t *
+hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central) {
+	hpdata_t *ps = NULL;
+
+	malloc_mutex_lock(tsdn, &central->pool_mtx);
+	if (!hpdata_empty_list_empty(&central->pool.nonpurged)) {
+		ps = hpdata_empty_list_first(&central->pool.nonpurged);
+		hpdata_empty_list_remove(&central->pool.nonpurged, ps);
+	}
+	if (ps == NULL && !hpdata_empty_list_empty(&central->pool.purged)) {
+		ps = hpdata_empty_list_first(&central->pool.purged);
+		hpdata_empty_list_remove(&central->pool.purged, ps);
+	}
+	malloc_mutex_unlock(tsdn, &central->pool_mtx);
+
+	return ps;
+}
 
 bool
 hpa_central_init(
@@ -19,10 +147,19 @@ hpa_central_init(
 		return true;
 	}
 
+	err = malloc_mutex_init(&central->pool_mtx, "hpa_central_pool",
+	    WITNESS_RANK_HPA_CENTRAL_POOL, malloc_mutex_rank_exclusive);
+	if (err) {
+		return true;
+	}
+	hpa_central_pool_init(&central->pool);
+
 	central->base = base;
 	central->eden = NULL;
 	central->eden_len = 0;
 	central->hooks = *hooks;
+	central->stats.npurged_pool = 0;
+	central->stats.ndirty_pool = 0;
 
 	return false;
 }
@@ -119,3 +256,49 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
 
 	return ps;
 }
+
+size_t
+hpa_central_purge(
+    tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, size_t max_ps) {
+	VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX);
+	hpa_purge_batch_t batch = {
+		.max_hp = max_ps,
+		.npurged_hp_total = 0,
+		.items = &items[0],
+		.items_capacity = HPA_PURGE_BATCH_MAX,
+		.range_watermark = hpa_process_madvise_max_iovec_len(),
+	};
+	assert(batch.range_watermark > 0);
+
+	do {
+		hpa_batch_pass_start(&batch);
+		assert(hpa_batch_empty(&batch));
+		hpa_central_get_nonpurged(tsdn, central, now, &batch);
+		if (hpa_batch_empty(&batch)) {
+			break;
+		}
+		/* We don't need any lock while purging pages from the pool. */
+		hpa_purge_batch(&central->hooks, batch.items, batch.item_cnt);
+		hpa_central_put_purged(tsdn, central, &batch);
+	} while (hpa_batch_full(&batch));
+
+	return batch.npurged_hp_total;
+}
+
+/*
+ * No need to do any of the below for central->grow_mtx, as shard->grow_mtx
+ * must be held to lock that one.
+ */
+void
+hpa_central_prefork(tsdn_t *tsdn, hpa_central_t *central) {
+	malloc_mutex_prefork(tsdn, &central->pool_mtx);
+}
+
+void
+hpa_central_postfork_parent(tsdn_t *tsdn, hpa_central_t *central) {
+	malloc_mutex_postfork_parent(tsdn, &central->pool_mtx);
+}
+
+void
+hpa_central_postfork_child(tsdn_t *tsdn, hpa_central_t *central) {
+	malloc_mutex_postfork_child(tsdn, &central->pool_mtx);
+}
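The purge-delay bookkeeping above reduces to simple nstime arithmetic; here is a hedged restatement, illustrative only (the names come from this patch; `now` and `ps` are assumed to exist):

    /* At donation time: the page may be purged once now + delay passes. */
    nstime_t purge_time;
    nstime_copy(&purge_time, now);
    nstime_iadd(&purge_time, opt_hpa_pool_purge_delay_ms * MILLION); /* ms -> ns */
    hpdata_time_purge_allowed_set(ps, &purge_time);

    /* In hpa_central_get_nonpurged(): skip pages whose time has not come. */
    if (nstime_compare(now, hpdata_time_purge_allowed_get(ps)) < 0) {
        /* Still inside the delay window; stays in pool.nonpurged. */
    }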
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 6844da5ae..5257b8a54 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1668,6 +1668,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 				}
 				CONF_CONTINUE;
 			}
+			CONF_HANDLE_BOOL(opt_hpa_opts.use_pool, "hpa_use_pool");
+			CONF_HANDLE_UINT64_T(opt_hpa_pool_purge_delay_ms,
+			    "hpa_pool_purge_delay_ms", 0, UINT64_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false);
 
 			if (CONF_MATCH("hpa_dirty_mult")) {
 				if (CONF_MATCH_VALUE("-1")) {
@@ -4511,6 +4515,7 @@ _malloc_prefork(void)
 			}
 		}
 	}
+	arena_global_prefork(tsd_tsdn(tsd), opt_hpa);
 	prof_prefork1(tsd_tsdn(tsd));
 	stats_prefork(tsd_tsdn(tsd));
 	tsd_prefork(tsd);
@@ -4548,6 +4553,7 @@ _malloc_postfork(void)
 			arena_postfork_parent(tsd_tsdn(tsd), arena);
 		}
 	}
+	arena_global_postfork_parent(tsd_tsdn(tsd), opt_hpa);
 	prof_postfork_parent(tsd_tsdn(tsd));
 	if (have_background_thread) {
 		background_thread_postfork_parent(tsd_tsdn(tsd));
@@ -4578,6 +4584,7 @@ jemalloc_postfork_child(void) {
 			arena_postfork_child(tsd_tsdn(tsd), arena);
 		}
 	}
+	arena_global_postfork_child(tsd_tsdn(tsd), opt_hpa);
 	prof_postfork_child(tsd_tsdn(tsd));
 	if (have_background_thread) {
 		background_thread_postfork_child(tsd_tsdn(tsd));
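With the conf plumbing above, the feature would be switched on at process start along these lines. A sketch: the hpa_use_pool and hpa_pool_purge_delay_ms keys come from this patch, "hpa" is the existing HPA enable option, and the 5000 ms value is an arbitrary example.

    MALLOC_CONF="hpa:true,hpa_use_pool:true,hpa_pool_purge_delay_ms:5000" ./app

or, equivalently, via the compile-time configuration symbol:

    /* Alternative to the environment variable. */
    const char *malloc_conf = "hpa:true,hpa_use_pool:true";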
diff --git a/src/stats.c b/src/stats.c
index 2ccac6c96..c0aad7ec8 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -817,6 +817,8 @@ stats_arena_hpa_shard_counters_print(
 	uint64_t nhugifies;
 	uint64_t nhugify_failures;
 	uint64_t ndehugifies;
+	uint64_t ndonated_ps;
+	uint64_t nborrowed_ps;
 
 	CTL_M2_GET(
 	    "stats.arenas.0.hpa_shard.npageslabs", i, &npageslabs, size_t);
@@ -848,6 +851,10 @@ stats_arena_hpa_shard_counters_print(
 	    &nhugify_failures, uint64_t);
 	CTL_M2_GET(
 	    "stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.hpa_shard.ndonated_ps", i, &ndonated_ps, uint64_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.nborrowed_ps", i, &nborrowed_ps,
+	    uint64_t);
 
 	emitter_table_printf(emitter,
 	    "HPA shard stats:\n"
@@ -865,6 +872,10 @@ stats_arena_hpa_shard_counters_print(
 	    " / sec)\n"
 	    "  Dehugifies: %" FMTu64 " (%" FMTu64
 	    " / sec)\n"
+	    "  Donated ps to the pool: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "  Borrowed ps from the pool: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
 	    "\n",
 	    npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive,
 	    nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge,
@@ -873,7 +884,9 @@ stats_arena_hpa_shard_counters_print(
 	    rate_per_second(npurges, uptime), nhugifies,
 	    rate_per_second(nhugifies, uptime), nhugify_failures,
 	    rate_per_second(nhugify_failures, uptime), ndehugifies,
-	    rate_per_second(ndehugifies, uptime));
+	    rate_per_second(ndehugifies, uptime), ndonated_ps,
+	    rate_per_second(ndonated_ps, uptime), nborrowed_ps,
+	    rate_per_second(nborrowed_ps, uptime));
 
 	emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs);
 	emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive);
@@ -887,6 +900,10 @@ stats_arena_hpa_shard_counters_print(
 	    &nhugify_failures);
 	emitter_json_kv(
 	    emitter, "ndehugifies", emitter_type_uint64, &ndehugifies);
+	emitter_json_kv(
+	    emitter, "ndonated_ps", emitter_type_uint64, &ndonated_ps);
+	emitter_json_kv(
+	    emitter, "nborrowed_ps", emitter_type_uint64, &nborrowed_ps);
 
 	emitter_json_object_kv_begin(emitter, "slabs");
 	emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
@@ -1115,7 +1132,11 @@ stats_arena_mutexes_print(
 	CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes");
 
 	for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes;
-	    i++) {
+	    i++) {
+		/* hpa_central_pool is global; only print it for arena 0. */
+		if (i == arena_prof_mutex_hpa_central_pool && arena_ind != 0) {
+			continue;
+		}
 		const char *name = arena_mutex_names[i];
 		emitter_json_object_kv_begin(emitter, name);
 		mutex_stats_read_arena(
@@ -1639,6 +1660,8 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("hpa_purge_threshold")
 	OPT_WRITE_UINT64("hpa_min_purge_delay_ms")
 	OPT_WRITE_CHAR_P("hpa_hugify_style")
+	OPT_WRITE_BOOL("hpa_use_pool")
+	OPT_WRITE_UINT64("hpa_pool_purge_delay_ms")
 	OPT_WRITE_SIZE_T("hpa_sec_nshards")
 	OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
 	OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
@@ -1845,7 +1868,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 	    metadata_thp, resident, mapped, retained;
 	size_t num_background_threads;
 	size_t zero_reallocs;
+	size_t ndirty_pool;
 	uint64_t background_thread_num_runs, background_thread_run_interval;
+	uint64_t npurged_pool;
 
 	CTL_GET("stats.allocated", &allocated, size_t);
 	CTL_GET("stats.active", &active, size_t);
@@ -1858,6 +1883,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 	CTL_GET("stats.retained", &retained, size_t);
 	CTL_GET("stats.zero_reallocs", &zero_reallocs, size_t);
 
+	CTL_GET("stats.central_pool.ndirty", &ndirty_pool, size_t);
+	CTL_GET("stats.central_pool.npurged", &npurged_pool, uint64_t);
 	if (have_background_thread) {
 		CTL_GET("stats.background_thread.num_threads",
@@ -1900,6 +1927,11 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 	emitter_table_printf(emitter,
 	    "Count of realloc(non-null-ptr, 0) calls: %zu\n", zero_reallocs);
 
+	/* Central pool */
+	emitter_table_printf(emitter,
+	    "Central pool dirty: %zu, purged: %" FMTu64 "\n", ndirty_pool,
+	    npurged_pool);
+
 	/* Background thread stats. */
 	emitter_json_object_kv_begin(emitter, "background_thread");
 	emitter_json_kv(
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index 5937601ef..b34ebf0c2 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -43,7 +43,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
 	/* min_purge_delay_ms */
 	0,
 	/* hugify_style */
-	hpa_hugify_style_lazy};
+	hpa_hugify_style_lazy,
+	/* use_pool */
+	false};
 
 static hpa_shard_opts_t test_hpa_shard_opts_purge = {
 	/* slab_max_alloc */
@@ -67,7 +69,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = {
 	/* min_purge_delay_ms */
 	0,
 	/* hugify_style */
-	hpa_hugify_style_lazy};
+	hpa_hugify_style_lazy,
+	/* use_pool */
+	false};
 
 static hpa_shard_opts_t test_hpa_shard_opts_aggressive = {
 	/* slab_max_alloc */
@@ -91,7 +95,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_aggressive = {
 	/* min_purge_delay_ms */
 	10,
 	/* hugify_style */
-	hpa_hugify_style_eager};
+	hpa_hugify_style_eager,
+	/* use_pool */
+	false};
 
 static hpa_shard_t *
 create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
diff --git a/test/unit/hpa_central_pool.c b/test/unit/hpa_central_pool.c
new file mode 100644
index 000000000..79fd22c22
--- /dev/null
+++ b/test/unit/hpa_central_pool.c
@@ -0,0 +1,329 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/nstime.h"
+
+#define SHARD_IND 111
+#define SHARD_IND2 112
+
+#define ALLOC_MAX (HUGEPAGE)
+
+typedef struct test_data_s test_data_t;
+struct test_data_s {
+	/*
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the hpa_shard_t.
+	 */
+	hpa_shard_t shard;
+	hpa_central_t central;
+	base_t *base;
+	edata_cache_t shard_edata_cache;
+
+	emap_t emap;
+};
+
+static hpa_shard_opts_t test_hpa_shard_opts_default = {
+	/* slab_max_alloc */
+	ALLOC_MAX,
+	/* hugification_threshold */
+	HUGEPAGE,
+	/* dirty_mult */
+	FXP_INIT_PERCENT(25),
+	/* deferral_allowed */
+	false,
+	/* hugify_delay_ms */
+	10 * 1000,
+	/* hugify_sync */
+	false,
+	/* min_purge_interval_ms */
+	0,
+	/* experimental_max_purge_nhp */
+	-1,
+	/* purge_threshold */
+	HUGEPAGE,
+	/* min_purge_delay_ms */
+	0,
+	/* hugify_style */
+	hpa_hugify_style_eager,
+	/* use_pool */
+	true};
+
+static hpa_shard_t *
+create_test_data(
+    hpa_central_t *central, hpa_shard_opts_t *opts, unsigned int shard_ind) {
+	bool err;
+	base_t *base = base_new(TSDN_NULL, /* ind */ shard_ind,
+	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
+	assert_ptr_not_null(base, "");
+
+	test_data_t *test_data = malloc(sizeof(test_data_t));
+	assert_ptr_not_null(test_data, "");
+
+	test_data->base = base;
+
+	err = edata_cache_init(&test_data->shard_edata_cache, base);
+	assert_false(err, "");
+
+	err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
+	assert_false(err, "");
+
+	err = hpa_shard_init(&test_data->shard, central, &test_data->emap,
+	    test_data->base, &test_data->shard_edata_cache, shard_ind, opts);
+	assert_false(err, "");
+
+	return (hpa_shard_t *)test_data;
+}
+
+static void
+destroy_test_data(hpa_shard_t *shard) {
+	test_data_t *test_data = (test_data_t *)shard;
+	base_delete(TSDN_NULL, test_data->base);
+	free(test_data);
+}
+
+static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
+static void *
+defer_test_map(size_t size) {
+	void *result = (void *)defer_bump_ptr;
+	defer_bump_ptr += size;
+	return result;
+}
+
+static void
+defer_test_unmap(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+}
+
+static size_t ndefer_purge_calls = 0;
+static size_t npurge_size = 0;
+static void
+defer_test_purge(void *ptr, size_t size) {
+	(void)ptr;
+	npurge_size = size;
+	++ndefer_purge_calls;
+}
+
+static bool defer_vectorized_purge_called = false;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_purge_calls;
+	defer_vectorized_purge_called = true;
+	return false;
+}
+
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
+static void
+defer_test_dehugify(void *ptr, size_t size) {
+	++ndefer_dehugify_calls;
+}
+
+static nstime_t defer_curtime;
+static void
+defer_test_curtime(nstime_t *r_time, bool first_reading) {
+	*r_time = defer_curtime;
+}
+
+static uint64_t
+defer_test_ms_since(nstime_t *past_time) {
+	return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000;
+}
pool"); + expect_true(hpdata_huge_get(ps2), "Should still be huge"); + + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); + pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); + expect_true(deferred_work_generated, ""); + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard1); + expect_zu_eq(0, ndefer_purge_calls, "No purge, no donate, delay==0ms"); + hpa_shard_do_deferred_work(tsdn, shard2); + expect_zu_eq(0, ndefer_purge_calls, "No purge, yes donate, delay==0ms"); + + /* Move the time above hard coded limit of 10s */ + nstime_iadd(&defer_curtime, UINT64_C(30) * 1000 * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard2); + expect_zu_eq(1, ndefer_purge_calls, "Purged, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio"); + expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, ""); + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 0, ""); + /* now alloc again and still get the same page */ + edata2 = pai_alloc(tsdn, &shard2->pai, PAGE, PAGE, false, false, false, + &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); + ps2 = psset_pick_alloc(&shard2->psset, PAGE); + expect_ptr_eq( + ps, ps2, "Expected to get the same page via central pool"); + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); + pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); + + npurge_size = 0; + ndefer_purge_calls = 0; + destroy_test_data(shard1); + destroy_test_data(shard2); + base_delete(TSDN_NULL, central_base); +} +TEST_END + +TEST_BEGIN(test_central_pool_with_delay) { + test_skip_if(!hpa_supported() || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.purge_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 1000; + opts.min_purge_interval_ms = 0; + + hpa_central_t central; + base_t *central_base = base_new(TSDN_NULL, /* ind */ 1234, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(central_base, ""); + hpa_central_init(¢ral, central_base, &hooks); + ndefer_purge_calls = 0; + hpa_shard_t *shard1 = create_test_data(¢ral, &opts, SHARD_IND); + hpa_shard_t *shard2 = create_test_data(¢ral, &opts, SHARD_IND2); + + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard1->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Remember the page */ + hpdata_t *ps = psset_pick_alloc(&shard1->psset, PAGE); + expect_true(hpdata_huge_get(ps), "Should be huge as we start as huge"); + + /* Deallocate all */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard1->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard1); + expect_true(deferred_work_generated, ""); + expect_zu_eq(0, ndefer_purge_calls, "No purge, no donation delay=0ms"); + + /* Stats should include the 
diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c
index e82f0ffb2..cea0c1dff 100644
--- a/test/unit/hpa_vectorized_madvise.c
+++ b/test/unit/hpa_vectorized_madvise.c
@@ -43,7 +43,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
 	/* purge_delay_ms */
 	0,
 	/* hugify_style */
-	hpa_hugify_style_lazy};
+	hpa_hugify_style_lazy,
+	/* use_pool */
+	false};
 
 static hpa_shard_t *
 create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c
index d542f72a8..8b8ea0baa 100644
--- a/test/unit/hpa_vectorized_madvise_large_batch.c
+++ b/test/unit/hpa_vectorized_madvise_large_batch.c
@@ -44,7 +44,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
 	/* min_purge_delay_ms */
 	0,
 	/* hugify_style */
-	hpa_hugify_style_lazy};
+	hpa_hugify_style_lazy,
+	/* use_pool */
+	false};
 
 static hpa_shard_t *
 create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index f409f687c..31ea0e049 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -319,6 +319,8 @@ TEST_BEGIN(test_mallctl_opt) {
 	TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always);
 	TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always);
 	TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always);
+	TEST_MALLCTL_OPT(bool, hpa_use_pool, always);
+	TEST_MALLCTL_OPT(uint64_t, hpa_pool_purge_delay_ms, always);
 	TEST_MALLCTL_OPT(unsigned, narenas, always);
 	TEST_MALLCTL_OPT(const char *, percpu_arena, always);
 	TEST_MALLCTL_OPT(size_t, oversize_threshold, always);
@@ -1077,6 +1079,8 @@ TEST_BEGIN(test_stats_arenas_hpa_shard_counters) {
 	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurges);
 	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nhugifies);
 	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndehugifies);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndonated_ps);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nborrowed_ps);
 #undef TEST_STATS_ARENAS_HPA_SHARD_COUNTERS
 }
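Mirroring the mallctl tests above, the new per-shard counters could be checked from application code roughly like this (illustrative only; arena index 0 is just an example):

    uint64_t ndonated_ps, nborrowed_ps;
    size_t sz = sizeof(uint64_t);
    mallctl("stats.arenas.0.hpa_shard.ndonated_ps", &ndonated_ps, &sz, NULL, 0);
    mallctl("stats.arenas.0.hpa_shard.nborrowed_ps", &nborrowed_ps, &sz, NULL, 0);

A shard that never touches the pool reports zero for both, which is exactly the condition ctl_ever_used_central_pool() uses to decide whether to read the global pool stats at all.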