@@ -1307,6 +1307,8 @@ static std::deque<class cpu_thread*> g_to_sleep;
13071307static atomic_t <bool > g_scheduler_ready = false ;
13081308static atomic_t <u64 > s_yield_frequency = 0 ;
13091309static atomic_t <u64 > s_max_allowed_yield_tsc = 0 ;
1310+ static atomic_t <u64 > s_lv2_timers_sum_of_ten_delay_in_us = 5000 ;
1311+ static atomic_t <u64 > s_lv2_timers_min_timer_in_us = 0 ;
13101312static u64 s_last_yield_tsc = 0 ;
13111313atomic_t <u32 > g_lv2_preempts_taken = 0 ;
13121314
@@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio)
14321434
14331435 if (!g_postpone_notify_barrier)
14341436 {
1435- notify_all ();
1437+ notify_all (thread );
14361438 }
14371439
14381440 return result;
@@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
15731575 {
15741576 if (it == end || it->first > wait_until)
15751577 {
1578+ if (it == g_waiting.cbegin ())
1579+ {
1580+ s_lv2_timers_min_timer_in_us.release (wait_until);
1581+ }
1582+
15761583 g_waiting.emplace (it, wait_until, &thread);
15771584 break ;
15781585 }
@@ -1835,6 +1842,8 @@ void lv2_obj::cleanup()
18351842 g_waiting.clear ();
18361843 g_pending = 0 ;
18371844 s_yield_frequency = 0 ;
1845+ s_lv2_timers_sum_of_ten_delay_in_us = 5000 ;
1846+ s_lv2_timers_min_timer_in_us = 0 ;
18381847}
18391848
18401849void lv2_obj::schedule_all (u64 current_time)
@@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time)
18761885 }
18771886
18781887 // Check registered timeouts
1879- while (!g_waiting.empty ())
1888+ while (!g_waiting.empty () && it != std::end (g_to_notify) )
18801889 {
18811890 const auto pair = &g_waiting.front ();
18821891
@@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time)
18961905 ensure (!target->state .test_and_set (cpu_flag::notify));
18971906
18981907 // Otherwise notify it to wake itself
1899- if (it == std::end (g_to_notify))
1900- {
1901- // Out of notification slots, notify locally (resizable container is not worth it)
1902- target->state .notify_one ();
1903- }
1904- else
1905- {
1906- *it++ = &target->state ;
1907- }
1908+ *it++ = &target->state ;
19081909 }
19091910 }
19101911 else
@@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
21712172#endif
21732174 // TODO: Tune for other non-Windows operating systems
21732174
2174- if (g_cfg.core .sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
2175+ const sleep_timers_accuracy_level accuarcy_type = g_cfg.core .sleep_timers_accuracy ;
2176+ const u64 avg_delay = get_avg_timer_reponse_delay ();
2177+
2178+ static atomic_t <u64 > g_success = 0 ;
2179+ static atomic_t <u64 > g_fails = 0 ;
2180+
2181+ if (accuarcy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15 ) / 2 )
2182+ {
2183+ wait_for (remaining);
2184+
2185+ if (remaining < host_min_quantum)
2186+ {
2187+ g_success += remaining;
2188+ // g_success++;
2189+ }
2190+
2191+ passed = get_system_time () - start_time;
2192+ continue ;
2193+ }
2194+ else
2195+ {
2196+ if (remaining < host_min_quantum)
2197+ {
2198+ g_fails += remaining;
2199+ // g_fails++;
2200+ }
2201+ }
2202+
2203+ if (g_cfg.core .sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers))
21752204 {
21762205 wait_for (remaining);
21772206 }
@@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu)
22222251 cpu_counter::remove (&cpu);
22232252}
22242253
2225- void lv2_obj::notify_all () noexcept
2254+ void lv2_obj::notify_all (cpu_thread* woke_thread ) noexcept
22262255{
22272256 for (auto cpu : g_to_notify)
22282257 {
@@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept
22582287 return ;
22592288 }
22602289
2261- if (cpu->get_class () != thread_class::spu && cpu->state .none_of (cpu_flag::suspend))
2290+ if (cpu->get_class () == thread_class::ppu && cpu->state .none_of (cpu_flag::suspend + cpu_flag::signal ))
22622291 {
22632292 return ;
22642293 }
22652294
2266- std::optional<vm::writer_lock> lock;
2267-
22682295 constexpr usz total_waiters = std::size (spu_thread::g_spu_waiters_by_value);
22692296
22702297 u32 notifies[total_waiters]{};
@@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept
23462373 vm::reservation_notifier_notify (addr);
23472374 }
23482375 }
2376+
2377+ if (woke_thread == cpu)
2378+ {
2379+ return ;
2380+ }
2381+
2382+ const u64 min_timer = s_lv2_timers_min_timer_in_us;
2383+ const u64 current_time = get_guest_system_time ();
2384+
2385+ if (current_time < min_timer)
2386+ {
2387+ return ;
2388+ }
2389+
2390+ atomic_bs_t <cpu_flag>* notifies_cpus[16 ];
2391+ usz count_notifies_cpus = 0 ;
2392+
2393+ std::unique_lock lock (g_mutex, std::try_to_lock);
2394+
2395+ if (!lock)
2396+ {
2397+ // Not only is this method an opportunistic optimization,
2398+ // but if the mutex is already locked then it is likely that another thread will soon do this check instead
2399+ return ;
2400+ }
2401+
2402+ // Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself
2403+ // This 'redundancy' is what allows proper measurements
2404+ if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
2405+ {
2406+ const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe ();
2407+ s_lv2_timers_sum_of_ten_delay_in_us.release (sum - sum / 10 + (current_time - min_time2) / 10 );
2408+ }
2409+
2410+ // Check registered timeouts
2411+ while (!g_waiting.empty () && count_notifies_cpus < std::size (notifies_cpus))
2412+ {
2413+ const auto pair = &g_waiting.front ();
2414+
2415+ if (pair->first <= current_time)
2416+ {
2417+ const auto target = pair->second ;
2418+ g_waiting.pop_front ();
2419+
2420+ if (target != cpu)
2421+ {
2422+ // Change cpu_thread::state for the lightweight notification to work
2423+ ensure (!target->state .test_and_set (cpu_flag::notify));
2424+ notifies_cpus[count_notifies_cpus++] = &target->state ;
2425+ }
2426+ }
2427+ else
2428+ {
2429+ // The list is sorted so assume no more timeouts
2430+ break ;
2431+ }
2432+ }
2433+
2434+ if (g_waiting.empty ())
2435+ {
2436+ s_lv2_timers_min_timer_in_us.release (u64 {umax});
2437+ }
2438+ else
2439+ {
2440+ s_lv2_timers_min_timer_in_us.release (g_waiting.front ().first );
2441+ }
2442+
2443+ lock.unlock ();
2444+
2445+ for (usz i = count_notifies_cpus - 1 ; i != umax; i--)
2446+ {
2447+ atomic_wait_engine::notify_one (notifies_cpus[i]);
2448+ }
2449+ }
2450+
2451+ u64 lv2_obj::get_avg_timer_reponse_delay ()
2452+ {
2453+ return s_lv2_timers_sum_of_ten_delay_in_us / 10 ;
23492454}
0 commit comments