Skip to content

Commit 725fce4

Browse files
committed
LV2: Introduce Dynamic Timer signals
1 parent b16d267 commit 725fce4

File tree

8 files changed

+131
-23
lines changed

8 files changed

+131
-23
lines changed

rpcs3/Emu/Cell/lv2/lv2.cpp

Lines changed: 121 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,6 +1307,8 @@ static std::deque<class cpu_thread*> g_to_sleep;
13071307
static atomic_t<bool> g_scheduler_ready = false;
13081308
static atomic_t<u64> s_yield_frequency = 0;
13091309
static atomic_t<u64> s_max_allowed_yield_tsc = 0;
1310+
static atomic_t<u64> s_lv2_timers_sum_of_ten_delay_in_us = 5000;
1311+
static atomic_t<u64> s_lv2_timers_min_timer_in_us = 0;
13101312
static u64 s_last_yield_tsc = 0;
13111313
atomic_t<u32> g_lv2_preempts_taken = 0;
13121314

@@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio)
14321434

14331435
if (!g_postpone_notify_barrier)
14341436
{
1435-
notify_all();
1437+
notify_all(thread);
14361438
}
14371439

14381440
return result;
@@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
15731575
{
15741576
if (it == end || it->first > wait_until)
15751577
{
1578+
if (it == g_waiting.cbegin())
1579+
{
1580+
s_lv2_timers_min_timer_in_us.release(wait_until);
1581+
}
1582+
15761583
g_waiting.emplace(it, wait_until, &thread);
15771584
break;
15781585
}
@@ -1835,6 +1842,8 @@ void lv2_obj::cleanup()
18351842
g_waiting.clear();
18361843
g_pending = 0;
18371844
s_yield_frequency = 0;
1845+
s_lv2_timers_sum_of_ten_delay_in_us = 5000;
1846+
s_lv2_timers_min_timer_in_us = 0;
18381847
}
18391848

18401849
void lv2_obj::schedule_all(u64 current_time)
@@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time)
18761885
}
18771886

18781887
// Check registered timeouts
1879-
while (!g_waiting.empty())
1888+
while (!g_waiting.empty() && it != std::end(g_to_notify))
18801889
{
18811890
const auto pair = &g_waiting.front();
18821891

@@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time)
18961905
ensure(!target->state.test_and_set(cpu_flag::notify));
18971906

18981907
// Otherwise notify it to wake itself
1899-
if (it == std::end(g_to_notify))
1900-
{
1901-
// Out of notification slots, notify locally (resizable container is not worth it)
1902-
target->state.notify_one();
1903-
}
1904-
else
1905-
{
1906-
*it++ = &target->state;
1907-
}
1908+
*it++ = &target->state;
19081909
}
19091910
}
19101911
else
@@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
21712172
#endif
21722173
// TODO: Tune for other non-Windows operating systems
21732174

2174-
if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
2175+
const sleep_timers_accuracy_level accuarcy_type = g_cfg.core.sleep_timers_accuracy;
2176+
const u64 avg_delay = get_avg_timer_reponse_delay();
2177+
2178+
static atomic_t<u64> g_success = 0;
2179+
static atomic_t<u64> g_fails = 0;
2180+
2181+
if (accuarcy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2)
2182+
{
2183+
wait_for(remaining);
2184+
2185+
if (remaining < host_min_quantum)
2186+
{
2187+
g_success += remaining;
2188+
//g_success++;
2189+
}
2190+
2191+
passed = get_system_time() - start_time;
2192+
continue;
2193+
}
2194+
else
2195+
{
2196+
if (remaining < host_min_quantum)
2197+
{
2198+
g_fails += remaining;
2199+
//g_fails++;
2200+
}
2201+
}
2202+
2203+
if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers))
21752204
{
21762205
wait_for(remaining);
21772206
}
@@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu)
22222251
cpu_counter::remove(&cpu);
22232252
}
22242253

2225-
void lv2_obj::notify_all() noexcept
2254+
void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
22262255
{
22272256
for (auto cpu : g_to_notify)
22282257
{
@@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept
22582287
return;
22592288
}
22602289

2261-
if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend))
2290+
if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal))
22622291
{
22632292
return;
22642293
}
22652294

2266-
std::optional<vm::writer_lock> lock;
2267-
22682295
constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value);
22692296

22702297
u32 notifies[total_waiters]{};
@@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept
23462373
vm::reservation_notifier_notify(addr);
23472374
}
23482375
}
2376+
2377+
if (woke_thread == cpu)
2378+
{
2379+
return;
2380+
}
2381+
2382+
const u64 min_timer = s_lv2_timers_min_timer_in_us;
2383+
const u64 current_time = get_guest_system_time();
2384+
2385+
if (current_time < min_timer)
2386+
{
2387+
return;
2388+
}
2389+
2390+
atomic_bs_t<cpu_flag>* notifies_cpus[16];
2391+
usz count_notifies_cpus = 0;
2392+
2393+
std::unique_lock lock(g_mutex, std::try_to_lock);
2394+
2395+
if (!lock)
2396+
{
2397+
// Not only is this method an opportunistic optimization
2398+
// But if it's already locked then it is likely that soon another thread would do this check instead
2399+
return;
2400+
}
2401+
2402+
// Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself
2403+
// This 'redundancy' is what allows proper measurements
2404+
if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
2405+
{
2406+
const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe();
2407+
s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10);
2408+
}
2409+
2410+
// Check registered timeouts
2411+
while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus))
2412+
{
2413+
const auto pair = &g_waiting.front();
2414+
2415+
if (pair->first <= current_time)
2416+
{
2417+
const auto target = pair->second;
2418+
g_waiting.pop_front();
2419+
2420+
if (target != cpu)
2421+
{
2422+
// Change cpu_thread::state for the lightweight notification to work
2423+
ensure(!target->state.test_and_set(cpu_flag::notify));
2424+
notifies_cpus[count_notifies_cpus++] = &target->state;
2425+
}
2426+
}
2427+
else
2428+
{
2429+
// The list is sorted so assume no more timeouts
2430+
break;
2431+
}
2432+
}
2433+
2434+
if (g_waiting.empty())
2435+
{
2436+
s_lv2_timers_min_timer_in_us.release(u64{umax});
2437+
}
2438+
else
2439+
{
2440+
s_lv2_timers_min_timer_in_us.release(g_waiting.front().first);
2441+
}
2442+
2443+
lock.unlock();
2444+
2445+
for (usz i = count_notifies_cpus - 1; i != umax; i--)
2446+
{
2447+
atomic_wait_engine::notify_one(notifies_cpus[i]);
2448+
}
2449+
}
2450+
2451+
u64 lv2_obj::get_avg_timer_reponse_delay()
2452+
{
2453+
return s_lv2_timers_sum_of_ten_delay_in_us / 10;
23492454
}

rpcs3/Emu/Cell/lv2/sys_sync.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,11 +454,13 @@ struct lv2_obj
454454

455455
static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false);
456456

457-
static void notify_all() noexcept;
457+
static void notify_all(cpu_thread* woke_thread = nullptr) noexcept;
458458

459459
// Can be called before the actual sleep call in order to move it out of mutex scope
460460
static void prepare_for_sleep(cpu_thread& cpu);
461461

462+
static u64 get_avg_timer_reponse_delay();
463+
462464
struct notify_all_t
463465
{
464466
notify_all_t() noexcept

rpcs3/Emu/RSX/RSXThread.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,11 +870,13 @@ namespace rsx
870870
{
871871
// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
872872
do_local_task(rsx::FIFO::state::paused);
873+
lv2_obj::notify_all();
873874
thread_ctrl::wait_on(state, old, 16000);
874875
}
875876
else
876877
{
877878
on_semaphore_acquire_wait();
879+
lv2_obj::notify_all();
878880
std::this_thread::yield();
879881
}
880882
}

rpcs3/Emu/system_config.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,7 @@ struct cfg_root : cfg::node
9191
cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
9292
cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true };
9393
cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true };
94-
#if defined (__linux__) || defined (__APPLE__)
95-
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
96-
#else
97-
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true };
98-
#endif
94+
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true };
9995
cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true };
10096

10197
cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything

rpcs3/Emu/system_config_types.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ void fmt_class_string<sleep_timers_accuracy_level>::format(std::string& out, u64
237237
switch (value)
238238
{
239239
case sleep_timers_accuracy_level::_as_host: return "As Host";
240+
case sleep_timers_accuracy_level::_dynamic: return "Dynamic";
240241
case sleep_timers_accuracy_level::_usleep: return "Usleep Only";
241242
case sleep_timers_accuracy_level::_all_timers: return "All Timers";
242243
}

rpcs3/Emu/system_config_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ enum class spu_block_size_type
2424
enum class sleep_timers_accuracy_level
2525
{
2626
_as_host,
27+
_dynamic,
2728
_usleep,
2829
_all_timers,
2930
};

rpcs3/rpcs3qt/emu_settings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
12021202
switch (static_cast<sleep_timers_accuracy_level>(index))
12031203
{
12041204
case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy");
1205+
case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy");
12051206
case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy");
12061207
case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy");
12071208
}

rpcs3/rpcs3qt/emu_settings_type.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
233233
{ emu_settings_type::SPUCache, { "Core", "SPU Cache"}},
234234
{ emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}},
235235
{ emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}},
236-
{ emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy"}},
236+
{ emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy 2"}},
237237
{ emu_settings_type::ClocksScale, { "Core", "Clocks scale"}},
238238
{ emu_settings_type::AccuratePPU128Loop, { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}},
239239
{ emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}},

0 commit comments

Comments
 (0)