LV2: Introduce Dynamic Timer signals

elad335 · elad335 · commit 725fce4c43a1 · 2024-12-30T13:02:09.000+02:00
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp
@@ -1307,6 +1307,8 @@ static std::deque<class cpu_thread*> g_to_sleep;
 static atomic_t<bool> g_scheduler_ready = false;
 static atomic_t<u64> s_yield_frequency = 0;
 static atomic_t<u64> s_max_allowed_yield_tsc = 0;
+static atomic_t<u64> s_lv2_timers_sum_of_ten_delay_in_us = 5000;
+static atomic_t<u64> s_lv2_timers_min_timer_in_us = 0;
 static u64 s_last_yield_tsc = 0;
 atomic_t<u32> g_lv2_preempts_taken = 0;
 
@@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio)
 
 	if (!g_postpone_notify_barrier)
 	{
-		notify_all();
+		notify_all(thread);
 	}
 
 	return result;
@@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
 		{
 			if (it == end || it->first > wait_until)
 			{
+				if (it == g_waiting.cbegin())
+				{
+					s_lv2_timers_min_timer_in_us.release(wait_until);
+				}
+
 				g_waiting.emplace(it, wait_until, &thread);
 				break;
 			}
@@ -1835,6 +1842,8 @@ void lv2_obj::cleanup()
 	g_waiting.clear();
 	g_pending = 0;
 	s_yield_frequency = 0;
+	s_lv2_timers_sum_of_ten_delay_in_us = 5000;
+	s_lv2_timers_min_timer_in_us = 0;
 }
 
 void lv2_obj::schedule_all(u64 current_time)
@@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time)
 	}
 
 	// Check registered timeouts
-	while (!g_waiting.empty())
+	while (!g_waiting.empty() && it != std::end(g_to_notify))
 	{
 		const auto pair = &g_waiting.front();
 
@@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time)
 				ensure(!target->state.test_and_set(cpu_flag::notify));
 
 				// Otherwise notify it to wake itself
-				if (it == std::end(g_to_notify))
-				{
-					// Out of notification slots, notify locally (resizable container is not worth it)
-					target->state.notify_one();
-				}
-				else
-				{
-					*it++ = &target->state;
-				}
+				*it++ = &target->state;
 			}
 		}
 		else
@@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 #endif
 		// TODO: Tune for other non windows operating sytems
 
-		if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
+		const sleep_timers_accuracy_level accuarcy_type = g_cfg.core.sleep_timers_accuracy;
+		const u64 avg_delay = get_avg_timer_reponse_delay();
+
+		static atomic_t<u64> g_success = 0;
+		static atomic_t<u64> g_fails = 0;
+
+		if (accuarcy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2)
+		{
+			wait_for(remaining);
+
+			if (remaining < host_min_quantum)
+			{
+				g_success += remaining;
+				//g_success++;
+			}
+
+			passed = get_system_time() - start_time;
+			continue;
+		}
+		else
+		{
+			if (remaining < host_min_quantum)
+			{
+				g_fails += remaining;
+				//g_fails++;
+			}
+		}
+
+		if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers))
 		{
 			wait_for(remaining);
 		}
@@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu)
 	cpu_counter::remove(&cpu);
 }
 
-void lv2_obj::notify_all() noexcept
+void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 {
 	for (auto cpu : g_to_notify)
 	{
@@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept
 		return;
 	}
 
-	if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend))
+	if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal))
 	{
 		return;
 	}
 
-	std::optional<vm::writer_lock> lock;
-
 	constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value);
 
 	u32 notifies[total_waiters]{};
@@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept
 			vm::reservation_notifier_notify(addr);
 		}
 	}
+
+	if (woke_thread == cpu)
+	{
+		return;
+	}
+
+	const u64 min_timer = s_lv2_timers_min_timer_in_us;
+	const u64 current_time = get_guest_system_time();
+
+	if (current_time < min_timer)
+	{
+		return;
+	}
+
+	atomic_bs_t<cpu_flag>* notifies_cpus[16];
+	usz count_notifies_cpus = 0;
+
+	std::unique_lock lock(g_mutex, std::try_to_lock);
+
+	if (!lock)
+	{
+		// Not only is this method an opportunistic optimization,
+		// but if the mutex is already locked then another thread will likely perform this check soon anyway
+		return;
+	}
+
+	// Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself
+	// This 'redundancy' is what allows proper measurements
+	if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
+	{
+		const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe();
+		s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10);
+	}
+
+	// Check registered timeouts
+	while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus))
+	{
+		const auto pair = &g_waiting.front();
+
+		if (pair->first <= current_time)
+		{
+			const auto target = pair->second;
+			g_waiting.pop_front();
+
+			if (target != cpu)
+			{
+				// Change cpu_thread::state for the lightweight notification to work
+				ensure(!target->state.test_and_set(cpu_flag::notify));
+				notifies_cpus[count_notifies_cpus++] = &target->state;
+			}
+		}
+		else
+		{
+			// The list is sorted so assume no more timeouts
+			break;
+		}
+	}
+
+	if (g_waiting.empty())
+	{
+		s_lv2_timers_min_timer_in_us.release(u64{umax});
+	}
+	else
+	{
+		s_lv2_timers_min_timer_in_us.release(g_waiting.front().first);
+	}
+
+	lock.unlock();
+
+	for (usz i = count_notifies_cpus - 1; i != umax; i--)
+	{
+		atomic_wait_engine::notify_one(notifies_cpus[i]);
+	}
+}
+
+u64 lv2_obj::get_avg_timer_reponse_delay() // Reports the running timer-delay estimate that wait_timeout() reads
+{
+	return s_lv2_timers_sum_of_ten_delay_in_us / 10; // Accumulator is refreshed in notify_all() and reset to 5000 in cleanup(); a tenth of it is returned
 }
diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h
@@ -454,11 +454,13 @@ struct lv2_obj
 
 	static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false);
 
-	static void notify_all() noexcept;
+	static void notify_all(cpu_thread* woke_thread = nullptr) noexcept;
 
 	// Can be called before the actual sleep call in order to move it out of mutex scope
 	static void prepare_for_sleep(cpu_thread& cpu);
 
+	static u64 get_avg_timer_reponse_delay();
+
 	struct notify_all_t
 	{
 		notify_all_t() noexcept
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -870,11 +870,13 @@ namespace rsx
 		{
 			// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
 			do_local_task(rsx::FIFO::state::paused);
+			lv2_obj::notify_all();
 			thread_ctrl::wait_on(state, old, 16000);
 		}
 		else
 		{
 			on_semaphore_acquire_wait();
+			lv2_obj::notify_all();
 			std::this_thread::yield();
 		}
 	}
diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h
@@ -91,11 +91,7 @@ struct cfg_root : cfg::node
 		cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
 		cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true };
 		cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true };
-#if defined (__linux__) || defined (__APPLE__)
-		cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
-#else
-		cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true };
-#endif
+		cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true };
 		cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true };
 
 		cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything
diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp
@@ -237,6 +237,7 @@ void fmt_class_string<sleep_timers_accuracy_level>::format(std::string& out, u64
 		switch (value)
 		{
 		case sleep_timers_accuracy_level::_as_host: return "As Host";
+		case sleep_timers_accuracy_level::_dynamic: return "Dynamic";
 		case sleep_timers_accuracy_level::_usleep: return "Usleep Only";
 		case sleep_timers_accuracy_level::_all_timers: return "All Timers";
 		}
diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h
@@ -24,6 +24,7 @@ enum class spu_block_size_type
 enum class sleep_timers_accuracy_level
 {
 	_as_host,
+	_dynamic,
 	_usleep,
 	_all_timers,
 };
diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp
@@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
 		switch (static_cast<sleep_timers_accuracy_level>(index))
 		{
 		case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy");
+		case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy");
 		case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy");
 		case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy");
 		}
diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h
@@ -233,7 +233,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
 	{ emu_settings_type::SPUCache,                 { "Core", "SPU Cache"}},
 	{ emu_settings_type::DebugConsoleMode,         { "Core", "Debug Console Mode"}},
 	{ emu_settings_type::MaxSPURSThreads,          { "Core", "Max SPURS Threads"}},
-	{ emu_settings_type::SleepTimersAccuracy,      { "Core", "Sleep Timers Accuracy"}},
+	{ emu_settings_type::SleepTimersAccuracy,      { "Core", "Sleep Timers Accuracy 2"}},
 	{ emu_settings_type::ClocksScale,              { "Core", "Clocks scale"}},
 	{ emu_settings_type::AccuratePPU128Loop,       { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}},
 	{ emu_settings_type::PerformanceReport,        { "Core", "Enable Performance Report"}},

Original file line number	Diff line number	Diff line change
`@@ -870,11 +870,13 @@ namespace rsx`
`870`	`870`	`{`
`871`	`871`	`// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.`
`872`	`872`	`do_local_task(rsx::FIFO::state::paused);`
	`873`	`+ lv2_obj::notify_all();`
`873`	`874`	`thread_ctrl::wait_on(state, old, 16000);`
`874`	`875`	`}`
`875`	`876`	`else`
`876`	`877`	`{`
`877`	`878`	`on_semaphore_acquire_wait();`
	`879`	`+ lv2_obj::notify_all();`
`878`	`880`	`std::this_thread::yield();`
`879`	`881`	`}`
`880`	`882`	`}`
Original file line number	Diff line number	Diff line change
`@@ -237,6 +237,7 @@ void fmt_class_string<sleep_timers_accuracy_level>::format(std::string& out, u64`
`237`	`237`	`switch (value)`
`238`	`238`	`{`
`239`	`239`	`case sleep_timers_accuracy_level::_as_host: return "As Host";`
	`240`	`+ case sleep_timers_accuracy_level::_dynamic: return "Dynamic";`
`240`	`241`	`case sleep_timers_accuracy_level::_usleep: return "Usleep Only";`
`241`	`242`	`case sleep_timers_accuracy_level::_all_timers: return "All Timers";`
`242`	`243`	`}`
Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,7 @@ enum class spu_block_size_type`
`24`	`24`	`enum class sleep_timers_accuracy_level`
`25`	`25`	`{`
`26`	`26`	`_as_host,`
	`27`	`+ _dynamic,`
`27`	`28`	`_usleep,`
`28`	`29`	`_all_timers,`
`29`	`30`	`};`
Original file line number	Diff line number	Diff line change
`@@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_`
`1202`	`1202`	`switch (static_cast<sleep_timers_accuracy_level>(index))`
`1203`	`1203`	`{`
`1204`	`1204`	`case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy");`
	`1205`	`+ case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy");`
`1205`	`1206`	`case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy");`
`1206`	`1207`	`case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy");`
`1207`	`1208`	`}`