Add questionable workaround for env lock contention

2024-12-22 06:02:23 +01:00 · 2024-09-15 23:06:03 +02:00 · 2024-09-15 23:06:03 +02:00 · c1ea49940b
commit c1ea49940b
parent 5f308deb50
5 changed files with 65 additions and 5 deletions
--- a/src/emerge.cpp
+++ b/src/emerge.cpp
@ -316,6 +316,12 @@ bool EmergeManager::enqueueBlockEmergeEx(
 }


+size_t EmergeManager::getQueueSize()
+{
+	MutexAutoLock queuelock(m_queue_mutex);
+	return m_blocks_enqueued.size();
+}
+
 bool EmergeManager::isBlockInQueue(v3s16 pos)
 {
 	MutexAutoLock queuelock(m_queue_mutex);
@ -540,7 +546,9 @@ bool EmergeThread::popBlockEmerge(v3s16 *pos, BlockEmergeData *bedata)
 EmergeAction EmergeThread::getBlockOrStartGen(const v3s16 pos, bool allow_gen,
 	 const std::string *from_db, MapBlock **block, BlockMakeData *bmdata)
 {
+	//TimeTaker tt("", nullptr, PRECISION_MICRO);
 	Server::EnvAutoLock envlock(m_server);
+	//g_profiler->avg("EmergeThread: lock wait time [us]", tt.stop());

 	auto block_ok = [] (MapBlock *b) {
 		return b && b->isGenerated();
--- a/src/emerge.h
+++ b/src/emerge.h
@ -196,6 +196,7 @@ public:
 		EmergeCompletionCallback callback,
 		void *callback_param);

+	size_t getQueueSize();
 	bool isBlockInQueue(v3s16 pos);

 	Mapgen *getCurrentMapgen();
--- a/src/server.cpp
+++ b/src/server.cpp
@ -133,9 +133,13 @@ void *ServerThread::run()

 		u64 t0 = porting::getTimeUs();

-		const Server::StepSettings step_settings = m_server->getStepSettings();
+		const auto step_settings = m_server->getStepSettings();

 		try {
+			// see explanation inside
+			if (dtime > step_settings.steplen)
+				m_server->yieldToOtherThreads(dtime);
+
 			m_server->AsyncRunStep(step_settings.pause ? 0.0f : dtime);

 			const float remaining_time = step_settings.steplen
@ -655,7 +659,7 @@ void Server::AsyncRunStep(float dtime, bool initial_step)
 	{
 		EnvAutoLock lock(this);
 		float max_lag = m_env->getMaxLagEstimate();
-		constexpr float lag_warn_threshold = 2.0f;
+		constexpr float lag_warn_threshold = 1.0f;

 		// Decrease value gradually, halve it every minute.
 		if (m_max_lag_decrease.step(dtime, 0.5f)) {
@ -1113,6 +1117,52 @@ void Server::Receive(float timeout)
 	}
 }

+void Server::yieldToOtherThreads(float dtime)
+{
+	/*
+	 * Problem: the server thread and emerge thread compete for the envlock.
+	 * While the emerge thread needs it just once or twice for every processed item
+	 * the server thread uses it much more generously.
+	 * This is usually not a problem as the server sleeps between steps, which leaves
+	 * enough chance. But if the server is overloaded it's busy all the time and
+	 * - even with a fair envlock - the emerge thread can't get up to speed.
+	 * This generally has a much worse impact on gameplay than server lag itself
+	 * ever would.
+	 *
+	 * Workaround: If we detect that the server is overloaded, introduce some careful
+	 * artificial sleeps to leave the emerge threads enough chance to do their job.
+	 *
+	 * In the future the emerge code should be reworked to exclusively use a result
+	 * queue, thereby avoiding this problem (and terrible workaround).
+	 */
+
+	// don't activate workaround too quickly
+	constexpr size_t MIN_EMERGE_QUEUE_SIZE = 32;
+	const size_t qs_initial = m_emerge->getQueueSize();
+	if (qs_initial < MIN_EMERGE_QUEUE_SIZE)
+		return;
+
+	// give the thread a chance to run for every 28ms (on average)
+	// this was experimentally determined
+	const float QUANTUM = 28.0f / 1000;
+	// put an upper limit to not cause too much lag, also so this doesn't become self-sustaining
+	const int SLEEP_MAX = 10;
+
+	int sleep_count = std::clamp<int>(dtime / QUANTUM, 1, SLEEP_MAX);
+
+	ScopeProfiler sp(g_profiler, "Server::yieldTo...() sleep", SPT_AVG);
+	size_t qs = qs_initial;
+	while (sleep_count-- > 0) {
+		sleep_ms(1);
+		// abort if we don't make progress
+		size_t qs2 = m_emerge->getQueueSize();
+		if (qs2 >= qs || qs2 == 0)
+			break;
+		qs = qs2;
+	}
+	g_profiler->avg("Server::yieldTo...() progress [#]", qs_initial - qs);
+}
+
 PlayerSAO* Server::StageTwoClientInit(session_t peer_id)
 {
 	std::string playername;
--- a/src/server.h
+++ b/src/server.h
@ -167,9 +167,12 @@ public:
 	// Actual processing is done in another thread.
 	// This just checks if there was an error in that thread.
 	void step();
+
 	// This is run by ServerThread and does the actual processing
 	void AsyncRunStep(float dtime, bool initial_step = false);
 	void Receive(float timeout);
+	void yieldToOtherThreads(float dtime);
+
 	PlayerSAO* StageTwoClientInit(session_t peer_id);

 	/*
@ -602,8 +605,6 @@ private:
 	*/
 	PlayerSAO *emergePlayer(const char *name, session_t peer_id, u16 proto_version);

-	void handlePeerChanges();
-
 	/*
 		Variables
 	*/
--- a/src/util/timetaker.cpp
+++ b/src/util/timetaker.cpp
@ -35,7 +35,7 @@ u64 TimeTaker::stop(bool quiet)
 		if (m_result != nullptr) {
 			(*m_result) += dtime;
 		} else {
-			if (!quiet) {
+			if (!quiet && !m_name.empty()) {
 				infostream << m_name << " took "
 					<< dtime << TimePrecision_units[m_precision] << std::endl;
 			}