From 71893807b37c07bd428d71c47fd7cbd12bc82760 Mon Sep 17 00:00:00 2001 From: sfan5 Date: Fri, 7 Jun 2024 16:57:30 +0200 Subject: [PATCH] Call malloc_trim() regularly to improve deallocation behavior (#14707) --- irr/include/IMeshBuffer.h | 26 +++++++++++++++++++++++ src/CMakeLists.txt | 6 ++++++ src/client/mapblock_mesh.cpp | 5 +++++ src/cmake_config.h.in | 1 + src/mapblock.cpp | 1 + src/mapgen/mg_schematic.cpp | 4 +++- src/porting.cpp | 40 +++++++++++++++++++++++++++++++++++- src/porting.h | 12 +++++++++++ src/voxel.cpp | 8 ++++++-- 9 files changed, 99 insertions(+), 4 deletions(-) diff --git a/irr/include/IMeshBuffer.h b/irr/include/IMeshBuffer.h index 3e2e3d74e..c69a12d1d 100644 --- a/irr/include/IMeshBuffer.h +++ b/irr/include/IMeshBuffer.h @@ -176,6 +176,32 @@ class IMeshBuffer : public virtual IReferenceCounted } return 0; } + + //! Calculate size of vertices and indices in memory + virtual size_t getSize() const + { + size_t ret = 0; + switch (getVertexType()) { + case video::EVT_STANDARD: + ret += sizeof(video::S3DVertex) * getVertexCount(); + break; + case video::EVT_2TCOORDS: + ret += sizeof(video::S3DVertex2TCoords) * getVertexCount(); + break; + case video::EVT_TANGENTS: + ret += sizeof(video::S3DVertexTangents) * getVertexCount(); + break; + } + switch (getIndexType()) { + case video::EIT_16BIT: + ret += sizeof(u16) * getIndexCount(); + break; + case video::EIT_32BIT: + ret += sizeof(u32) * getIndexCount(); + break; + } + return ret; + } }; } // end namespace scene diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bcbe6d7f5..2ea416a4d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -351,6 +351,12 @@ endif() include(CheckSymbolExists) check_symbol_exists(strlcpy "string.h" HAVE_STRLCPY) +if(UNIX) + check_symbol_exists(malloc_trim "malloc.h" HAVE_MALLOC_TRIM) +else() + set(HAVE_MALLOC_TRIM FALSE) +endif() + check_include_files(endian.h HAVE_ENDIAN_H) configure_file( diff --git a/src/client/mapblock_mesh.cpp b/src/client/mapblock_mesh.cpp index f06dcfa58..429464e04 100644 --- a/src/client/mapblock_mesh.cpp +++ b/src/client/mapblock_mesh.cpp @@ -800,11 +800,16 @@ MapBlockMesh::MapBlockMesh(Client *client, MeshMakeData *data, v3s16 camera_offs MapBlockMesh::~MapBlockMesh() { + size_t sz = 0; for (scene::IMesh *m : m_mesh) { + for (u32 i = 0; i < m->getMeshBufferCount(); i++) + sz += m->getMeshBuffer(i)->getSize(); m->drop(); } for (MinimapMapblock *block : m_minimap_mapblocks) delete block; + + porting::TrackFreedMemory(sz); } bool MapBlockMesh::animate(bool faraway, float time, int crack, diff --git a/src/cmake_config.h.in b/src/cmake_config.h.in index 33ce41943..2185600af 100644 --- a/src/cmake_config.h.in +++ b/src/cmake_config.h.in @@ -31,6 +31,7 @@ #cmakedefine01 USE_REDIS #cmakedefine01 HAVE_ENDIAN_H #cmakedefine01 HAVE_STRLCPY +#cmakedefine01 HAVE_MALLOC_TRIM #cmakedefine01 CURSES_HAVE_CURSES_H #cmakedefine01 CURSES_HAVE_NCURSES_H #cmakedefine01 CURSES_HAVE_NCURSES_NCURSES_H diff --git a/src/mapblock.cpp b/src/mapblock.cpp index b3e951b1f..9a27a9f3d 100644 --- a/src/mapblock.cpp +++ b/src/mapblock.cpp @@ -85,6 +85,7 @@ MapBlock::~MapBlock() #endif delete[] data; + porting::TrackFreedMemory(sizeof(MapNode) * nodecount); } bool MapBlock::onObjectsActivation() diff --git a/src/mapgen/mg_schematic.cpp b/src/mapgen/mg_schematic.cpp index 5ce0b8844..bc1b35ab4 100644 --- a/src/mapgen/mg_schematic.cpp +++ b/src/mapgen/mg_schematic.cpp @@ -19,7 +19,6 @@ with this program; if not, write to the Free Software Foundation, Inc., */ #include -#include #include "mg_schematic.h" #include "server.h" #include "mapgen.h" @@ -32,6 +31,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "serialization.h" #include "filesys.h" #include "voxelalgorithms.h" +#include "porting.h" /////////////////////////////////////////////////////////////////////////////// @@ -80,6 +80,8 @@ Schematic::~Schematic() { delete []schemdata; delete []slice_probs; + u32 nodecount = size.X * size.Y * size.Z; + porting::TrackFreedMemory(nodecount * sizeof(MapNode)); } ObjDef *Schematic::clone() const diff --git a/src/porting.cpp b/src/porting.cpp index 2b2405d38..0763e92e1 100644 --- a/src/porting.cpp +++ b/src/porting.cpp @@ -63,7 +63,11 @@ with this program; if not, write to the Free Software Foundation, Inc., #include #endif -#include "config.h" +#if HAVE_MALLOC_TRIM + // glibc-only pretty much + #include +#endif + #include "debug.h" #include "filesys.h" #include "log.h" @@ -72,6 +76,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include #include #include +#include #if !defined(SERVER) && defined(_WIN32) // On Windows export some driver-specific variables to encourage Minetest to be @@ -903,4 +908,37 @@ double perf_freq = get_perf_freq(); #endif +#if HAVE_MALLOC_TRIM + +/* + * On Linux/glibc we found that after deallocating bigger chunks of data (esp. MapBlocks) + * the memory would not be given back to the OS and would stay at peak usage. + * This appears to be a combination of unfortunate allocation order/fragmentation + * and the fact that glibc does not call madvise(MADV_DONTNEED) on its own. + * Some other allocators were also affected, jemalloc and musl libc were not. + * read more: + * + * As a workaround we track freed memory coarsely and call malloc_trim() once a + * certain amount is reached. + */ + +static std::atomic memory_freed; + +constexpr size_t MEMORY_TRIM_THRESHOLD = 128 * 1024 * 1024; + +void TrackFreedMemory(size_t amount) +{ + constexpr auto MO = std::memory_order_relaxed; + memory_freed.fetch_add(amount, MO); + if (memory_freed.load(MO) >= MEMORY_TRIM_THRESHOLD) { + // Synchronize call + if (memory_freed.exchange(0, MO) < MEMORY_TRIM_THRESHOLD) + return; + // Leave some headroom for future allocations + malloc_trim(1 * 1024 * 1024); + } +} + +#endif + } //namespace porting diff --git a/src/porting.h b/src/porting.h index a98e66bcf..27fabe7cd 100644 --- a/src/porting.h +++ b/src/porting.h @@ -290,6 +290,17 @@ void osSpecificInit(); // This attaches to the parents process console, or creates a new one if it doesnt exist. void attachOrCreateConsole(); +/** + * Call this after freeing bigger blocks of memory. Used on some platforms to + * properly give memory back to the OS. + * @param amount Number of bytes freed +*/ +#if HAVE_MALLOC_TRIM +void TrackFreedMemory(size_t amount); +#else +inline void TrackFreedMemory(size_t amount) { (void)amount; } +#endif + #ifdef _WIN32 // Quotes an argument for use in a CreateProcess() commandline (not cmd.exe!!) std::string QuoteArgv(const std::string &arg); @@ -298,6 +309,7 @@ std::string QuoteArgv(const std::string &arg); std::string ConvertError(DWORD error_code); #endif +// snprintf wrapper int mt_snprintf(char *buf, const size_t buf_size, const char *fmt, ...); /** diff --git a/src/voxel.cpp b/src/voxel.cpp index bb270f53b..0f87cf282 100644 --- a/src/voxel.cpp +++ b/src/voxel.cpp @@ -23,6 +23,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "nodedef.h" #include "util/directiontables.h" #include "util/timetaker.h" +#include "porting.h" #include // memcpy, memset /* @@ -40,12 +41,15 @@ VoxelManipulator::~VoxelManipulator() void VoxelManipulator::clear() { - // Reset area to volume=0 - m_area = VoxelArea(); + // Reset area to empty volume + VoxelArea old; + std::swap(m_area, old); delete[] m_data; m_data = nullptr; delete[] m_flags; m_flags = nullptr; + + porting::TrackFreedMemory((sizeof(*m_data) + sizeof(*m_flags)) * old.getVolume()); } void VoxelManipulator::print(std::ostream &o, const NodeDefManager *ndef,