Performance improvement: cache the results of getFacePositions.

Caching the result, which is always the same for a given radius, greatly reduces the number of std::list objects generated.
In the callgrind map for the original version, you will see:
  * 3.3M calls to std::list for 9700 calls to getFacePositions
In the modified version, you will see:
  * 3.3K calls to std::list for 6900 calls to getFacePositions
Callgrind map is here: #2321

It's a huge performance improvement for l_find_node_near.
This commit is contained in:
Loic Blot 2015-02-15 17:30:38 +01:00
parent ed04e8e9e4
commit 7c8793cbea
4 changed files with 81 additions and 68 deletions

@ -59,7 +59,7 @@ void RemoteClient::ResendBlockIfOnWire(v3s16 p)
} }
} }
void RemoteClient::GetNextBlocks( void RemoteClient::GetNextBlocks (
ServerEnvironment *env, ServerEnvironment *env,
EmergeManager * emerge, EmergeManager * emerge,
float dtime, float dtime,
@ -182,18 +182,15 @@ void RemoteClient::GetNextBlocks(
//bool queue_is_full = false; //bool queue_is_full = false;
s16 d; s16 d;
for(d = d_start; d <= d_max; d++) for(d = d_start; d <= d_max; d++) {
{
/* /*
Get the border/face dot coordinates of a "d-radiused" Get the border/face dot coordinates of a "d-radiused"
box box
*/ */
std::list<v3s16> list; std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
getFacePositions(list, d);
std::list<v3s16>::iterator li; std::vector<v3s16>::iterator li;
for(li=list.begin(); li!=list.end(); ++li) for(li = list.begin(); li != list.end(); ++li) {
{
v3s16 p = *li + center; v3s16 p = *li + center;
/* /*

@ -530,9 +530,8 @@ int ModApiEnvMod::l_find_node_near(lua_State *L)
} }
for(int d=1; d<=radius; d++){ for(int d=1; d<=radius; d++){
std::list<v3s16> list; std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
getFacePositions(list, d); for(std::vector<v3s16>::iterator i = list.begin();
for(std::list<v3s16>::iterator i = list.begin();
i != list.end(); ++i){ i != list.end(); ++i){
v3s16 p = pos + (*i); v3s16 p = pos + (*i);
content_t c = env->getMap().getNodeNoEx(p).getContent(); content_t c = env->getMap().getNodeNoEx(p).getContent();

@ -20,79 +20,84 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "numeric.h" #include "numeric.h"
#include "mathconstants.h" #include "mathconstants.h"
#include "../log.h" #include "log.h"
#include "../constants.h" // BS, MAP_BLOCKSIZE #include "../constants.h" // BS, MAP_BLOCKSIZE
#include <string.h> #include <string.h>
#include <iostream> #include <iostream>
std::map<u16, std::vector<v3s16> > FacePositionCache::m_cache;
// Calculate the borders of a "d-radius" cube // Calculate the borders of a "d-radius" cube
void getFacePositions(std::list<v3s16> &list, u16 d) std::vector<v3s16> FacePositionCache::getFacePositions(u16 d)
{ {
if(d == 0) if (m_cache.find(d) != m_cache.end())
{ return m_cache[d];
list.push_back(v3s16(0,0,0));
generateFacePosition(d);
return m_cache[d];
}
void FacePositionCache::generateFacePosition(u16 d)
{
m_cache[d] = std::vector<v3s16>();
if(d == 0) {
m_cache[d].push_back(v3s16(0,0,0));
return; return;
} }
if(d == 1) if(d == 1) {
{
/* /*
This is an optimized sequence of coordinates. This is an optimized sequence of coordinates.
*/ */
list.push_back(v3s16( 0, 1, 0)); // top m_cache[d].push_back(v3s16( 0, 1, 0)); // top
list.push_back(v3s16( 0, 0, 1)); // back m_cache[d].push_back(v3s16( 0, 0, 1)); // back
list.push_back(v3s16(-1, 0, 0)); // left m_cache[d].push_back(v3s16(-1, 0, 0)); // left
list.push_back(v3s16( 1, 0, 0)); // right m_cache[d].push_back(v3s16( 1, 0, 0)); // right
list.push_back(v3s16( 0, 0,-1)); // front m_cache[d].push_back(v3s16( 0, 0,-1)); // front
list.push_back(v3s16( 0,-1, 0)); // bottom m_cache[d].push_back(v3s16( 0,-1, 0)); // bottom
// 6 // 6
list.push_back(v3s16(-1, 0, 1)); // back left m_cache[d].push_back(v3s16(-1, 0, 1)); // back left
list.push_back(v3s16( 1, 0, 1)); // back right m_cache[d].push_back(v3s16( 1, 0, 1)); // back right
list.push_back(v3s16(-1, 0,-1)); // front left m_cache[d].push_back(v3s16(-1, 0,-1)); // front left
list.push_back(v3s16( 1, 0,-1)); // front right m_cache[d].push_back(v3s16( 1, 0,-1)); // front right
list.push_back(v3s16(-1,-1, 0)); // bottom left m_cache[d].push_back(v3s16(-1,-1, 0)); // bottom left
list.push_back(v3s16( 1,-1, 0)); // bottom right m_cache[d].push_back(v3s16( 1,-1, 0)); // bottom right
list.push_back(v3s16( 0,-1, 1)); // bottom back m_cache[d].push_back(v3s16( 0,-1, 1)); // bottom back
list.push_back(v3s16( 0,-1,-1)); // bottom front m_cache[d].push_back(v3s16( 0,-1,-1)); // bottom front
list.push_back(v3s16(-1, 1, 0)); // top left m_cache[d].push_back(v3s16(-1, 1, 0)); // top left
list.push_back(v3s16( 1, 1, 0)); // top right m_cache[d].push_back(v3s16( 1, 1, 0)); // top right
list.push_back(v3s16( 0, 1, 1)); // top back m_cache[d].push_back(v3s16( 0, 1, 1)); // top back
list.push_back(v3s16( 0, 1,-1)); // top front m_cache[d].push_back(v3s16( 0, 1,-1)); // top front
// 18 // 18
list.push_back(v3s16(-1, 1, 1)); // top back-left m_cache[d].push_back(v3s16(-1, 1, 1)); // top back-left
list.push_back(v3s16( 1, 1, 1)); // top back-right m_cache[d].push_back(v3s16( 1, 1, 1)); // top back-right
list.push_back(v3s16(-1, 1,-1)); // top front-left m_cache[d].push_back(v3s16(-1, 1,-1)); // top front-left
list.push_back(v3s16( 1, 1,-1)); // top front-right m_cache[d].push_back(v3s16( 1, 1,-1)); // top front-right
list.push_back(v3s16(-1,-1, 1)); // bottom back-left m_cache[d].push_back(v3s16(-1,-1, 1)); // bottom back-left
list.push_back(v3s16( 1,-1, 1)); // bottom back-right m_cache[d].push_back(v3s16( 1,-1, 1)); // bottom back-right
list.push_back(v3s16(-1,-1,-1)); // bottom front-left m_cache[d].push_back(v3s16(-1,-1,-1)); // bottom front-left
list.push_back(v3s16( 1,-1,-1)); // bottom front-right m_cache[d].push_back(v3s16( 1,-1,-1)); // bottom front-right
// 26 // 26
return; return;
} }
// Take blocks in all sides, starting from y=0 and going +-y // Take blocks in all sides, starting from y=0 and going +-y
for(s16 y=0; y<=d-1; y++) for(s16 y=0; y<=d-1; y++) {
{
// Left and right side, including borders // Left and right side, including borders
for(s16 z=-d; z<=d; z++) for(s16 z=-d; z<=d; z++) {
{ m_cache[d].push_back(v3s16(d,y,z));
list.push_back(v3s16(d,y,z)); m_cache[d].push_back(v3s16(-d,y,z));
list.push_back(v3s16(-d,y,z)); if(y != 0) {
if(y != 0) m_cache[d].push_back(v3s16(d,-y,z));
{ m_cache[d].push_back(v3s16(-d,-y,z));
list.push_back(v3s16(d,-y,z));
list.push_back(v3s16(-d,-y,z));
} }
} }
// Back and front side, excluding borders // Back and front side, excluding borders
for(s16 x=-d+1; x<=d-1; x++) for(s16 x=-d+1; x<=d-1; x++) {
{ m_cache[d].push_back(v3s16(x,y,d));
list.push_back(v3s16(x,y,d)); m_cache[d].push_back(v3s16(x,y,-d));
list.push_back(v3s16(x,y,-d)); if(y != 0) {
if(y != 0) m_cache[d].push_back(v3s16(x,-y,d));
{ m_cache[d].push_back(v3s16(x,-y,-d));
list.push_back(v3s16(x,-y,d));
list.push_back(v3s16(x,-y,-d));
} }
} }
} }
@ -100,10 +105,9 @@ void getFacePositions(std::list<v3s16> &list, u16 d)
// Take the bottom and top face with borders // Take the bottom and top face with borders
// -d<x<d, y=+-d, -d<z<d // -d<x<d, y=+-d, -d<z<d
for(s16 x=-d; x<=d; x++) for(s16 x=-d; x<=d; x++)
for(s16 z=-d; z<=d; z++) for(s16 z=-d; z<=d; z++) {
{ m_cache[d].push_back(v3s16(x,-d,z));
list.push_back(v3s16(x,-d,z)); m_cache[d].push_back(v3s16(x,d,z));
list.push_back(v3s16(x,d,z));
} }
} }

@ -25,10 +25,23 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "../irr_v3d.h" #include "../irr_v3d.h"
#include "../irr_aabb3d.h" #include "../irr_aabb3d.h"
#include <list> #include <list>
#include <map>
#include <vector>
#include <algorithm> #include <algorithm>
// Calculate the borders of a "d-radius" cube
void getFacePositions(std::list<v3s16> &list, u16 d); /*
* This class permits to cache getFacePosition call results
* This reduces CPU usage and vector calls
*/
class FacePositionCache
{
public:
static std::vector<v3s16> getFacePositions(u16 d);
private:
static void generateFacePosition(u16 d);
static std::map<u16, std::vector<v3s16> > m_cache;
};
class IndentationRaiser class IndentationRaiser
{ {