From f37f9a6f0bc147910b3bc1dfa095db6dfc813a1d Mon Sep 17 00:00:00 2001
From: sfan5 <sfan5@live.de>
Date: Mon, 30 Dec 2024 22:17:52 +0100
Subject: [PATCH] Optimize getImageAverageColor

Also fixes non-square handling: the old code used the image width as the loop bound and step for the y axis too.

before:
  getImageAverageColor [us] _____________________  804x  11.253

after:
  imageAverageColorInline [us] __________________  804x   0.557
---
 src/client/imagefilters.cpp  | 121 +++++++++++++++++++++++++++++------
 src/client/imagefilters.h    |   5 ++
 src/client/imagesource.cpp   |  69 +-------------------
 src/client/imagesource.h     |   3 -
 src/client/texturesource.cpp |   3 +-
 5 files changed, 111 insertions(+), 90 deletions(-)
diff --git a/src/client/imagefilters.cpp b/src/client/imagefilters.cpp
index 6d460f0c4..09a1198ea 100644
--- a/src/client/imagefilters.cpp
+++ b/src/client/imagefilters.cpp
@@ -148,17 +148,6 @@ static void imageCleanTransparentWithInlining(video::IImage *src, u32 threshold)
 	}
 }
 
-/* Fill in RGB values for transparent pixels, to correct for odd colors
- * appearing at borders when blending.  This is because many PNG optimizers
- * like to discard RGB values of transparent pixels, but when blending then
- * with non-transparent neighbors, their RGB values will show up nonetheless.
- *
- * This function modifies the original image in-place.
- *
- * Parameter "threshold" is the alpha level below which pixels are considered
- * transparent. Should be 127 when the texture is used with ALPHA_CHANNEL_REF,
- * 0 when alpha blending is used.
- */
 void imageCleanTransparent(video::IImage *src, u32 threshold)
 {
 	if (src->getColorFormat() == video::ECF_A8R8G8B8)
@@ -167,13 +156,109 @@ void imageCleanTransparent(video::IImage *src, u32 threshold)
 		imageCleanTransparentWithInlining<false>(src, threshold);
 }
 
-/* Scale a region of an image into another image, using nearest-neighbor with
- * anti-aliasing; treat pixels as crisp rectangles, but blend them at boundaries
- * to prevent non-integer scaling ratio artifacts.  Note that this may cause
- * some blending at the edges where pixels don't line up perfectly, but this
- * filter is designed to produce the most accurate results for both upscaling
- * and downscaling.
- */
+/**********************************/
+
+namespace {
+	// sRGB <-> linear transfer functions, applied per color component (0..1).
+	// For more colorspace transformations, see for example
+	// <https://github.com/tobspr/GLSL-Color-Spaces/blob/master/ColorSpaces.inc.glsl>
+
+	inline float linear_to_srgb_component(float v)
+	{
+		if (v > 0.0031308f)
+			return 1.055f * powf(v, 1.0f / 2.4f) - 0.055f;
+		return 12.92f * v;
+	}
+	inline float srgb_to_linear_component(float v)
+	{
+		if (v > 0.04045f)
+			return powf((v + 0.055f) / 1.055f, 2.4f);
+		return v / 12.92f;
+	}
+
+	// Table of F(i / 255) for every 8-bit value i, filled in by the constructor.
+	template <float (*F)(float)>
+	struct LUT8 {
+		std::array<float, 256> t;
+		LUT8() {
+			for (size_t i = 0; i < t.size(); i++)
+				t[i] = F(i / 255.0f);
+		}
+	};
+	const LUT8<srgb_to_linear_component> srgb_to_linear_lut;
+
+	// Converts the RGB channels of an sRGB color to linear; alpha is not read.
+	v3f srgb_to_linear(const video::SColor col_srgb)
+	{
+		const auto &lut = srgb_to_linear_lut.t;
+		return v3f(lut[col_srgb.getRed()], lut[col_srgb.getGreen()],
+			lut[col_srgb.getBlue()]);
+	}
+
+	// Converts a linear RGB color to an 8-bit sRGB color with alpha 0xff.
+	video::SColor linear_to_srgb(const v3f col_linear)
+	{
+		// not LUT-able without losing precision, but it is only called once
+		v3f col(linear_to_srgb_component(col_linear.X),
+			linear_to_srgb_component(col_linear.Y),
+			linear_to_srgb_component(col_linear.Z));
+		col *= 255.0f;
+		col.X = core::clamp<float>(col.X, 0.0f, 255.0f);
+		col.Y = core::clamp<float>(col.Y, 0.0f, 255.0f);
+		col.Z = core::clamp<float>(col.Z, 0.0f, 255.0f);
+		return video::SColor(0xff, myround(col.X), myround(col.Y), myround(col.Z));
+	}
+}
+
+// Averages a sparse grid of non-transparent pixels in linear color space.
+template <bool IS_A8R8G8B8>
+static video::SColor imageAverageColorInline(const video::IImage *src)
+{
+	const core::dimension2du size = src->getDimension();
+	void *const raw = src->getData();
+
+	// IS_A8R8G8B8 picks direct buffer reads over the generic getPixel().
+	auto read_pixel = [=](u32 px, u32 py) -> video::SColor {
+		if constexpr (!IS_A8R8G8B8)
+			return src->getPixel(px, py);
+		else
+			return reinterpret_cast<u32 *>(raw)[py*size.Width + px];
+	};
+
+	// at most 31 samples per axis, to limit runtime cost
+	const u32 step_x = std::max(1U, size.Width / 16);
+	const u32 step_y = std::max(1U, size.Height / 16);
+	v3f sum_linear;
+	u32 num_visible = 0;
+	for (u32 px = 0; px < size.Width; px += step_x)
+	for (u32 py = 0; py < size.Height; py += step_y) {
+		const video::SColor pixel = read_pixel(px, py);
+		if (pixel.getAlpha() == 0)
+			continue;
+		num_visible++;
+		sum_linear += srgb_to_linear(pixel);
+	}
+
+	video::SColor avg(0, 0, 0, 0);
+	if (num_visible > 0) {
+		sum_linear /= num_visible;
+		avg = linear_to_srgb(sum_linear);
+	}
+	avg.setAlpha(255); // opaque black when nothing was visible
+	return avg;
+}
+
+video::SColor imageAverageColor(const video::IImage *img)
+{
+	// raw-buffer fast path for 32-bit ARGB, generic getPixel() path otherwise
+	const bool is_argb32 = img->getColorFormat() == video::ECF_A8R8G8B8;
+	return is_argb32 ? imageAverageColorInline<true>(img) :
+			imageAverageColorInline<false>(img);
+}
+
+
+/**********************************/
+
 void imageScaleNNAA(video::IImage *src, const core::rect<s32> &srcrect, video::IImage *dest)
 {
 	double sx, sy, minsx, maxsx, minsy, maxsy, area, ra, ga, ba, aa, pw, ph, pa;
diff --git a/src/client/imagefilters.h b/src/client/imagefilters.h
index 606cf8c58..f46f71940 100644
--- a/src/client/imagefilters.h
+++ b/src/client/imagefilters.h
@@ -6,6 +6,7 @@
 
 #include "irrlichttypes.h"
 #include <rect.h>
+#include <SColor.h>
 
 namespace irr::video
 {
@@ -26,6 +27,10 @@ namespace irr::video
  */
 void imageCleanTransparent(video::IImage *src, u32 threshold);
 
+/* Returns the gamma-correct average color of the image (computed from a sparse
+ * sample of pixels), with transparent pixels ignored. Alpha is always 255. */
+video::SColor imageAverageColor(const video::IImage *img);
+
 /* Scale a region of an image into another image, using nearest-neighbor with
  * anti-aliasing; treat pixels as crisp rectangles, but blend them at boundaries
  * to prevent non-integer scaling ratio artifacts.  Note that this may cause
diff --git a/src/client/imagesource.cpp b/src/client/imagesource.cpp
index 4adc39834..3213ebe3e 100644
--- a/src/client/imagesource.cpp
+++ b/src/client/imagesource.cpp
@@ -925,48 +925,6 @@ void imageTransform(u32 transform, video::IImage *src, video::IImage *dst)
 	}
 }
 
-namespace {
-	// For more colorspace transformations, see for example
-	// https://github.com/tobspr/GLSL-Color-Spaces/blob/master/ColorSpaces.inc.glsl
-
-	inline float linear_to_srgb_component(float v)
-	{
-		if (v > 0.0031308f)
-			return 1.055f * powf(v, 1.0f / 2.4f) - 0.055f;
-		return 12.92f * v;
-	}
-	inline float srgb_to_linear_component(float v)
-	{
-		if (v > 0.04045f)
-			return powf((v + 0.055f) / 1.055f, 2.4f);
-		return v / 12.92f;
-	}
-
-	v3f srgb_to_linear(const video::SColor col_srgb)
-	{
-		v3f col(col_srgb.getRed(), col_srgb.getGreen(), col_srgb.getBlue());
-		col /= 255.0f;
-		col.X = srgb_to_linear_component(col.X);
-		col.Y = srgb_to_linear_component(col.Y);
-		col.Z = srgb_to_linear_component(col.Z);
-		return col;
-	}
-
-	video::SColor linear_to_srgb(const v3f col_linear)
-	{
-		v3f col;
-		col.X = linear_to_srgb_component(col_linear.X);
-		col.Y = linear_to_srgb_component(col_linear.Y);
-		col.Z = linear_to_srgb_component(col_linear.Z);
-		col *= 255.0f;
-		col.X = core::clamp<float>(col.X, 0.0f, 255.0f);
-		col.Y = core::clamp<float>(col.Y, 0.0f, 255.0f);
-		col.Z = core::clamp<float>(col.Z, 0.0f, 255.0f);
-		return video::SColor(0xff, myround(col.X), myround(col.Y),
-			myround(col.Z));
-	}
-}
-
 
 ///////////////////////////
 // ImageSource Functions //
@@ -1945,32 +1903,7 @@ video::IImage* ImageSource::generateImage(std::string_view name,
 	return baseimg;
 }
 
-video::SColor ImageSource::getImageAverageColor(const video::IImage &image)
+void ImageSource::insertSourceImage(const std::string &name, video::IImage *img, bool prefer_local)
 {
-	video::SColor c(0, 0, 0, 0);
-	u32 total = 0;
-	v3f col_acc(0, 0, 0);
-	core::dimension2d<u32> dim = image.getDimension();
-	u16 step = 1;
-	if (dim.Width > 16)
-		step = dim.Width / 16;
-	for (u16 x = 0; x < dim.Width; x += step) {
-		for (u16 y = 0; y < dim.Width; y += step) {
-			c = image.getPixel(x,y);
-			if (c.getAlpha() > 0) {
-				total++;
-				col_acc += srgb_to_linear(c);
-			}
-		}
-	}
-	if (total > 0) {
-		col_acc /= total;
-		c = linear_to_srgb(col_acc);
-	}
-	c.setAlpha(255);
-	return c;
-}
-
-void ImageSource::insertSourceImage(const std::string &name, video::IImage *img, bool prefer_local) {
 	m_sourcecache.insert(name, img, prefer_local);
 }
diff --git a/src/client/imagesource.h b/src/client/imagesource.h
index d6b7a4e9b..8abda2a40 100644
--- a/src/client/imagesource.h
+++ b/src/client/imagesource.h
@@ -45,9 +45,6 @@ struct ImageSource {
 	// Insert a source image into the cache without touching the filesystem.
 	void insertSourceImage(const std::string &name, video::IImage *img, bool prefer_local);
 
-	// TODO should probably be moved elsewhere
-	static video::SColor getImageAverageColor(const video::IImage &image);
-
 private:
 
 	// Generate image based on a string like "stone.png" or "[crack:1:0".
diff --git a/src/client/texturesource.cpp b/src/client/texturesource.cpp
index e53a1b670..01d2b8a9d 100644
--- a/src/client/texturesource.cpp
+++ b/src/client/texturesource.cpp
@@ -515,13 +515,14 @@ video::SColor TextureSource::getTextureAverageColor(const std::string &name)
 	video::ITexture *texture = getTexture(name);
 	if (!texture)
 		return {0, 0, 0, 0};
+	// Note: this downloads the texture back from the GPU, which is pointless
 	video::IImage *image = driver->createImage(texture,
 		core::position2d<s32>(0, 0),
 		texture->getOriginalSize());
 	if (!image)
 		return {0, 0, 0, 0};
 
-	video::SColor c = ImageSource::getImageAverageColor(*image);
+	video::SColor c = imageAverageColor(image);
 	image->drop();
 
 	return c;