Drop unused and unportable "fast math" code

2021-07-16 23:52:26 +02:00 · 2021-07-16 23:52:26 +02:00 · 1d66c921c2
commit 1d66c921c2
parent 22c3219ff0
3 changed files with 2 additions and 252 deletions
--- a/include/IrrCompileConfig.h
+++ b/include/IrrCompileConfig.h
@@ -779,17 +779,6 @@ ones. */
 #undef __IRR_COMPILE_WITH_WAD_ARCHIVE_LOADER_
 #endif

-//! Set FPU settings
-/** Irrlicht should use approximate float and integer fpu techniques
-precision will be lower but speed higher. currently X86 only
-*/
-#if !defined(_IRR_OSX_PLATFORM_) && !defined(_IRR_SOLARIS_PLATFORM_)
-	//#define IRRLICHT_FAST_MATH
-	#ifdef NO_IRRLICHT_FAST_MATH
-	#undef IRRLICHT_FAST_MATH
-	#endif
-#endif
-
 // Some cleanup and standard stuff

 #ifdef _IRR_WINDOWS_API_
--- a/include/irrMath.h
+++ b/include/irrMath.h
@@ -12,29 +12,6 @@
 #include <stdlib.h> // for abs() etc.
 #include <limits.h> // For INT_MAX / UINT_MAX

-#if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)
-	#define sqrtf(X) (irr::f32)sqrt((irr::f64)(X))
-	#define sinf(X) (irr::f32)sin((irr::f64)(X))
-	#define cosf(X) (irr::f32)cos((irr::f64)(X))
-	#define asinf(X) (irr::f32)asin((irr::f64)(X))
-	#define acosf(X) (irr::f32)acos((irr::f64)(X))
-	#define atan2f(X,Y) (irr::f32)atan2((irr::f64)(X),(irr::f64)(Y))
-	#define ceilf(X) (irr::f32)ceil((irr::f64)(X))
-	#define floorf(X) (irr::f32)floor((irr::f64)(X))
-	#define powf(X,Y) (irr::f32)pow((irr::f64)(X),(irr::f64)(Y))
-	#define fmodf(X,Y) (irr::f32)fmod((irr::f64)(X),(irr::f64)(Y))
-	#define fabsf(X) (irr::f32)fabs((irr::f64)(X))
-	#define logf(X) (irr::f32)log((irr::f64)(X))
-#endif
-
-#ifndef FLT_MAX
-#define FLT_MAX 3.402823466E+38F
-#endif
-
-#ifndef FLT_MIN
-#define FLT_MIN 1.17549435e-38F
-#endif
-
 namespace irr
 {
 namespace core
@@ -379,45 +356,14 @@ namespace core

 	#define F32_VALUE_0		0x00000000
 	#define F32_VALUE_1		0x3f800000
-	#define F32_SIGN_BIT		0x80000000U
-	#define F32_EXPON_MANTISSA	0x7FFFFFFFU

 	//! code is taken from IceFPU
 	//! Integer representation of a floating-point value.
-#ifdef IRRLICHT_FAST_MATH
-	#define IR(x)			((u32&)(x))
-#else
 	inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}
-#endif
-
-	//! Absolute integer representation of a floating-point value
-	#define AIR(x)			(IR(x)&0x7fffffff)

 	//! Floating-point representation of an integer value.
-#ifdef IRRLICHT_FAST_MATH
-	#define FR(x)			((f32&)(x))
-#else
 	inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}
 	inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}
-#endif
-
-	//! integer representation of 1.0
-	#define IEEE_1_0		0x3f800000
-	//! integer representation of 255.0
-	#define IEEE_255_0		0x437f0000
-
-#ifdef IRRLICHT_FAST_MATH
-	#define	F32_LOWER_0(f)		(F32_AS_U32(f) >  F32_SIGN_BIT)
-	#define	F32_LOWER_EQUAL_0(f)	(F32_AS_S32(f) <= F32_VALUE_0)
-	#define	F32_GREATER_0(f)	(F32_AS_S32(f) >  F32_VALUE_0)
-	#define	F32_GREATER_EQUAL_0(f)	(F32_AS_U32(f) <= F32_SIGN_BIT)
-	#define	F32_EQUAL_1(f)		(F32_AS_U32(f) == F32_VALUE_1)
-	#define	F32_EQUAL_0(f)		( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)
-
-	// only same sign
-	#define	F32_A_GREATER_B(a,b)	(F32_AS_S32((a)) > F32_AS_S32((b)))
-
-#else

 	#define	F32_LOWER_0(n)		((n) <  0.0f)
 	#define	F32_LOWER_EQUAL_0(n)	((n) <= 0.0f)
@@ -426,8 +372,6 @@ namespace core
 	#define	F32_EQUAL_1(n)		((n) == 1.0f)
 	#define	F32_EQUAL_0(n)		((n) == 0.0f)
 	#define	F32_A_GREATER_B(a,b)	((a) > (b))
-#endif
-

 #ifndef REALINLINE
 	#ifdef _MSC_VER
@@ -437,23 +381,6 @@ namespace core
 	#endif
 #endif

-#if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
-
-	// 8-bit bools in Borland builder
-
-	//! conditional set based on mask and arithmetic shift
-	REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )
-	{
-		return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;
-	}
-
-	//! conditional set based on mask and arithmetic shift
-	REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )
-	{
-		return ( -condition >> 31 ) & a;
-	}
-#else
-
 	//! conditional set based on mask and arithmetic shift
 	REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
 	{
@@ -471,7 +398,6 @@ namespace core
 	{
 		return ( -condition >> 31 ) & a;
 	}
-#endif

 	/*
 		if (condition) state |= m; else state &= ~m;
@@ -526,30 +452,7 @@ namespace core
 	// calculate: 1 / sqrtf ( x )
 	REALINLINE f32 reciprocal_squareroot(const f32 f)
 	{
-#if defined ( IRRLICHT_FAST_MATH )
-		// NOTE: Unlike comment below says I found inaccuracies already at 4'th significant bit.
-		// p.E: Input 1, expected 1, got 0.999755859
-
-	#if defined(_MSC_VER) && !defined(_WIN64)
-		// SSE reciprocal square root estimate, accurate to 12 significant
-		// bits of the mantissa
-		f32 recsqrt;
-		__asm rsqrtss xmm0, f           // xmm0 = rsqrtss(f)
-		__asm movss recsqrt, xmm0       // return xmm0
-		return recsqrt;
-
-/*
-		// comes from Nvidia
-		u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
-		f32 y = *(f32*)&tmp;
-		return y * (1.47f - 0.47f * x * y * y);
-*/
-	#else
 		return 1.f / sqrtf(f);
-	#endif
-#else // no fast math
-		return 1.f / sqrtf(f);
-#endif
 	}

 	// calculate: 1 / sqrtf( x )
@@ -561,37 +464,7 @@ namespace core
 	// calculate: 1 / x
 	REALINLINE f32 reciprocal( const f32 f )
 	{
-#if defined (IRRLICHT_FAST_MATH)
-		// NOTE: Unlike with 1.f / f the values very close to 0 return -nan instead of inf
-
-		// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
-		// bi ts of the mantissa
-		// One Newton-Raphson Iteration:
-		// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
-#if defined(_MSC_VER) && !defined(_WIN64)
-		f32 rec;
-		__asm rcpss xmm0, f               // xmm0 = rcpss(f)
-		__asm movss xmm1, f               // xmm1 = f
-		__asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)
-		__asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)
-		__asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)
-		__asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)
-										  //        - f * rcpss(f) * rcpss(f)
-		__asm movss rec, xmm0             // return xmm0
-		return rec;
-#else // no support yet for other compilers
 		return 1.f / f;
-#endif
-		//! i do not divide through 0.. (fpu expection)
-		// instead set f to a high value to get a return value near zero..
-		// -1000000000000.f.. is use minus to stay negative..
-		// must test's here (plane.normal dot anything ) checks on <= 0.f
-		//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
-		//return 1.f / FR ( x );
-
-#else // no fast math
-		return 1.f / f;
-#endif
 	}

 	// calculate: 1 / x
@@ -604,45 +477,9 @@ namespace core
 	// calculate: 1 / x, low precision allowed
 	REALINLINE f32 reciprocal_approxim ( const f32 f )
 	{
-#if defined( IRRLICHT_FAST_MATH)
-
-		// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
-		// bi ts of the mantissa
-		// One Newton-Raphson Iteration:
-		// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
-#if defined(_MSC_VER) && !defined(_WIN64)
-		f32 rec;
-		__asm rcpss xmm0, f               // xmm0 = rcpss(f)
-		__asm movss xmm1, f               // xmm1 = f
-		__asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)
-		__asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)
-		__asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)
-		__asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)
-										  //        - f * rcpss(f) * rcpss(f)
-		__asm movss rec, xmm0             // return xmm0
-		return rec;
-#else // no support yet for other compilers
 		return 1.f / f;
-#endif
-
-/*
-		// SSE reciprocal estimate, accurate to 12 significant bits of
-		f32 rec;
-		__asm rcpss xmm0, f             // xmm0 = rcpss(f)
-		__asm movss rec , xmm0          // return xmm0
-		return rec;
-*/
-/*
-		u32 x = 0x7F000000 - IR ( p );
-		const f32 r = FR ( x );
-		return r * (2.0f - p * r);
-*/
-#else // no fast math
-		return 1.f / f;
-#endif
 	}

-
 	REALINLINE s32 floor32(f32 x)
 	{
 		return (s32) floorf ( x );
@@ -677,9 +514,7 @@ namespace core
 } // end namespace core
 } // end namespace irr

-#ifndef IRRLICHT_FAST_MATH
-	using irr::core::IR;
-	using irr::core::FR;
-#endif
+using irr::core::IR;
+using irr::core::FR;

 #endif
--- a/source/Irrlicht/irrMathFastCompat.h
+++ b/source/Irrlicht/irrMathFastCompat.h
@@ -24,33 +24,7 @@ namespace core
 	// Input -1.40129846e-45, expected -1, got 0
 	REALINLINE s32 floor32_fast(f32 x)
 	{
-#ifdef IRRLICHT_FAST_MATH
-		const f32 h = 0.5f;
-
-		s32 t;
-
-#if defined(_MSC_VER) && !defined(_WIN64)
-		__asm
-		{
-			fld	x
-			fsub	h
-			fistp	t
-		}
-#elif defined(__GNUC__)
-		__asm__ __volatile__ (
-			"fsub %2 \n\t"
-			"fistpl %0"
-			: "=m" (t)
-			: "t" (x), "f" (h)
-			: "st"
-			);
-#else
 		return (s32) floorf ( x );
-#endif
-		return t;
-#else // no fast math
-		return (s32) floorf ( x );
-#endif
 	}
 	
 	// Some examples for unexpected results when using this with IRRLICHT_FAST_MATH:
@@ -59,33 +33,7 @@ namespace core
 	// Input -3, expected -3, got -2
 	REALINLINE s32 ceil32_fast ( f32 x )
 	{
-#ifdef IRRLICHT_FAST_MATH
-		const f32 h = 0.5f;
-
-		s32 t;
-
-#if defined(_MSC_VER) && !defined(_WIN64)
-		__asm
-		{
-			fld	x
-			fadd	h
-			fistp	t
-		}
-#elif defined(__GNUC__)
-		__asm__ __volatile__ (
-			"fadd %2 \n\t"
-			"fistpl %0 \n\t"
-			: "=m"(t)
-			: "t"(x), "f"(h)
-			: "st"
-			);
-#else
 		return (s32) ceilf ( x );
-#endif
-		return t;
-#else // not fast math
-		return (s32) ceilf ( x );
-#endif
 	}	
 	
 	// Some examples for unexpected results when using this with IRRLICHT_FAST_MATH:
@@ -95,29 +43,7 @@ namespace core
 	// Input -2.80259693e-45, expected -nan(ind), got -inf	
 	REALINLINE s32 round32_fast(f32 x)
 	{
-#if defined(IRRLICHT_FAST_MATH)
-		s32 t;
-
-#if defined(_MSC_VER) && !defined(_WIN64)
-		__asm
-		{
-			fld   x
-			fistp t
-		}
-#elif defined(__GNUC__)
-		__asm__ __volatile__ (
-			"fistpl %0 \n\t"
-			: "=m"(t)
-			: "t"(x)
-			: "st"
-			);
-#else
 		return (s32) round_(x);
-#endif
-		return t;
-#else // no fast math
-		return (s32) round_(x);
-#endif
 	}	
 	
 } // end namespace core