diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h index be8efe5da6..853185c220 100644 --- a/src/engine/qcommon/q_shared.h +++ b/src/engine/qcommon/q_shared.h @@ -338,29 +338,6 @@ extern const quat_t quatIdentity; #define Q_ftol(x) ((long)(x)) - // Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6 - // https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png - inline float Q_rsqrt( float number ) - { - // compute approximate inverse square root -#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) - float y; - // SSE rsqrt relative error bound: 3.7 * 10^-4 - _mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) ); -#else - float x = 0.5f * number; - float y = Util::bit_cast( 0x5f3759df - ( Util::bit_cast( number ) >> 1 ) ); - // initial iteration - // relative error bound after the initial iteration: 1.8 * 10^-3 - y *= ( 1.5f - ( x * y * y ) ); -#if 0 - // second iteration for higher precision - y *= ( 1.5f - ( x * y * y ) ); -#endif -#endif - return y; - } - inline float Q_fabs( float x ) { return fabsf( x ); @@ -498,6 +475,99 @@ void SnapVector( V &&v ) v[ 2 ] = roundf( v[ 2 ] ); } +/* The original Q_rsqrt algorithm is: + +typedef union { float f; uint32_t u; } uf_t; +float Q_rsqrt( float n ) +{ + uint32_t magic = 0x5f3759dful; + float a = 0.5f; + float b = 3.0f; + uf_t o; o.f = n; + o.u = magic - ( o.u >> 1 ); + return a * o.f * ( b - n * o.f * o.f ); +} + +It could be written like this, this is what Quake 3 did: + +float Q_rsqrt( float n ) +{ + uint32_t magic = 0x5f3759dful; + float a = 0.5f; + float b = 3.0f; + float c = a * b; // 1.5f + uf_t o = n; + o.u = magic - ( o.u >> 1); + float x = n * a; + return o.f * a ( c - ( x * o.f * o.f ) ); +} + +It was written with a second iteration commented out: + +float Q_rsqrt( float n ) +{ + uint32_t magic = 0x5f3759dful; + float a = 0.5f; + float b = 3.0f; + float c = a * b; // 1.5f + uf_t o; o.f = n; + o.u = magic - ( o.u >> 1); + float x = n * a; + o.f *= a * ( c - ( x * o.f * o.f ) ); +// o.f *= a * ( c - ( x * o.f * o.f ) ); + return o.f; +} + +The relative error bound after the initial iteration was: 1.8×10⁻³ +The relative error bound after a second iteration was: 5×10⁻⁶ + +Better values are usable from: http://rrrola.wz.cz/inv_sqrt.html + +float Q_rsqrt( float n ) +{ + uint32_t magic = 0x5f1ffff9ul: + float a = 0.703952253f; + float b = 2.38924456f; + uf_t o; o.f = n; + o.u = magic - ( o.u >> 1 ); + return a * o.f * ( b - n * y.f * y.f ); +} + +The relative error bound after the initial iteration is: 2.00010826×10⁻⁷ */ + +#define Q_RSQRT_QUAKE3_CONSTANTS 0 +#define Q_RSQRT_DOUBLE_ITERATION 0 + +#if Q_RSQRT_QUAKE3_CONSTANTS +// Constants used in Quake 3. +static uint32_t qrsqrt_magic = 0x5f3759dful; +static float qrsqrt_a = 0.5f; +static float qrsqrt_b = 3.0f; +#else +// Constants computed by Řrřola. +static uint32_t qrsqrt_magic = 0x5f1ffff9ul; +static float qrsqrt_a = 0.703974056f; +static float qrsqrt_b = 2.38919526f; +#endif + +inline float Q_rsqrt( float n ) +{ + // Compute approximate inverse square root. +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) + float o; + // SSE rsqrt relative error bound: 3.7 * 10^-4 + _mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) ); +#else + float o = Util::bit_cast( qrsqrt_magic - ( Util::bit_cast( n ) >> 1 ) ); + o *= qrsqrt_a * ( qrsqrt_b - n * o * o ); +#if Q_RSQRT_DOUBLE_ITERATION + // Second iteration for higher precision. + o *= qsqrt_a * ( qsqrt_b - n * o * o ); +#endif +#endif + return o; +} + #define VectorLerpTrem( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \ ( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \ ( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))