Skip to content

Commit

Permalink
q_shared: better magic constant for the Q_rsqrt trick
Browse files Browse the repository at this point in the history
  • Loading branch information
illwieckz committed Dec 10, 2024
1 parent cfd4d16 commit 95fa8f8
Showing 1 changed file with 93 additions and 23 deletions.
116 changes: 93 additions & 23 deletions src/engine/qcommon/q_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,29 +338,6 @@ extern const quat_t quatIdentity;

// Converts x to long with a plain C-style cast; for a floating-point x the
// fractional part is discarded. The argument is evaluated exactly once.
#define Q_ftol(x) ((long)(x))

// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
inline float Q_rsqrt( float number )
{
// compute approximate inverse square root
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
float y;
// SSE rsqrt relative error bound: 3.7 * 10^-4
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
#else
float x = 0.5f * number;
float y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
// initial iteration
// relative error bound after the initial iteration: 1.8 * 10^-3
y *= ( 1.5f - ( x * y * y ) );
#if 0
// second iteration for higher precision
y *= ( 1.5f - ( x * y * y ) );
#endif
#endif
return y;
}

inline float Q_fabs( float x )
{
return fabsf( x );
Expand Down Expand Up @@ -498,6 +475,99 @@ void SnapVector( V &&v )
v[ 2 ] = roundf( v[ 2 ] );
}

/* The original Q_rsqrt algorithm is:
typedef union { float f; uint32_t u; } uf_t;
float Q_rsqrt( float n )
{
uint32_t magic = 0x5f3759dful;
float a = 0.5f;
float b = 3.0f;
uf_t o; o.f = n;
o.u = magic - ( o.u >> 1 );
return a * o.f * ( b - n * o.f * o.f );
}
It could be written like this, this is what Quake 3 did:
float Q_rsqrt( float n )
{
uint32_t magic = 0x5f3759dful;
float a = 0.5f;
float b = 3.0f;
float c = a * b; // 1.5f
uf_t o; o.f = n;
o.u = magic - ( o.u >> 1 );
float x = n * a;
return o.f * ( c - ( x * o.f * o.f ) );
}
It was written with a second iteration commented out:
float Q_rsqrt( float n )
{
uint32_t magic = 0x5f3759dful;
float a = 0.5f;
float b = 3.0f;
float c = a * b; // 1.5f
uf_t o; o.f = n;
o.u = magic - ( o.u >> 1);
float x = n * a;
o.f *= c - ( x * o.f * o.f );
// o.f *= c - ( x * o.f * o.f );
return o.f;
}
The relative error bound after the initial iteration was: 1.8×10⁻³
The relative error bound after a second iteration was: 5×10⁻⁶
Better values are usable from: http://rrrola.wz.cz/inv_sqrt.html
float Q_rsqrt( float n )
{
uint32_t magic = 0x5f1ffff9ul;
float a = 0.703952253f;
float b = 2.38924456f;
uf_t o; o.f = n;
o.u = magic - ( o.u >> 1 );
return a * o.f * ( b - n * o.f * o.f );
}
The relative error bound after the initial iteration is: 2.00010826×10⁻⁷ */

// Compile-time switches for Q_rsqrt:
// - Q_RSQRT_QUAKE3_CONSTANTS: non-zero selects the constants used in Quake 3,
//   zero selects the constants computed by Řrřola.
// - Q_RSQRT_DOUBLE_ITERATION: non-zero enables a second refinement iteration.
#define Q_RSQRT_QUAKE3_CONSTANTS 0
#define Q_RSQRT_DOUBLE_ITERATION 0

// The constants are constexpr: they are never meant to change at run time, and
// a mutable static defined in a header would give every translation unit its
// own writable copy.
#if Q_RSQRT_QUAKE3_CONSTANTS
// Constants used in Quake 3.
static constexpr uint32_t qrsqrt_magic = 0x5f3759dful;
static constexpr float qrsqrt_a = 0.5f;
static constexpr float qrsqrt_b = 3.0f;
#else
// Constants computed by Řrřola.
// NOTE(review): the explanatory comment preceding this block quotes
// a = 0.703952253f and b = 2.38924456f, which differ from the values
// below; confirm which pair is intended.
static constexpr uint32_t qrsqrt_magic = 0x5f1ffff9ul;
static constexpr float qrsqrt_a = 0.703974056f;
static constexpr float qrsqrt_b = 2.38919526f;
#endif

// Fast approximate inverse square root: returns an estimate of 1 / sqrt( n ).
//
// When DAEMON_USE_ARCH_INTRINSICS_i686_sse is defined the SSE rsqrt
// instruction is used. Otherwise the bit pattern of n is combined with
// qrsqrt_magic to build an initial guess, which is refined by one step of the
// form o * a * ( b - n * o * o ), or by two such steps when
// Q_RSQRT_DOUBLE_ITERATION is non-zero.
inline float Q_rsqrt( float n )
{
	// Compute approximate inverse square root.
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
	float o;
	// SSE rsqrt relative error bound: 3.7 * 10^-4
	_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
#else
	// Initial guess obtained by integer arithmetic on the bits of n.
	float o = Util::bit_cast<float>( qrsqrt_magic - ( Util::bit_cast<uint32_t>( n ) >> 1 ) );
	// First refinement iteration.
	o *= qrsqrt_a * ( qrsqrt_b - n * o * o );
#if Q_RSQRT_DOUBLE_ITERATION
	// Second iteration for higher precision.
	// Uses the same qrsqrt_a / qrsqrt_b constants as the first iteration.
	o *= qrsqrt_a * ( qrsqrt_b - n * o * o );
#endif
#endif
	return o;
}

// Linear interpolation on the first three components: for each index i in
// 0..2, r[ i ] = s[ i ] + f * ( e[ i ] - s[ i ] ), so f = 0 yields s and
// f = 1 yields e. Expands to a single comma expression; every argument is
// evaluated several times, so pass expressions without side effects.
#define VectorLerpTrem( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))
Expand Down

0 comments on commit 95fa8f8

Please sign in to comment.