Last active
August 3, 2025 10:06
-
-
Save mattiasgustavsson/db1ab1723d5e16934ebe2ff87a3afec0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rounds x up to the nearest power-of-two value (a power of two is returned unchanged).
//   x        the value to round up; the shifts below assume a 32-bit unsigned int
//   returns  the smallest power of two that is >= x. Returns 1 for x == 0, and also 1
//            when x is above 2^31, because the true result does not fit in 32 bits
//            (these are the same values the portable bit-twiddling path produces).
static inline unsigned int pow2_ceil( unsigned int x ) {
    // Deal with the edge cases before touching any intrinsic. For x == 0 and x > 2^31 the
    // leading-zero count of ( x - 1 ) is 0, which would turn "count - 1" into a negative
    // (or huge unsigned) shift amount - undefined behavior in C, even if the shifted value
    // is later multiplied by zero. x == 1 is folded in so ( x - 1 ) is never 0 below.
    if( x < 2u || x > ( 1u << 31 ) ) {
        return 1u;
    }

    // From here on ( x - 1 ) is in [ 1, 2^31 - 1 ], so it has between 1 and 31 leading
    // zeros and every shift count used below is in [ 0, 30 ].
    #if defined( __clang__ ) || defined( __GNUC__ )
        return ( 1u << 31 ) >> ( __builtin_clz( x - 1u ) - 1 );
    #elif defined( _MSC_VER ) && ( defined( _M_IX86 ) || defined( _M_X64 ) )
        // NOTE(review): __lzcnt requires a CPU with the LZCNT instruction - confirm that
        // all target machines support it.
        return ( 1u << 31 ) >> ( __lzcnt( x - 1u ) - 1 );
    #elif defined( _MSC_VER ) && ( defined( _M_ARM ) || defined( _M_ARM64 ) )
        return ( 1u << 31 ) >> ( __clz( x - 1u ) - 1 );
    #else
        --x; // if x is already pow2, we don't want the next pow2
        // bit propagation: copy the highest set bit into every lower bit position
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        return x + 1u; // bring x from one-less-than-pow2 to pow2
    #endif
}
// Macro version of pow2_ceil, which rounds a constant (at compile time) up to its nearest
// power-of-two value. POW2_CEIL( 0 ) yields 1.
//
// The _pow2cNN helpers perform the bit propagation: each one ORs its argument with itself
// shifted right by NN bits (building on the previous helper), so after _pow2c16 every bit
// below the highest set bit (within the low 32 bits) is set. Adding 1 then gives the power
// of two.
//
// Notes:
//  - The argument is expanded many times, so only pass constant expressions without side
//    effects.
//  - The arithmetic is done in the type of the argument. With a 32-bit unsigned argument
//    above 2^31 the final "+ 1" wraps around to 0; with a 32-bit signed argument above 2^30
//    the final "+ 1" overflows, so use an unsigned constant (e.g. 0x80000000u) for large
//    values.
//  - The helper names are kept as they were so existing uses keep working.
#define _pow2c01( x ) ( (x) | ( (x) >> 1 ) )
#define _pow2c02( x ) ( _pow2c01( x ) | ( _pow2c01( x ) >> 2 ) )
#define _pow2c04( x ) ( _pow2c02( x ) | ( _pow2c02( x ) >> 4 ) )
#define _pow2c08( x ) ( _pow2c04( x ) | ( _pow2c04( x ) >> 8 ) )
#define _pow2c16( x ) ( _pow2c08( x ) | ( _pow2c08( x ) >> 16 ) )
#define POW2_CEIL(x) ( ( (x) == 0 ) ? 1 : ( _pow2c16( (x) - 1 ) + 1 ) )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I found out that, on the latest compiler versions, my previous intrinsics-based implementations triggered optimizations that exploit undefined behavior. This is fixed now. Thanks to @aganm for help with testing this.