Skip to content

Instantly share code, notes, and snippets.

@simonwagner
Created January 3, 2016 18:04

Revisions

  1. simonwagner created this gist Jan 3, 2016.
    194 changes: 194 additions & 0 deletions nrange.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,194 @@
    #include <iostream>
    #include <limits>
    #include <functional>
    #include <chrono>

    using namespace std;

    #define likely(x) __builtin_expect (!!(x), 1)
    #define unlikely(x) __builtin_expect (!!(x), 0)

    struct verification_result {
    bool valid;
    uint32_t n;
    uint32_t i;
    uint32_t expected;
    uint32_t result;
    };

    void VerificationFail()
    {

    }

    #define TESTED_BIT_RANGE 32

    template<uint32_t (*F)(uint32_t, uint32_t)>
    static verification_result verify()
    {
    uint32_t expected_value_for_bit[TESTED_BIT_RANGE];

    //initialize expected values with 0
    for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
    expected_value_for_bit[i] = 0;
    }

    //Test for all values of n
    uint32_t n = 1;
    while(true) {

    //verify result
    for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
    uint32_t result = F(n, i);
    if(unlikely(result != expected_value_for_bit[i])) {
    VerificationFail();
    return {false, n, i, expected_value_for_bit[i], result};
    }
    }

    //now update the expected values
    for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
    expected_value_for_bit[i] += (n & (1 << i)) > 0 ? 1 : 0;
    }

    if(unlikely(n % (numeric_limits<uint32_t>::max()/100) == 0)) {
    cout << ".";
    cout.flush();
    }


    if(likely(n < numeric_limits<uint32_t>::max())) {
    ++n;
    }
    else {
    break;
    }
    }

    return {true, 0, 0, 0, 0};
    }

    template<uint32_t (*F)(uint32_t, uint32_t)>
    static void verify_algorithm(const char* name)
    {
    cout << "Verifiying " << name;
    verification_result verify_result = verify<F>();
    if(verify_result.valid) {
    cout << "ok" << endl;
    }
    else {
    cout << "FAIL!" << endl;
    cout << "\t failed for n = " << verify_result.n << " i = " << verify_result.i << endl;
    cout << "\t expected " << verify_result.expected << ", but got " << verify_result.result << " instead" << endl;
    }
    }

    //SIMON

    /*
    return 0xFFFFFFFF if bit at index i is set, else return 0x0
    */
    static inline uint32_t mask_from_bit_at_index(uint32_t source, uint32_t i)
    {
    //(~source >> i) & 1 will be 1 if bit at i is 0, else it will be 0
    //then we subtract 1, so 0 will be 0xFFFFFFFF, and 1 will be 0x0
    return ((~source >> i) & 1) - 1;
    }

    static inline uint32_t mod_power_of_2(uint32_t x, uint32_t power)
    {
    return x & ((1ull << power) - 1);
    }

    static inline uint32_t unset_bit_at(uint32_t x, uint32_t i)
    {
    uint32_t mask = ~(1 << i);
    return x & mask;
    }

    /*
    Basic calculation:
    f(n, i) = (n+1)//2^(i+1)*2^i + zeta((n+1) % 2^(i+1) - 2^i)
    with zeta(x) = x >= 0 ? x : 0
    */
    static uint32_t simon_i_bit_set_in_range(uint32_t n, uint32_t i)
    {
    uint32_t N = n;
    //uint32_t count_i_bit_set_in_period = ((N >> i) >> 1) << i; //fancy version of n//2^(i+1)*2^i
    uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);

    uint32_t remaining = mod_power_of_2(N, i + 1);
    //We want to subtract 2^i from remaining, but if the result is negative we want 0
    //To achieve this, we first set the bit at i to 0 and then AND the result with a mask that is 0xFFFFFFFF if bit i is set and 0 if not
    uint32_t count_i_bit_set_in_remaining_part = unset_bit_at(remaining, i) & mask_from_bit_at_index(remaining, i);

    return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
    }

    //Now that I understand how the multiply is optimized away...
    //But now it's nearly the same as Johannes' code
    //Just n//2^(i+1)*2^i seems to be calculated a little bit faster
    static uint32_t simon_i_bit_set_in_range2(uint32_t n, uint32_t i)
    {
    uint32_t N = n;
    uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);

    uint32_t remaining = mod_power_of_2(N, i);
    uint32_t count_i_bit_set_in_remaining_part = remaining & mask_from_bit_at_index(N, i);

    return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
    }

    //SIMON END

    //JOHANNES

    static uint32_t johannes_i_bit_set_in_range(uint32_t n, uint32_t bit)
    {
    return (((uint64_t)n >> (bit + 1)) << bit) + (n & ((1ull << bit)-1)) * ((n >> bit) & 1);
    }

    //JOHANNES END

    template<uint32_t (*F)(uint32_t, uint32_t)>
    static void benchmark(const char* name) __attribute__ ((optnone))
    {
    const uint32_t N = 0x00ffffff;
    const uint32_t I = 31;
    uint64_t calls = 0;

    volatile uint32_t result;

    auto start = chrono::high_resolution_clock::now();

    for(uint32_t n = 0; n < N; ++n) {
    for(uint32_t i = 0; i < I; ++i) {
    result = F(n, i);
    ++calls;
    }
    }

    auto end = chrono::high_resolution_clock::now();

    auto diff = end - start;
    double time_ms = chrono::duration<double, milli>(diff).count();

    cout << "Running time for " << name << ": " << endl
    << "\t" << time_ms << "ms" << endl
    << "\t" << (uint32_t)(calls/time_ms*1000.0) << " ops/s" << endl;
    }

    int main(int argc, char* argv[])
    {
    benchmark<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
    benchmark<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
    benchmark<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");

    //verify_algorithm<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
    verify_algorithm<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
    verify_algorithm<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");

    return 0;
    }