simonwagner · January 3, 2016 18:04 · Jan 3, 2016
diff --git a/nrange.cpp b/nrange.cpp
@@ -0,0 +1,194 @@
+#include <iostream>
+#include <limits>
+#include <functional>
+#include <chrono>
+
+using namespace std;
+
+#define likely(x)    __builtin_expect (!!(x), 1)
+#define unlikely(x)  __builtin_expect (!!(x), 0)
+
+struct verification_result {
+    bool valid;
+    uint32_t n;
+    uint32_t i;
+    uint32_t expected;
+    uint32_t result;
+};
+
+void VerificationFail()
+{
+
+}
+
+#define TESTED_BIT_RANGE 32
+
+template<uint32_t (*F)(uint32_t, uint32_t)>
+static verification_result verify()
+{
+    uint32_t expected_value_for_bit[TESTED_BIT_RANGE];
+
+    //initialize expected values with 0
+    for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
+        expected_value_for_bit[i] = 0;
+    }
+
+    //Test for all values of n
+    uint32_t n = 1;
+    while(true) {
+
+        //verify result
+        for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
+            uint32_t result = F(n, i);
+            if(unlikely(result != expected_value_for_bit[i])) {
+                VerificationFail();
+                return {false, n, i, expected_value_for_bit[i], result};
+            }
+        }
+
+        //now update the expected values
+        for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
+            expected_value_for_bit[i] += (n & (1 << i)) > 0 ? 1 : 0;
+        }
+
+        if(unlikely(n % (numeric_limits<uint32_t>::max()/100) == 0)) {
+            cout << ".";
+            cout.flush();
+        }
+
+
+        if(likely(n < numeric_limits<uint32_t>::max())) {
+            ++n;
+        }
+        else {
+            break;
+        }
+    }
+
+    return {true, 0, 0, 0, 0};
+}
+
+template<uint32_t (*F)(uint32_t, uint32_t)>
+static void verify_algorithm(const char* name)
+{
+    cout << "Verifiying " << name;
+    verification_result verify_result = verify<F>();
+    if(verify_result.valid) {
+        cout << "ok" << endl;
+    }
+    else {
+        cout << "FAIL!" << endl;
+        cout << "\t failed for n = " <<  verify_result.n << " i = " << verify_result.i << endl;
+        cout << "\t expected " <<  verify_result.expected << ", but got " << verify_result.result << " instead" << endl;
+    }
+}
+
+//SIMON
+
+/*
+return 0xFFFFFFFF if bit at index i is set, else return 0x0
+*/
+static inline uint32_t mask_from_bit_at_index(uint32_t source, uint32_t i)
+{
+    //(~source >> i) & 1 will be 1 if bit at i is 0, else it will be 0
+    //then we subtract 1, so 0 will be 0xFFFFFFFF, and 1 will be 0x0
+    return ((~source >> i) & 1) - 1;
+}
+
+static inline uint32_t mod_power_of_2(uint32_t x, uint32_t power)
+{
+    return x & ((1ull << power) - 1);
+}
+
+static inline uint32_t unset_bit_at(uint32_t x, uint32_t i)
+{
+    uint32_t mask = ~(1 << i);
+    return x & mask;
+}
+
+/*
+Basic calculation:
+
+f(n, i) = (n+1)//2^(i+1)*2^i + zeta((n+1) % 2^(i+1) - 2^i)
+
+with zeta(x) = x >= 0 ? x : 0
+*/
+static uint32_t simon_i_bit_set_in_range(uint32_t n, uint32_t i)
+{
+    uint32_t N = n;
+    //uint32_t count_i_bit_set_in_period = ((N >> i) >> 1) << i; //fancy version of n//2^(i+1)*2^i
+    uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);
+
+    uint32_t remaining = mod_power_of_2(N, i + 1);
+    //We want to subtract 2^i from remaining, but if the result is negative we want 0
+    //To achieve this, we first set the bit at i to 0 and then AND the result with a mask that is 0xFFFFFFFF if bit i is set and 0 if not
+    uint32_t count_i_bit_set_in_remaining_part = unset_bit_at(remaining, i) & mask_from_bit_at_index(remaining, i);
+
+    return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
+}
+
+//Now that I understand how the multiply is optimized away...
+//But now it's nearly the same as Johannes' code
+//Just n//2^(i+1)*2^i seems to be calculated a little bit faster
+static uint32_t simon_i_bit_set_in_range2(uint32_t n, uint32_t i)
+{
+    uint32_t N = n;
+    uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);
+
+    uint32_t remaining = mod_power_of_2(N, i);
+    uint32_t count_i_bit_set_in_remaining_part = remaining & mask_from_bit_at_index(N, i);
+
+    return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
+}
+
+//SIMON END
+
+//JOHANNES
+
+static uint32_t johannes_i_bit_set_in_range(uint32_t n, uint32_t bit)
+{
+    return (((uint64_t)n >> (bit + 1)) << bit) + (n & ((1ull << bit)-1)) * ((n >> bit) & 1);
+}
+
+//JOHANNES END
+
+template<uint32_t (*F)(uint32_t, uint32_t)>
+static void benchmark(const char* name) __attribute__ ((optnone))
+{
+    const uint32_t N = 0x00ffffff;
+    const uint32_t I = 31;
+    uint64_t calls = 0;
+
+    volatile uint32_t result;
+
+    auto start = chrono::high_resolution_clock::now();
+
+    for(uint32_t n = 0; n < N; ++n) {
+        for(uint32_t i = 0; i < I; ++i) {
+            result = F(n, i);
+            ++calls;
+        }
+    }
+
+    auto end = chrono::high_resolution_clock::now();
+
+    auto diff = end - start;
+    double time_ms = chrono::duration<double, milli>(diff).count();
+
+    cout << "Running time for " << name  << ": " << endl
+         << "\t" << time_ms << "ms" << endl
+         << "\t" << (uint32_t)(calls/time_ms*1000.0) << " ops/s" << endl;
+}
+
+int main(int argc, char* argv[])
+{
+    benchmark<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
+    benchmark<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
+    benchmark<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");
+
+    //verify_algorithm<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
+    verify_algorithm<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
+    verify_algorithm<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");
+
+    return 0;
+}