#include <iostream> #include <vector> #include <algorithm> #include <iterator> #include <assert.h> typedef std::chrono::high_resolution_clock Clock; template<typename T> void print( const std::vector<T>& V ) { std::copy(V.begin(), V.end(), std::ostream_iterator<T>(std::cout,",")); std::cout << "\n"; } //! Return the highest power of 3 less or equal to the array lenght (2n) size_t highest_leader( size_t len ) { size_t i = 1; while ( i * 3 < len ) i *= 3; return i; } //! Permutation (sigma) function returning the next index in the cycle. Permutation indices start at 1. //inline size_t next_index( size_t j, size_t len ) //{ // return ( j * 2 ) % ( len + 1 ); //} // Optimization suggested by @johanofverstedt. 1291ms to 703ms with -03 inline size_t next_index( size_t j, size_t len ) { j *= 2; size_t m = len + 1; while(j >= m) j -= m; return j; } //! Apply the cycle leader algorithm for the in-shuffle permutation template<typename T> void cycle_rotate( std::vector<T>& A, size_t leader, size_t section_offset, size_t section_len ) { size_t i = next_index(leader, section_len); while( i != leader ) { std::swap( A[section_offset + i - 1], A[section_offset + leader - 1] ); i = next_index(i, section_len); } } template<typename T> void interleave_inplace( std::vector<T>& A ) { if (A.size() <= 2) return; // Array size should be even assert( A.size() % 2 == 0 ); auto it = A.begin(); size_t m=0, n=1; while( m < n ) { size_t section_len = std::distance( it, A.end() ); size_t h = highest_leader(section_len); m = ( h > 1 ) ? h/2 : 1; n = section_len/2; //right cyclic shift std::rotate( it + m, it + n, it + m + n ); for( size_t leader = 1; leader < 2*m; leader *= 3 ) { cycle_rotate<T>( A, leader, std::distance( A.begin(), it ), 2*m ); } //move iterator to remaining un-interleaved section std::advance(it, 2*m); } } template <typename T> void interleave( std::vector<T>& A ) { std::vector<T> B(A); size_t half = A.size() / 2; for( size_t i =0; i < B.size(); i += 2 ) { A[i] = B[half + i/2]; A[i+1] = B[i/2]; } } int main(int argc, const char * argv[]) { int length = 500000; int nb_applications = 200; std::vector<int> A(length); for(int i=0; i<A.size(); i++) { A[i] = ( i < A.size()/2 ) ? -i-1 : int(i - A.size()/2)+1; } std::vector<int> B(A); { std::cout << "Trivial copy algo: "; auto start = Clock::now(); for( int i=0; i < nb_applications; ++i ) { interleave( A ); } auto end = Clock::now(); auto nsdur = std::chrono::duration_cast<std::chrono::nanoseconds>( end - start ); auto msdur = std::chrono::duration_cast<std::chrono::milliseconds>( end - start ); std::cout << nsdur.count() << "ns, " << msdur.count() << "ms" << std::endl; } { std::cout << "Inplace algo: "; auto start = Clock::now(); for( int i=0; i < nb_applications; ++i ) { interleave_inplace( B ); } auto end = Clock::now(); auto nsdur = std::chrono::duration_cast<std::chrono::nanoseconds>( end - start ); auto msdur = std::chrono::duration_cast<std::chrono::milliseconds>( end - start ); std::cout << nsdur.count() << "ns, " << msdur.count() << "ms" << std::endl; } assert( A == B ); }