Last active
March 16, 2016 17:48
-
-
Save jszuppe/c71bc9c26d0fdd854372 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace detail { | |
template<class InputIterator, class OutputIterator, class BinaryFunction> | |
inline void dispatch_gpu_reduce(InputIterator first, | |
InputIterator last, | |
OutputIterator result, | |
BinaryFunction function, | |
command_queue &queue) | |
{ | |
... | |
} | |
template<class InputIterator, class OutputIterator, class BinaryFunction> | |
inline void dispatch_cpu_reduce(InputIterator first, | |
InputIterator last, | |
OutputIterator result, | |
BinaryFunction function, | |
command_queue &queue) | |
{ | |
... | |
} | |
} // detail namespace | |
// when InputIterator is a HOST iterator (we will need to copy) | |
template<class InputIterator, class OutputIterator, class BinaryFunction> | |
inline void reduce(InputIterator first, | |
InputIterator last, | |
OutputIterator result, | |
BinaryFunction function, | |
command_queue &queue = system::default_queue()) | |
{ | |
if(small number of elements) { // We may add some option to always force copying and using OpenCL-based algorithm | |
// run STL algorithm (why not?) | |
// if there's no STL equivalent implement it in C++ or run serial version | |
} | |
else { | |
// copy | |
// run device algorithm (below) | |
} | |
} | |
// when InputIterator is a device iterator | |
template<class InputIterator, class OutputIterator, class BinaryFunction> | |
inline void reduce(InputIterator first, | |
InputIterator last, | |
OutputIterator result, | |
BinaryFunction function, | |
command_queue &queue = system::default_queue()) | |
{ | |
if(small number of elements in the input vector) { | |
// run serial algorithm | |
} | |
if(device is a CPU){ | |
// dispatch a CPU algorithm | |
} | |
else if (device is a GPU) { | |
// dispatch a GPU algorithm | |
} | |
// some kind of accelerator, we don't know | |
else { | |
// run serial algorithm (at least it'll work) | |
// for SVM memory we can just map the memory and use STL algorithm | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment