-
-
Save cipri-tom/e4f28c2785ff0de30b71 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Write a formatted string to standard output (no newline is appended).
-- @param s   format string, used via s:format(...)
-- @param ... values substituted into the format string
local function printf(s, ...)
  io.write(s:format(...))
end
local ffi = require("ffi")

-- C declarations for the functions loaded from callback.so below.
ffi.cdef[[
typedef void (*cb)(void);

void   set_v(int n, void   (*)(void  ));
void   set_i(int n, void   (*)(int   ));
void   set_d(int n, void   (*)(double));
int    get_i(int n, int    (*)(void));
double get_d(int n, double (*)(void));

void call_v(void  );
void call_i(int   );
void call_d(double);
void loop  (int n);
]]

local callback = ffi.load("./callback.so")
local timeit = require("timeit")
-- Shared counter: incremented by lset_v, read by lget.
local v = 0

--- Callback with no arguments: increments the shared counter.
local function lset_v()
  v = v + 1
end

--- Callback with one numeric argument. The increment only touches the local
-- parameter, so the call has no effect visible outside the function.
local function lset_a(a)
  a = a + 1
end

--- Callback returning a value: twice the shared counter.
local function lget()
  return v * 2
end
print("operation ", "reps ", "time(s)", "nsec/call")

-- C -> Lua: each entry names the C driver in callback.so, the Lua function it
-- calls back into, and the C function-pointer type of that callback.
local c2l = {
  {name='set_v', func=lset_v, ctype="void (*)(void)"},
  {name='set_i', func=lset_a, ctype="void (*)(int)"},
  {name='set_d', func=lset_a, ctype="void (*)(double)"},
  {name='get_i', func=lget,   ctype="int (*)(void)"},
  {name='get_d', func=lget,   ctype="double (*)(void)"}
}

for _,test in ipairs(c2l) do
  -- Create the callback once with an explicit cast. Passing the Lua function
  -- directly would make LuaJIT create a new, permanent callback on every
  -- timed call; an explicitly cast callback can be released with :free().
  local cb = ffi.cast(test.ctype, test.func)
  local r = timeit(function(n)
    callback[test.name](n, cb)
  end)
  cb:free()
  printf("C into Lua %-12s %s\n", test.name, r)
end

-- Lua -> C: n direct FFI calls from a Lua loop.
print("Lua into C call(void) ", timeit(function(n)
  for i = 1, n do callback.call_v() end
end))

print("Lua into C call(int) ", timeit(function(n)
  for i = 1, n do callback.call_i(3) end
end))

print("Lua into C call(double)", timeit(function(n)
  for i = 1, n do callback.call_d(3.5) end
end))

-- Baselines: a plain Lua call and empty loops on each side.
print("Lua into Lua ", timeit(function(n)
  for i = 1, n do lset_v() end
end))

print("C empty loop ", timeit(function(n)
  callback.loop(n)
end))

print("Lua empty loop ", timeit(function(n)
  for i = 1, n do end
end))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gcc -std=c99 -Wall -pedantic -O3 -shared -static-libgcc -fPIC callback.c -o callback.so | |
// --- CALLS ------------------------------------------------------------------ | |
// Empty function taking no arguments; a call target with no work in it.
void call_v(void) {
}
// Call target taking an int. The addition only modifies the by-value
// parameter, so nothing is visible to the caller.
void call_i(int v) {
    v += 5;
}
// Call target taking a double. The addition only modifies the by-value
// parameter, so nothing is visible to the caller.
void call_d(double v) {
    v += 5.0;
}
// Run an empty loop n times (no iterations when n <= 0).
void loop(int n) {
    for (int i = 0; i < n; i++) {
        /* prevent compiler optimizations from skipping loop entirely */
        __asm__("");
    }
}
// --- SETTERS ---------------------------------------------------------------- | |
// Call f() n times. f is never called when n <= 0.
void set_v(int n, void (*f)(void)) {
    for (int i = 0; i < n; i++) {
        f();
    }
}
// Call f(i) for i = 0 .. n-1. f is never called when n <= 0.
void set_i(int n, void (*f)(int)) {
    for (int i = 0; i < n; i++) {
        f(i);
    }
}
// Call f(3.0) n times. f is never called when n <= 0.
void set_d(int n, void (*f)(double)) {
    double a = 3.0;
    for (int i = 0; i < n; i++) {
        f(a);
    }
}
// --- GETTERS ---------------------------------------------------------------- | |
// Call f() n times and return the value of the last call (0 when n <= 0).
// The int return type matches the declaration the Lua side passes to
// ffi.cdef; returning the value also replaces the old (void)v workaround for
// the 'set-but-unused' warning.
int get_i(int n, int (*f)(void)) {
    int v = 0;
    for (int i = 0; i < n; i++) {
        v = f();
    }
    return v;
}
// Call f() n times and return the value of the last call (0.0 when n <= 0).
// The double return type matches the declaration the Lua side passes to
// ffi.cdef; returning the value also replaces the old (void)v workaround for
// the 'set-but-unused' warning.
double get_d(int n, double (*f)(void)) {
    double v = 0.0;
    for (int i = 0; i < n; i++) {
        v = f();
    }
    return v;
}
// --- PUSH vs PULL ----------------------------------------------------------- | |
// Callback type: receives a byte array and its length, returns a multiplier.
typedef double (*getter_fp)(int len, unsigned char mono[len]);

// Length-prefixed array of doubles (flexible array member).
struct Arr {
    int size;
    double data[];
};

enum constants {MONO_LEN = 5};

// Fixed byte array passed to the callback / returned by get_mono().
unsigned char mono[MONO_LEN] = {1, 2, 3, 4, 5};
// --- --- PUSH style --------------------------------------------------------- | |
// Multiply every element of a->data in place by the value returned from
// get_multiplier(MONO_LEN, mono). The callback is invoked once per element.
void push_style(struct Arr *a, getter_fp get_multiplier)
{
    for (int i = 0; i < a->size; ++i) {
        a->data[i] *= get_multiplier(MONO_LEN, mono);
    }
}
// --- --- PULL style --------------------------------------------------------- | |
int get_mono_len() | |
{ | |
return MONO_LEN; | |
} | |
unsigned char* | |
get_mono(int idx) | |
{ | |
return mono; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- OBJECTIVE : apply a LUA function to all members of an array | |
-- -- PUSH style: do it in C side, with callback to the LUA function | |
-- -- PULL style: do it in Lua side, with calls to C to get necessary info | |
local ffi = require("ffi")

-- C declarations for the push/pull helpers loaded from callback.so below.
-- `double data[?]` makes struct Arr a variable-length struct on the FFI side.
ffi.cdef[[
typedef double (*getter_fp)(int len, unsigned char mono[]);

struct Arr {
  int size;
  double data[?];
};

void push_style(struct Arr *a, getter_fp get_multiplier);

int get_mono_len();
unsigned char* get_mono(int idx);
]]

local callback = ffi.load("./callback.so")
local timeit = require("timeit")

local arr_t = ffi.typeof("struct Arr");
-- the callback | |
--- The callback: half the sum of the first `len` elements of `mono`.
-- @param len  number of elements to read
-- @param mono 0-indexed array (elements mono[0] .. mono[len-1] are read)
-- @return sum of the elements multiplied by 0.5
local function lget_multiplier(len, mono)
  local s = 0
  for i = 0, len - 1 do
    s = s + mono[i]
  end
  return s * 0.5
end
-- PUSH style ----------------------------------------------------------------- | |
--- PUSH style: build an n-element array, then let C walk it and call back
-- into lget_multiplier for each element.
-- @param n number of elements
-- @return the struct Arr cdata after the C side has scaled it
local function push_style(n)
  local a = arr_t(n, {n})
  for i = 0, n - 1 do a.data[i] = i end

  local cb = ffi.cast("getter_fp", lget_multiplier)
  callback.push_style(a, cb)
  -- Callbacks are a limited resource and are not garbage collected; release
  -- this one now that the C call has returned.
  cb:free()
  return a
end
-- PULL style ----------------------------------------------------------------- | |
--- PULL style: build an n-element array and scale it on the Lua side,
-- calling into C once per element to fetch the mono buffer.
-- @param n number of elements
-- @return the scaled struct Arr cdata
local function pull_style(n)
  local a = arr_t(n, {n})
  for i = 0, n - 1 do a.data[i] = i end

  local mono_len = callback.get_mono_len()
  for i = 0, n - 1 do
    local mono = callback.get_mono(i)
    a.data[i] = a.data[i] * lget_multiplier(mono_len, mono)
  end
  return a
end
-- CHECK ---------------------------------------------------------------------- | |
-- Both styles must produce identical arrays before their timings are compared.
local push_v, pull_v = push_style(100), pull_style(100)
assert(push_v.size == pull_v.size)
for i = 0, push_v.size - 1 do
  assert(push_v.data[i] == pull_v.data[i])
end

-- BENCH ----------------------------------------------------------------------
print("PUSH style", timeit(push_style))
print("PULL style", timeit(pull_style))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
operation                     reps  time(s)  nsec/call
C into Lua set_v          10000000    0.498     49.817
C into Lua set_i          10000000    0.662     66.249
C into Lua set_d          10000000    0.681     68.143
C into Lua get_i          10000000    0.633     63.272
C into Lua get_d          10000000    0.650     64.990
Lua into C call(void)    100000000    0.381      3.807
Lua into C call(int)     100000000    0.381      3.815
Lua into C call(double)  100000000    0.415      4.154
Lua into Lua             100000000    0.104      1.039
C empty loop            1000000000    0.695      0.695
Lua empty loop          1000000000    0.693      0.693
PUSH style                 1000000    0.158    158.256
PULL style                 1000000    0.207    207.297
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Call a function with a repeat count argument.
-- Takes a single argument, which is a function which in turn takes one argument,
-- a repeat count. The function is called with increasingly large repeat counts
-- (100000, then ten times more on each attempt) until a run takes more than
-- 0.1 s of os.clock() time or the count reaches 1e9. It is then called four
-- more times with the same repeat count, and the minimum elapsed time is kept.
-- Modeled loosely on Python's timeit, except the function passed in is responsible
-- for doing the actual repetition.
-- @param func function(reps) performing `reps` repetitions of the work to time
-- @return string "reps<TAB>seconds<TAB>nanoseconds per repetition"
return function(func)
  local min_elapsed = 0.1  -- a calibration run must take longer than this
  local max_reps = 1e9     -- stop growing the repeat count at this value

  local reps = 10000
  local elapsed
  repeat
    reps = reps * 10
    local start = os.clock()
    func(reps)
    elapsed = os.clock() - start
  until elapsed > min_elapsed or reps >= max_reps

  -- Re-run at the chosen count and keep the fastest of all runs.
  for _ = 1, 4 do
    local start = os.clock()
    func(reps)
    elapsed = math.min(elapsed, os.clock() - start)
  end

  return ("%10d\t%.3f\t%7.3f"):format(reps, elapsed, elapsed / reps * 1e9)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the detailed analysis and for running it on such different architectures!
Really nice to see ARM in there too, we can only expect it to get better, indeed.
I had no idea people were still interested in this benchmark. I revisited the answers to the question and I find that, indeed, this measures the CPU more than anything else. I really recommend that people take Josh's suggestion and benchmark everything in context, since this empty benchmark may not carry over to one's real scenario.
But other than that, we can see that the performance is about the same regardless of the direction (Lua -> C, C->Lua) except on ARM, so I'd first write for readability and ease of use, and then only move code to the other side if it turns out to be a bottleneck.