cipri-tom · August 20, 2021 08:23 · exikyut · Jul 24, 2021 · cipri-tom · Jul 29, 2021
diff --git a/callback-bench.lua b/callback-bench.lua
 --- Write a formatted message to standard output (no newline is appended).
 -- @param s    format string, used as the receiver of `:format`
 -- @param ...  values substituted into the format string
 local function printf(s, ...)
   local text = s:format(...)
   io.write(text)
 end

 local ffi = require("ffi")
 -- C declarations for the entry points exported by callback.so.
 -- Return types must match the definitions in callback.c: get_i and get_d
 -- invoke the callback n times and discard its result, so both return void
 -- (they were previously declared here as returning int / double).
 ffi.cdef[[
   typedef void (*cb)(void);
   void set_v(int n, void (*)(void  ));
   void set_i(int n, void (*)(int   ));
   void set_d(int n, void (*)(double));

   void get_i(int n, int    (*)(void));
   void get_d(int n, double (*)(void));

   void call_v(void  );
   void call_i(int   );
   void call_d(double);
   void loop   (int n);
 ]]
 local callback = ffi.load("./callback.so")
 local timeit = require("timeit")

 -- Shared counter: incremented by lset_v, read by lget.
 local v = 0

 --- Callback with no argument: increments the shared counter.
 local function lset_v()
   v = v + 1
 end

 --- Callback with one numeric argument: increments only its local copy.
 local function lset_a(a)
   a = a + 1
 end

 --- Callback returning a value: twice the current counter.
 local function lget()
   return 2 * v
 end

 -- Column headings for the report.
 print("operation          ", "reps     ", "time(s)", "nsec/call")

 -- C -> Lua benchmarks: each entry pairs an exported C driver with the Lua
 -- callback that driver invokes n times.
 local c2l = {
   { name = 'set_v', func = lset_v },
   { name = 'set_i', func = lset_a },
   { name = 'set_d', func = lset_a },
   { name = 'get_i', func = lget },
   { name = 'get_d', func = lget },
 }

 for idx = 1, #c2l do
   local test = c2l[idx]
   -- The repetition happens inside the C driver; timeit only supplies n.
   local function run(n)
     callback[test.name](n, test.func)
   end
   local report = timeit(run)
   printf("C into Lua %-12s %s\n", test.name, report)
 end

 -- Lua -> C: time a loop of n FFI calls to an exported C function, one
 -- measurement per argument type (none, int, double).
 print("Lua into C call(void)  ", timeit(function(n)
  for i = 1, n do callback.call_v() end
 end))

 print("Lua into C call(int)   ", timeit(function(n)
  for i = 1, n do callback.call_i(3) end
 end))

 print("Lua into C call(double)", timeit(function(n)
  for i = 1, n do callback.call_d(3.5) end
 end))

 -- Baseline: a local Lua function called n times from Lua.
 print("Lua into Lua       ", timeit(function(n)
  for i = 1, n do lset_v() end
 end))

 -- Baseline: a single FFI call; the n iterations run inside the C library.
 print("C empty loop       ", timeit(function(n)
  callback.loop(n)
 end))

 -- Baseline: an empty numeric for loop of n iterations in Lua.
 print("Lua empty loop     ", timeit(function(n)
  for i = 1, n do end
 end))
diff --git a/callback.c b/callback.c
 // gcc -std=c99 -Wall -pedantic -O3 -shared -static-libgcc -fPIC callback.c -o callback.so

 // --- CALLS ------------------------------------------------------------------
 /* Empty function taking no arguments. */
 void call_v(void)
 {
 }

 /* Takes one int; the addition only changes the local copy. */
 void call_i(int v)
 {
   v = v + 5;
 }

 /* Takes one double; the addition only changes the local copy. */
 void call_d(double v)
 {
   v = v + 5.0;
 }

 /* Count from 0 up to n without doing any work per iteration. */
 void loop(int n)
 {
   int i = 0;
   while (i < n) {
     /* prevent compiler optimizations from skipping loop entirely */
     __asm__("");
     i++;
   }
 }

 // --- SETTERS ----------------------------------------------------------------

 /* Invoke the no-argument callback f n times (no calls when n <= 0). */
 void set_v(int n, void (*f)(void))
 {
   int i = 0;
   while (i < n) {
     f();
     i++;
   }
 }

 /* Invoke f n times, passing the iteration index 0 .. n-1. */
 void set_i(int n, void (*f)(int))
 {
   int i = 0;
   while (i < n) {
     f(i);
     i++;
   }
 }

 /* Invoke f n times, always passing the constant 3.0. */
 void set_d(int n, void (*f)(double))
 {
   const double a = 3.0;
   int i = 0;
   while (i < n) {
     f(a);
     i++;
   }
 }

 // --- GETTERS ----------------------------------------------------------------

 /* Call the int-returning callback f n times; every result is discarded. */
 void get_i(int n, int (*f)(void)) {
   int v = 0;  // initialised: the read below must be defined even when n <= 0
   for (int i = 0; i < n; i++) {
       v = f();
   }
   (void)v;  // avoid 'set-but-unused' warning
 }

 /* Call the double-returning callback f n times; every result is discarded. */
 void get_d(int n, double (*f)(void)) {
   double v = 0.0;  // initialised: the read below must be defined even when n <= 0
   for (int i = 0; i < n; i++) {
       v = f();
   }
   (void)v;  // avoid 'set-but-unused' warning
 }

 // --- PUSH vs PULL -----------------------------------------------------------

 /* Callback type: receives a byte array and its length, returns a double. */
 typedef double (*getter_fp)(int len, unsigned char mono[len]);

 /* `size` followed by the doubles themselves (C99 flexible array member). */
 struct Arr {
   int size;
   double data[];
 };

 enum constants {MONO_LEN = 5};
 unsigned char mono[MONO_LEN] = {1, 2, 3, 4, 5};


 // --- --- PUSH style ---------------------------------------------------------
 /* Multiply each of the a->size elements of a->data by the value returned by
  * get_multiplier(MONO_LEN, mono); the callback is invoked once per element.
  * A null array or a null callback makes the call a no-op. */
 void push_style(struct Arr *a, getter_fp get_multiplier)
 {
   if (!a || !get_multiplier)
     return;
   for (int i = 0; i < a->size; ++i)
     a->data[i] *= get_multiplier(MONO_LEN, mono);
 }

 // --- --- PULL style ---------------------------------------------------------

 /* Return the number of entries in the mono table. */
 int get_mono_len(void)
 {
   return MONO_LEN;
 }

 /* Return the mono table; idx is accepted but does not affect the result. */
 unsigned char*
 get_mono(int idx)
 {
   (void)idx;  /* unused */
   return mono;
 }
diff --git a/push_pull.lua b/push_pull.lua
 -- OBJECTIVE    : apply a Lua function to all members of an array
 -- -- PUSH style: do it on the C side, with a callback to the Lua function
 -- -- PULL style: do it on the Lua side, with calls into C to get the necessary info

 local ffi = require("ffi")

 -- C declarations for the push/pull entry points loaded from callback.so:
 --   getter_fp    callback type handed to push_style
 --   struct Arr   `size` plus a variable-length `data` member; `[?]` leaves the
 --                length to be supplied when the struct is created
 --   push_style   takes an array and a getter_fp callback
 --   get_mono_len / get_mono   accessors called from the Lua side (pull style)
 ffi.cdef[[
  typedef double (*getter_fp)(int len, unsigned char mono[]);

  struct Arr {
    int size;
    double data[?];
  };

  void           push_style(struct Arr *a, getter_fp get_multiplier);
  int            get_mono_len();
  unsigned char* get_mono(int idx);
 ]]

 local callback = ffi.load("./callback.so")
 local timeit = require("timeit")

 local arr_t = ffi.typeof("struct Arr");

 -- the callback
 --- Multiplier callback: half the sum of entries 0 .. len-1 of `mono`.
 -- @param len   number of entries to read
 -- @param mono  zero-indexed container of numbers
 -- @return      the sum multiplied by 0.5
 local function lget_multiplier(len, mono)
   local total = 0
   local idx = 0
   while idx <= len - 1 do
     total = total + mono[idx]
     idx = idx + 1
   end
   return total * 0.5
 end

 -- PUSH style -----------------------------------------------------------------
 --- Build an n-element array holding 0 .. n-1 and let the C side scale it,
 -- calling back into lget_multiplier for every element.
 -- @param n  number of elements
 -- @return   the scaled `struct Arr` cdata
 local function push_style(n)
   local a = arr_t(n, {n})
   for i=0,n-1 do a.data[i] = i end

   -- A callback created with ffi.cast holds a callback slot until it is freed
   -- explicitly; the garbage collector does not reclaim it. Release it after
   -- the C call so repeated invocations do not use up the limited slots.
   local cb = ffi.cast("getter_fp", lget_multiplier)
   callback.push_style(a, cb)
   cb:free()
   return a
 end

 -- PULL style -----------------------------------------------------------------
 --- Build an n-element array holding 0 .. n-1 and scale it on the Lua side,
 -- fetching the mono table from C once per element.
 -- @param n  number of elements
 -- @return   the scaled `struct Arr` cdata
 local function pull_style(n)
   local arr = arr_t(n, {n})
   local last = n - 1
   for k = 0, last do
     arr.data[k] = k
   end

   local mono_len = callback.get_mono_len()
   for k = 0, last do
     local bytes = callback.get_mono(k)
     local current = arr.data[k]
     arr.data[k] = current * lget_multiplier(mono_len, bytes)
   end
   return arr
 end

 -- CHECK ----------------------------------------------------------------------
 -- Both strategies must produce identical arrays before they are timed.
 local push_v = push_style(100)
 local pull_v = pull_style(100)
 assert(push_v.size == pull_v.size)
 local last_index = push_v.size - 1
 for k = 0, last_index do
   assert(push_v.data[k] == pull_v.data[k])
 end

 -- BENCH ----------------------------------------------------------------------
 local push_report = timeit(push_style)
 print("PUSH style", push_report)
 local pull_report = timeit(pull_style)
 print("PULL style", pull_report)

diff --git a/results.txt b/results.txt
 operation          	reps     	time(s)	nsec/call
 C into Lua set_v          10000000	0.498	 49.817
 C into Lua set_i          10000000	0.662	 66.249
 C into Lua set_d          10000000	0.681	 68.143
 C into Lua get_i          10000000	0.633	 63.272
 C into Lua get_d          10000000	0.650	 64.990
 Lua into C call(void)  	 100000000	0.381	  3.807
 Lua into C call(int)   	 100000000	0.381	  3.815
 Lua into C call(double)	 100000000	0.415	  4.154
 Lua into Lua       	 100000000	0.104	  1.039
 C empty loop       	1000000000	0.695	  0.695
 Lua empty loop     	1000000000	0.693	  0.693

 PUSH style	   1000000	0.158	158.256
 PULL style	   1000000	0.207	207.297

diff --git a/timeit.lua b/timeit.lua
 --- Time a function that performs its own repetition.
 -- The argument is a function taking a single parameter, the repeat count.
 -- Calibration: the count starts at 100000 and grows tenfold per attempt until
 -- one run takes more than 0.1 s of os.clock time or the count reaches 1e9.
 -- Measurement: the function is then run four more times with that count and
 -- the smallest elapsed time of all five runs is kept.
 -- Modeled loosely on Python's timeit, except that the function passed in is
 -- responsible for doing the actual repetition.
 -- @param func  function(reps) executing the workload `reps` times
 -- @return      string "reps<TAB>seconds<TAB>nanoseconds per repetition"
 return function(func)
   local reps = 10000
   local elapsed
   while true do
     reps = reps * 10
     local began = os.clock()
     func(reps)
     elapsed = os.clock() - began
     if elapsed > 0.1 or reps >= 1e9 then
       break
     end
   end
   for _ = 1, 4 do
     local began = os.clock()
     func(reps)
     local took = os.clock() - began
     if took < elapsed then
       elapsed = took
     end
   end
   local nsec_per_call = elapsed / reps * 1e9
   return ("%10d\t%.3f\t%7.3f"):format(reps, elapsed, nsec_per_call)
 end
	--- Write a formatted message to standard output (no newline is appended).
	-- @param s    format string, used as the receiver of `:format`
	-- @param ...  values substituted into the format string
	local function printf(s, ...)
		local text = s:format(...)
		io.write(text)
	end

	local ffi = require("ffi")
	-- C declarations for the entry points exported by callback.so.
	-- Return types must match the C definitions: get_i and get_d invoke the
	-- callback n times and discard its result, so both return void (they were
	-- previously declared here as returning int / double).
	ffi.cdef[[
	typedef void (*cb)(void);
	void set_v(int n, void (*)(void ));
	void set_i(int n, void (*)(int ));
	void set_d(int n, void (*)(double));

	void get_i(int n, int (*)(void));
	void get_d(int n, double (*)(void));

	void call_v(void );
	void call_i(int );
	void call_d(double);
	void loop (int n);
	]]
	local callback = ffi.load("./callback.so")
	local timeit = require("timeit")

	-- Shared counter: incremented by lset_v, read by lget.
	local v = 0

	--- Callback with no argument: increments the shared counter.
	local function lset_v()
		v = v + 1
	end

	--- Callback with one numeric argument: increments only its local copy.
	local function lset_a(a)
		a = a + 1
	end

	--- Callback returning a value: twice the current counter.
	local function lget()
		return 2 * v
	end

	-- Column headings for the report.
	print("operation ", "reps ", "time(s)", "nsec/call")

	-- C -> Lua benchmarks: each entry pairs an exported C driver with the Lua
	-- callback that driver invokes n times.
	local c2l = {
		{ name = 'set_v', func = lset_v },
		{ name = 'set_i', func = lset_a },
		{ name = 'set_d', func = lset_a },
		{ name = 'get_i', func = lget },
		{ name = 'get_d', func = lget },
	}

	for idx = 1, #c2l do
		local test = c2l[idx]
		-- The repetition happens inside the C driver; timeit only supplies n.
		local function run(n)
			callback[test.name](n, test.func)
		end
		local report = timeit(run)
		printf("C into Lua %-12s %s\n", test.name, report)
	end

	-- Lua -> C: time a loop of n FFI calls to an exported C function, one
	-- measurement per argument type (none, int, double).
	print("Lua into C call(void) ", timeit(function(n)
	for i = 1, n do callback.call_v() end
	end))

	print("Lua into C call(int) ", timeit(function(n)
	for i = 1, n do callback.call_i(3) end
	end))

	print("Lua into C call(double)", timeit(function(n)
	for i = 1, n do callback.call_d(3.5) end
	end))

	-- Baseline: a local Lua function called n times from Lua.
	print("Lua into Lua ", timeit(function(n)
	for i = 1, n do lset_v() end
	end))

	-- Baseline: a single FFI call; the n iterations run inside the C library.
	print("C empty loop ", timeit(function(n)
	callback.loop(n)
	end))

	-- Baseline: an empty numeric for loop of n iterations in Lua.
	print("Lua empty loop ", timeit(function(n)
	for i = 1, n do end
	end))
	// gcc -std=c99 -Wall -pedantic -O3 -shared -static-libgcc -fPIC callback.c -o callback.so

	// --- CALLS ------------------------------------------------------------------
	/* Empty function taking no arguments. */
	void call_v(void)
	{
	}

	/* Takes one int; the addition only changes the local copy. */
	void call_i(int v)
	{
		v = v + 5;
	}

	/* Takes one double; the addition only changes the local copy. */
	void call_d(double v)
	{
		v = v + 5.0;
	}

	/* Count from 0 up to n without doing any work per iteration. */
	void loop(int n)
	{
		int i = 0;
		while (i < n) {
			/* prevent compiler optimizations from skipping loop entirely */
			__asm__("");
			i++;
		}
	}

	// --- SETTERS ----------------------------------------------------------------

	/* Invoke the no-argument callback f n times (no calls when n <= 0). */
	void set_v(int n, void (*f)(void))
	{
		int i = 0;
		while (i < n) {
			f();
			i++;
		}
	}

	/* Invoke f n times, passing the iteration index 0 .. n-1. */
	void set_i(int n, void (*f)(int))
	{
		int i = 0;
		while (i < n) {
			f(i);
			i++;
		}
	}

	/* Invoke f n times, always passing the constant 3.0. */
	void set_d(int n, void (*f)(double))
	{
		const double a = 3.0;
		int i = 0;
		while (i < n) {
			f(a);
			i++;
		}
	}

	// --- GETTERS ----------------------------------------------------------------

	/* Call the int-returning callback f n times; every result is discarded. */
	void get_i(int n, int (*f)(void)) {
		int v = 0; // initialised: the read below must be defined even when n <= 0
		for (int i = 0; i < n; i++) {
			v = f();
		}
		(void)v; // avoid 'set-but-unused' warning
	}

	/* Call the double-returning callback f n times; every result is discarded. */
	void get_d(int n, double (*f)(void)) {
		double v = 0.0; // initialised: the read below must be defined even when n <= 0
		for (int i = 0; i < n; i++) {
			v = f();
		}
		(void)v; // avoid 'set-but-unused' warning
	}

	// --- PUSH vs PULL -----------------------------------------------------------

	/* Callback type: receives a byte array and its length, returns a double. */
	typedef double (*getter_fp)(int len, unsigned char mono[len]);

	/* `size` followed by the doubles themselves (C99 flexible array member). */
	struct Arr {
		int size;
		double data[];
	};

	enum constants {MONO_LEN = 5};
	unsigned char mono[MONO_LEN] = {1, 2, 3, 4, 5};


	// --- --- PUSH style ---------------------------------------------------------
	/* Multiply each of the a->size elements of a->data by the value returned by
	 * get_multiplier(MONO_LEN, mono); the callback is invoked once per element.
	 * A null array or a null callback makes the call a no-op. */
	void push_style(struct Arr *a, getter_fp get_multiplier)
	{
		if (!a || !get_multiplier)
			return;
		for (int i = 0; i < a->size; ++i)
			a->data[i] *= get_multiplier(MONO_LEN, mono);
	}

	// --- --- PULL style ---------------------------------------------------------

	/* Return the number of entries in the mono table. */
	int get_mono_len(void)
	{
		return MONO_LEN;
	}

	/* Return the mono table; idx is accepted but does not affect the result. */
	unsigned char*
	get_mono(int idx)
	{
		(void)idx; /* unused */
		return mono;
	}
	-- OBJECTIVE : apply a Lua function to all members of an array
	-- -- PUSH style: do it on the C side, with a callback to the Lua function
	-- -- PULL style: do it on the Lua side, with calls into C to get the necessary info

	local ffi = require("ffi")

	-- C declarations for the push/pull entry points loaded from callback.so:
	--   getter_fp    callback type handed to push_style
	--   struct Arr   `size` plus a variable-length `data` member; `[?]` leaves the
	--                length to be supplied when the struct is created
	--   push_style   takes an array and a getter_fp callback
	--   get_mono_len / get_mono   accessors called from the Lua side (pull style)
	ffi.cdef[[
	typedef double (*getter_fp)(int len, unsigned char mono[]);

	struct Arr {
	int size;
	double data[?];
	};

	void push_style(struct Arr *a, getter_fp get_multiplier);
	int get_mono_len();
	unsigned char* get_mono(int idx);
	]]

	local callback = ffi.load("./callback.so")
	local timeit = require("timeit")

	local arr_t = ffi.typeof("struct Arr");

	-- the callback
	--- Multiplier callback: half the sum of entries 0 .. len-1 of `mono`.
	-- @param len   number of entries to read
	-- @param mono  zero-indexed container of numbers
	-- @return      the sum multiplied by 0.5
	local function lget_multiplier(len, mono)
		local total = 0
		local idx = 0
		while idx <= len - 1 do
			total = total + mono[idx]
			idx = idx + 1
		end
		return total * 0.5
	end

	-- PUSH style -----------------------------------------------------------------
	--- Build an n-element array holding 0 .. n-1 and let the C side scale it,
	-- calling back into lget_multiplier for every element.
	-- @param n  number of elements
	-- @return   the scaled `struct Arr` cdata
	local function push_style(n)
		local a = arr_t(n, {n})
		for i=0,n-1 do a.data[i] = i end

		-- A callback created with ffi.cast holds a callback slot until it is
		-- freed explicitly; the garbage collector does not reclaim it. Release it
		-- after the C call so repeated invocations do not use up the limited slots.
		local cb = ffi.cast("getter_fp", lget_multiplier)
		callback.push_style(a, cb)
		cb:free()
		return a
	end

	-- PULL style -----------------------------------------------------------------
	--- Build an n-element array holding 0 .. n-1 and scale it on the Lua side,
	-- fetching the mono table from C once per element.
	-- @param n  number of elements
	-- @return   the scaled `struct Arr` cdata
	local function pull_style(n)
		local arr = arr_t(n, {n})
		local last = n - 1
		for k = 0, last do
			arr.data[k] = k
		end

		local mono_len = callback.get_mono_len()
		for k = 0, last do
			local bytes = callback.get_mono(k)
			local current = arr.data[k]
			arr.data[k] = current * lget_multiplier(mono_len, bytes)
		end
		return arr
	end

	-- CHECK ----------------------------------------------------------------------
	-- Both strategies must produce identical arrays before they are timed.
	local push_v = push_style(100)
	local pull_v = pull_style(100)
	assert(push_v.size == pull_v.size)
	local last_index = push_v.size - 1
	for k = 0, last_index do
		assert(push_v.data[k] == pull_v.data[k])
	end

	-- BENCH ----------------------------------------------------------------------
	local push_report = timeit(push_style)
	print("PUSH style", push_report)
	local pull_report = timeit(pull_style)
	print("PULL style", pull_report)
	operation reps time(s) nsec/call
	C into Lua set_v 10000000 0.498 49.817
	C into Lua set_i 10000000 0.662 66.249
	C into Lua set_d 10000000 0.681 68.143
	C into Lua get_i 10000000 0.633 63.272
	C into Lua get_d 10000000 0.650 64.990
	Lua into C call(void) 100000000 0.381 3.807
	Lua into C call(int) 100000000 0.381 3.815
	Lua into C call(double) 100000000 0.415 4.154
	Lua into Lua 100000000 0.104 1.039
	C empty loop 1000000000 0.695 0.695
	Lua empty loop 1000000000 0.693 0.693

	PUSH style 1000000 0.158 158.256
	PULL style 1000000 0.207 207.297
	--- Time a function that performs its own repetition.
	-- The argument is a function taking a single parameter, the repeat count.
	-- Calibration: the count starts at 100000 and grows tenfold per attempt until
	-- one run takes more than 0.1 s of os.clock time or the count reaches 1e9.
	-- Measurement: the function is then run four more times with that count and
	-- the smallest elapsed time of all five runs is kept.
	-- Modeled loosely on Python's timeit, except that the function passed in is
	-- responsible for doing the actual repetition.
	-- @param func  function(reps) executing the workload `reps` times
	-- @return      string "reps<TAB>seconds<TAB>nanoseconds per repetition"
	return function(func)
		local reps = 10000
		local elapsed
		while true do
			reps = reps * 10
			local began = os.clock()
			func(reps)
			elapsed = os.clock() - began
			if elapsed > 0.1 or reps >= 1e9 then
				break
			end
		end
		for _ = 1, 4 do
			local began = os.clock()
			func(reps)
			local took = os.clock() - began
			if took < elapsed then
				elapsed = took
			end
		end
		local nsec_per_call = elapsed / reps * 1e9
		return ("%10d\t%.3f\t%7.3f"):format(reps, elapsed, nsec_per_call)
	end