t0mst0ne · October 20, 2015 14:43
diff --git a/Binomial-Probability-Mass-Function_Speed-Comparison.ipynb b/Binomial-Probability-Mass-Function_Speed-Comparison.ipynb
 {
 "worksheets": [
  {
   "cells": [
    {
     "metadata": {},
     "cell_type": "markdown",
     "source": "The Binomial Probability Mass Function in Python\n================================================\n\nKamil Slowikowski\n\nMay 3, 2014\n\nThis document compares the speed of a few different Python and C\nimplementations of the [binomial][1] probability mass function:\n\n$$P = \\binom{n}{k}  p^k  (1 - p)^{n - k}$$\n\n- A fair coin has $p = 0.5$ to land on each side after a flip, head or tails.\n- $P$ is the probability to observe $k$ heads after $n$ flips.\n- $\\binom{n}{k}$ is the [binomial coefficient][2]:\n\n$$\\binom{n}{k} = \\dfrac{n!}{k!(n-k)!}$$\n\n[1]: http://en.wikipedia.org/wiki/Binomial_distribution\n[2]: https://en.wikipedia.org/wiki/Binomial_coefficient\n\n\nResults\n-------\n\nThis is a single run on an AMD Opteron(tm) 6380 2.5GHz.\nEach time is based on 1000 or more loops (except sympy 10 loops).\n\n    Type     Slowdown    Per Loop      Function\n    ------   --------    ---------     ---------------------\n    sympy    50000         55   ms     sympy_bin_pmf\n    scipy      312        344   µs     scipy.stats.binom.pmf\n    numpy       32.7       36   µs     np_pmf\n    python      30.0       33   µs     pmf\n    GSL          1.18       1.3 µs     C2.gsl_ran_binomial_pdf\n    C            1.00       1.1 µs     C1.bin_pmf"
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "k, n, p = 3, 40, 0.001",
     "prompt_number": 1,
     "outputs": [],
     "language": "python",
     "trusted": false,
     "collapsed": true
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## sympy\nfrom sympy.stats import density, Binomial\n\nsympy_bin_pmf = density(Binomial('B', n, p))\n\n%timeit sympy_bin_pmf(k)",
     "prompt_number": 2,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "10 loops, best of 3: 50 ms per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## scipy\nimport scipy.stats as ss\n\n%timeit ss.binom.pmf(k, n, p)",
     "prompt_number": 3,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "1000 loops, best of 3: 343 µs per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## numpy\nimport numpy as np\n\nF = np.math.factorial\nP = np.power\n\nnp_pmf = lambda k, n, p: F(n) / (F(k) * F(n - k)) * P(p, k) * P(1 - p, n - k)\n\n%timeit np_pmf(k, n, p)",
     "prompt_number": 4,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "10000 loops, best of 3: 35.9 µs per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## python\ndef F(n):\n    result = 1\n    while n:\n        result *= n\n        n -= 1\n    return result\n\npmf = lambda k, n, p: F(n) / (F(k) * F(n - k)) * p ** k * (1 - p) ** (n - k)\n\n%timeit pmf(k, n, p)",
     "prompt_number": 5,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "10000 loops, best of 3: 34.3 µs per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## C - GSL\nimport cffi\n\nffi = cffi.FFI()\nffi.cdef(\"\"\"\ndouble gsl_ran_binomial_pdf (const unsigned int k, const double p, const unsigned int n);\n\"\"\")\nC2 = ffi.verify(\"\"\"\n#include <gsl/gsl_randist.h>\n\"\"\", libraries=['gsl', 'gslcblas'])\n\n%timeit C2.gsl_ran_binomial_pdf(k, p, n)",
     "prompt_number": 6,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "1000000 loops, best of 3: 1.18 µs per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "## C\nimport cffi\n\nffi = cffi.FFI()\nffi.cdef(\"\"\"\ntypedef unsigned long ULONG;\nULONG binomial(ULONG n, ULONG k);\ndouble bin_pmf(int n, int k, double p);\n\"\"\")\nC1 = ffi.verify(\"\"\"\n#include <stdio.h>\n#include <limits.h>\n#include <math.h>\n \ntypedef unsigned long ULONG;\n \nULONG binomial(ULONG n, ULONG k)\n{\n    ULONG r = 1, d = n - k;\n\n    /* choose the smaller of k and n - k */\n    if (d > k) { k = d; d = n - k; }\n\n    while (n > k) {\n        if (r >= UINT_MAX / n) return 0; /* overflown */\n        r *= n--;\n\n        /* divide (n - k)! as soon as we can to delay overflows */\n        while (d > 1 && !(r % d)) r /= d--;\n    }\n    return r;\n}\ndouble bin_pmf(int k, int n, double p) {\n    return binomial(n, k) * pow(p, (double) k) * pow(1 - p, (double) n - k);\n}\n\"\"\")\n\n%timeit C1.bin_pmf(k, n, p)",
     "prompt_number": 7,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "1000000 loops, best of 3: 1.06 µs per loop\n"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "# They produce the same answers.\nx6 = sympy_bin_pmf(k)\nx5 = ss.binom.pmf(k, n, p)\nx4 = np_pmf(k, n, p)\nx3 = pmf(k, n, p)\nx2 = C2.gsl_ran_binomial_pdf(k, p, n)\nx1 = C1.bin_pmf(k, n, p)\n\nnp.allclose(x1, x2, x3, x4), np.allclose(x4, x5, x6)",
     "prompt_number": 8,
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 8,
       "metadata": {},
       "text": "(True, True)"
      }
     ],
     "language": "python",
     "trusted": false,
     "collapsed": false
    }
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "name": "",
  "signature": "sha256:7a57ca7922be357653fd3d3c2dabccb3d1a1e83ba78f82c719321ff5711d6930"
 },
 "nbformat": 3
 }
	{
	"worksheets": [
	{
	"cells": [
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "The Binomial Probability Mass Function in Python\n================================================\n\nKamil Slowikowski\n\nMay 3, 2014\n\nThis document compares the speed of a few different Python and C\nimplementations of the [binomial][1] probability mass function:\n\n$$P = \\binom{n}{k} p^k (1 - p)^{n - k}$$\n\n- A fair coin has $p = 0.5$ to land on each side after a flip, head or tails.\n- $P$ is the probability to observe $k$ heads after $n$ flips.\n- $\\binom{n}{k}$ is the [binomial coefficient][2]:\n\n$$\\binom{n}{k} = \\dfrac{n!}{k!(n-k)!}$$\n\n[1]: http://en.wikipedia.org/wiki/Binomial_distribution\n[2]: https://en.wikipedia.org/wiki/Binomial_coefficient\n\n\nResults\n-------\n\nThis is a single run on an AMD Opteron(tm) 6380 2.5GHz.\nEach time is based on 1000 or more loops (except sympy 10 loops).\n\n Type Slowdown Per Loop Function\n ------ -------- --------- ---------------------\n sympy 50000 55 ms sympy_bin_pmf\n scipy 312 344 µs scipy.stats.binom.pmf\n numpy 32.7 36 µs np_pmf\n python 30.0 33 µs pmf\n GSL 1.18 1.3 µs C2.gsl_ran_binomial_pdf\n C 1.00 1.1 µs C1.bin_pmf"
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "k, n, p = 3, 40, 0.001",
	"prompt_number": 1,
	"outputs": [],
	"language": "python",
	"trusted": false,
	"collapsed": true
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## sympy\nfrom sympy.stats import density, Binomial\n\nsympy_bin_pmf = density(Binomial('B', n, p))\n\n%timeit sympy_bin_pmf(k)",
	"prompt_number": 2,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "10 loops, best of 3: 50 ms per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## scipy\nimport scipy.stats as ss\n\n%timeit ss.binom.pmf(k, n, p)",
	"prompt_number": 3,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "1000 loops, best of 3: 343 µs per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## numpy\nimport numpy as np\n\nF = np.math.factorial\nP = np.power\n\nnp_pmf = lambda k, n, p: F(n) / (F(k) * F(n - k)) * P(p, k) * P(1 - p, n - k)\n\n%timeit np_pmf(k, n, p)",
	"prompt_number": 4,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "10000 loops, best of 3: 35.9 µs per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## python\ndef F(n):\n result = 1\n while n:\n result = n\n n -= 1\n return result\n\npmf = lambda k, n, p: F(n) / (F(k) F(n - k)) * p ** k * (1 - p) ** (n - k)\n\n%timeit pmf(k, n, p)",
	"prompt_number": 5,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "10000 loops, best of 3: 34.3 µs per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## C - GSL\nimport cffi\n\nffi = cffi.FFI()\nffi.cdef(\"\"\"\ndouble gsl_ran_binomial_pdf (const unsigned int k, const double p, const unsigned int n);\n\"\"\")\nC2 = ffi.verify(\"\"\"\n#include <gsl/gsl_randist.h>\n\"\"\", libraries=['gsl', 'gslcblas'])\n\n%timeit C2.gsl_ran_binomial_pdf(k, p, n)",
	"prompt_number": 6,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "1000000 loops, best of 3: 1.18 µs per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "## C\nimport cffi\n\nffi = cffi.FFI()\nffi.cdef(\"\"\"\ntypedef unsigned long ULONG;\nULONG binomial(ULONG n, ULONG k);\ndouble bin_pmf(int n, int k, double p);\n\"\"\")\nC1 = ffi.verify(\"\"\"\n#include <stdio.h>\n#include <limits.h>\n#include <math.h>\n \ntypedef unsigned long ULONG;\n \nULONG binomial(ULONG n, ULONG k)\n{\n ULONG r = 1, d = n - k;\n\n /* choose the smaller of k and n - k /\n if (d > k) { k = d; d = n - k; }\n\n while (n > k) {\n if (r >= UINT_MAX / n) return 0; / overflown /\n r = n--;\n\n /* divide (n - k)! as soon as we can to delay overflows /\n while (d > 1 && !(r % d)) r /= d--;\n }\n return r;\n}\ndouble bin_pmf(int k, int n, double p) {\n return binomial(n, k) pow(p, (double) k) * pow(1 - p, (double) n - k);\n}\n\"\"\")\n\n%timeit C1.bin_pmf(k, n, p)",
	"prompt_number": 7,
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "1000000 loops, best of 3: 1.06 µs per loop\n"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	},
	{
	"metadata": {},
	"cell_type": "code",
	"input": "# They produce the same answers.\nx6 = sympy_bin_pmf(k)\nx5 = ss.binom.pmf(k, n, p)\nx4 = np_pmf(k, n, p)\nx3 = pmf(k, n, p)\nx2 = C2.gsl_ran_binomial_pdf(k, p, n)\nx1 = C1.bin_pmf(k, n, p)\n\nnp.allclose(x1, x2, x3, x4), np.allclose(x4, x5, x6)",
	"prompt_number": 8,
	"outputs": [
	{
	"output_type": "pyout",
	"prompt_number": 8,
	"metadata": {},
	"text": "(True, True)"
	}
	],
	"language": "python",
	"trusted": false,
	"collapsed": false
	}
	],
	"metadata": {}
	}
	],
	"metadata": {
	"name": "",
	"signature": "sha256:7a57ca7922be357653fd3d3c2dabccb3d1a1e83ba78f82c719321ff5711d6930"
	},
	"nbformat": 3
	}