Skip to content

Instantly share code, notes, and snippets.

@jfirebaugh
Created April 3, 2011 20:25

Revisions

  1. jfirebaugh revised this gist Apr 3, 2011. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions readme.mkd
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,3 @@
    A [kernel density estimate](http://en.wikipedia.org/wiki/Kernel_density_estimation) provides a means of estimating and visualizing the probability distribution function of a random variable based on a random sample. In contrast to a histogram, a kernel density estimate provides a smooth estimate, via the effect of a smoothing parameter called the _bandwidth_, here denoted by _h_. With the correct choice of bandwidth, important features of the distribution can be seen; an incorrect choice will result in undersmoothing or oversmoothing and obscure those features.

    Here we see a histogram and three kernel density estimates for a sample of waiting times in minutes between eruptions of [Old Faithful Geyser](http://en.wikipedia.org/wiki/Old_Faithful) in Yellowstone National Park, taken from R's [`faithful`](http://stat.ethz.ch/R-manual/R-patched/library/datasets/html/faithful.html) dataset. The data follow a [bimodal distribution](http://en.wikipedia.org/wiki/Bimodal_distribution); short eruptions are followed by a wait time averaging about 55 minutes, and long eruptions by a wait time averaging about 80 minutes. In recent years, wait times have been increasing, possibly due to the effects of earthquakes on the geyser's geohydrology.
  2. jfirebaugh created this gist Apr 3, 2011.
    8 changes: 8 additions & 0 deletions faithful.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,8 @@
    var faithful = [
    79, 54, 74, 62, 85, 55, 88, 85, 51, 85, 54, 84, 78, 47, 83, 52, 62, 84, 52, 79, 51, 47, 78, 69, 74, 83, 55, 76, 78, 79, 73, 77, 66, 80, 74, 52, 48, 80, 59, 90, 80, 58, 84, 58, 73, 83, 64, 53,
    82, 59, 75, 90, 54, 80, 54, 83, 71, 64, 77, 81, 59, 84, 48, 82, 60, 92, 78, 78, 65, 73, 82, 56, 79, 71, 62, 76, 60, 78, 76, 83, 75, 82, 70, 65, 73, 88, 76, 80, 48, 86, 60, 90, 50, 78, 63, 72,
    84, 75, 51, 82, 62, 88, 49, 83, 81, 47, 84, 52, 86, 81, 75, 59, 89, 79, 59, 81, 50, 85, 59, 87, 53, 69, 77, 56, 88, 81, 45, 82, 55, 90, 45, 83, 56, 89, 46, 82, 51, 86, 53, 79, 81, 60, 82, 77,
    76, 59, 80, 49, 96, 53, 77, 77, 65, 81, 71, 70, 81, 93, 53, 89, 45, 86, 58, 78, 66, 76, 63, 88, 52, 93, 49, 57, 77, 68, 81, 81, 73, 50, 85, 74, 55, 77, 83, 83, 51, 78, 84, 46, 83, 55, 81, 57,
    76, 84, 77, 81, 87, 77, 51, 78, 60, 82, 91, 53, 78, 46, 77, 84, 49, 83, 71, 80, 49, 75, 64, 76, 53, 94, 55, 76, 50, 82, 54, 75, 78, 79, 78, 78, 70, 79, 70, 54, 86, 50, 90, 54, 54, 77, 79, 64,
    75, 47, 86, 63, 85, 82, 57, 82, 67, 74, 54, 83, 73, 73, 88, 80, 71, 83, 56, 79, 78, 84, 58, 83, 43, 60, 75, 81, 46, 90, 46, 74
    ];
    87 changes: 87 additions & 0 deletions index.html
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,87 @@
    <html>
    <head>
    <title>Kernel Density Estimation</title>
    <script type="text/javascript" src="http://vis.stanford.edu/protovis/protovis-r3.2.js"></script>
    <script type="text/javascript" src="kde.js"></script>
    <script type="text/javascript" src="faithful.js"></script>
    <style type="text/css">

    body {
    margin: 0;
    }

    </style>
    </head>
    <body>
    <script type="text/javascript+protovis">

    /* Sizing and scales. */
    var w = document.body.clientWidth - 40,
    h = document.body.clientHeight - 25,
    x = pv.Scale.linear(30, 110).range(0, w),
    y = pv.Scale.linear(0, 0.1).range(0, h),
    bandwidths = [1, 7, 20];

    /* The root panel. */
    var vis = new pv.Panel()
    .width(w)
    .height(h)
    .bottom(20)
    .left(30)
    .right(10)
    .top(5);

    /* X-axis ticks. */
    vis.add(pv.Rule)
    .data(x.ticks())
    .left(x)
    .strokeStyle("#eee")
    .add(pv.Rule)
    .bottom(-5)
    .height(5)
    .strokeStyle("#000")
    .anchor("bottom").add(pv.Label)
    .text(x.tickFormat);

    /* Y-axis ticks. */
    vis.add(pv.Rule)
    .data(y.ticks(5))
    .bottom(y)
    .strokeStyle("#eee")
    .anchor("left").add(pv.Label)
    .text(y.tickFormat);

    /* Histogram */
    vis.add(pv.Bar)
    .data(pv.histogram(faithful).frequency(false).bins(x.ticks(60)))
    .bottom(0)
    .left(function(d) x(d.x))
    .width(function(d) x(d.dx + 30))
    .height(function(d) y(d.y))
    .fillStyle("#aaa")
    .strokeStyle("rgba(255, 255, 255, .2)")
    .lineWidth(1);

    /* Kernel density estimates */
    var panel = vis.add(pv.Panel)
    .data(bandwidths);

    var line = panel.add(pv.Line)
    .data(function(h) kde(faithful).scale(h).points(pv.range(30, 110, 0.1)))
    .left(function(d) x(d.x))
    .bottom(function(d) y(d.y));

    panel.add(pv.Dot)
    .shape("square")
    .lineWidth(0)
    .fillStyle(function() line.strokeStyle())
    .top(function () this.parent.index * 12 + 10)
    .left(10)
    .anchor("right").add(pv.Label)
    .text(function (h) "h = " + h);

    vis.render();

    </script>
    </body>
    </html>
    23 changes: 23 additions & 0 deletions kde.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,23 @@
    function kde(sample) {
    /* Epanechnikov kernel */
    function epanechnikov(u) {
    return Math.abs(u) <= 1 ? 0.75 * (1 - u*u) : 0;
    };

    var kernel = epanechnikov;
    return {
    scale: function(h) {
    kernel = function (u) { return epanechnikov(u / h) / h; };
    return this;
    },

    points: function(points) {
    return points.map(function(x) {
    var y = pv.sum(sample.map(function (v) {
    return kernel(x - v);
    })) / sample.length;
    return {x: x, y: y};
    });
    }
    }
    }