A visualization of blood insulin from the UCI diabetes dataset. The distribution is stored with a streaming histogram. Brush to zoom. Click to zoom out.
Last active
September 22, 2016 09:48
-
-
Save osroca/4fb0f64b71e838d70d31 to your computer and use it in GitHub Desktop.
Insuline histogram
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"anomaly_seed": "2c249dda00fbf54ab4cdd850532a584f286af5b6", | |
"category": 0, | |
"code": 200, | |
"columns": 1, | |
"constraints": false, | |
"created": "2014-10-28T20:13:11.111000", | |
"credits": 0.69940185546875, | |
"credits_per_prediction": 0.0, | |
"dataset": "dataset/5427f656ec65d11671000004", | |
"dataset_field_types": { | |
"categorical": 1, | |
"datetime": 0, | |
"numeric": 8, | |
"preferred": 9, | |
"text": 0, | |
"total": 9 | |
}, | |
"dataset_status": true, | |
"dataset_type": 0, | |
"description": "", | |
"excluded_fields": [], | |
"fields_meta": { | |
"count": 1, | |
"limit": 1000, | |
"offset": 0, | |
"query_total": 1, | |
"total": 9 | |
}, | |
"forest_size": 128, | |
"id_fields": [], | |
"input_fields": [ | |
"000000", | |
"000001", | |
"000002", | |
"000003", | |
"000004", | |
"000005", | |
"000006", | |
"000007", | |
"000008" | |
], | |
"locale": "en-US", | |
"max_columns": 9, | |
"max_rows": 768, | |
"model": { | |
"fields": { | |
"000004": { | |
"column_number": 4, | |
"datatype": "int16", | |
"name": "insulin", | |
"optype": "numeric", | |
"order": 0, | |
"preferred": true, | |
"summary": { | |
"bins": [ | |
[ | |
0, | |
374 | |
], | |
[ | |
19.33333, | |
9 | |
], | |
[ | |
43.4375, | |
32 | |
], | |
[ | |
59.894739999999999, | |
38 | |
], | |
[ | |
74.384619999999998, | |
26 | |
], | |
[ | |
91.217389999999995, | |
46 | |
], | |
[ | |
112.23256000000001, | |
43 | |
], | |
[ | |
134.34884, | |
43 | |
], | |
[ | |
160.17646999999999, | |
34 | |
], | |
[ | |
184.35293999999999, | |
34 | |
], | |
[ | |
208.57894999999999, | |
19 | |
], | |
[ | |
232, | |
11 | |
], | |
[ | |
251.40000000000001, | |
5 | |
], | |
[ | |
272.69999999999999, | |
10 | |
], | |
[ | |
288.5, | |
6 | |
], | |
[ | |
304.66667000000001, | |
3 | |
], | |
[ | |
324.75, | |
8 | |
], | |
[ | |
338.5, | |
2 | |
], | |
[ | |
368.33332999999999, | |
3 | |
], | |
[ | |
393.66667000000001, | |
3 | |
], | |
[ | |
415, | |
1 | |
], | |
[ | |
440, | |
1 | |
], | |
[ | |
465, | |
1 | |
], | |
[ | |
479.39999999999998, | |
5 | |
], | |
[ | |
495, | |
2 | |
], | |
[ | |
510, | |
1 | |
], | |
[ | |
542.66666999999995, | |
3 | |
], | |
[ | |
579, | |
1 | |
], | |
[ | |
600, | |
1 | |
], | |
[ | |
680, | |
1 | |
], | |
[ | |
744, | |
1 | |
], | |
[ | |
846, | |
1 | |
] | |
], | |
"maximum": 846, | |
"mean": 79.799480000000003, | |
"median": 30.5, | |
"minimum": 0, | |
"missing_count": 0, | |
"population": 768, | |
"splits": [ | |
0.18842999999999999, | |
30.5, | |
127.5 | |
], | |
"standard_deviation": 115.244, | |
"sum": 61286, | |
"sum_squares": 15077256, | |
"variance": 13281.18008 | |
} | |
} | |
}, | |
"kind": "iforest", | |
"mean_depth": 14.774407996894411 | |
}, | |
"name": "diabetes' dataset anomaly detector", | |
"number_of_anomalyscores": 0, | |
"number_of_batchanomalyscores": 0, | |
"number_of_public_anomalyscores": 0, | |
"out_of_bag": false, | |
"price": 0.0, | |
"private": true, | |
"project": "project/542537ecec65d1fc17000279", | |
"range": [ | |
1, | |
768 | |
], | |
"replacement": false, | |
"resource": "anomaly/544ff8d7ec65d102f200067d", | |
"rows": 768, | |
"sample_rate": 1.0, | |
"sample_size": 483, | |
"shared": false, | |
"size": 26192, | |
"source": "source/5427f64dec65d11671000000", | |
"source_status": true, | |
"status": { | |
"code": 5, | |
"elapsed": 2514, | |
"message": "The anomaly detector has been created", | |
"progress": 1.0 | |
}, | |
"subscription": true, | |
"tags": [], | |
"top_n": 10, | |
"updated": "2014-10-28T20:13:23.050000", | |
"white_box": false | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<meta charset="utf-8"> | |
<style> | |
.bar { | |
fill: #5AC; | |
opacity: 0.6; | |
} | |
.axis { | |
font: 12px sans-serif; | |
} | |
.axis path, | |
.axis line { | |
fill: none; | |
stroke: #000; | |
shape-rendering: crispEdges; | |
} | |
.brush .extent { | |
stroke: #fff; | |
fill-opacity: .125; | |
shape-rendering: crispEdges; | |
} | |
.fname { | |
font: 12px sans-serif; | |
font-weight: bold; | |
} | |
</style> | |
<body> | |
<script src="https://d3js.org/d3.v3.min.js"></script> | |
<!--script src="http://localhost:1025/static/js/d3.v3.min.js"></script--> | |
<script> | |
/* Estimate the cumulative population of a distribution up to `value`.
 *
 * rawBins: array of bins shaped like the output of loadRawBins, i.e.
 *          {mean, pop, exact} plus {prev, next} on non-exact bins.
 * value:   the point up to which the population is accumulated.
 *
 * A bin is counted whole when it is exact and its mean is at or below
 * `value`, or when `value` has reached the bin's `next` neighbour.
 * Otherwise half of the bin's population is assigned to each side of its
 * mean and the relevant half is interpolated linearly.
 */
function sum(rawBins, value) {
  return rawBins.reduce(function (running, bin) {
    var countedWhole = (bin.exact && bin.mean <= value) || value >= bin.next;
    if (countedWhole) {
      return running + bin.pop;
    }
    var distance;
    if (bin.prev < value && bin.mean >= value) {
      // value sits in the lower half: fraction of the way from mean back to prev.
      distance = (bin.mean - value) / (bin.mean - bin.prev);
      // Optionally, square the distance for trapezoidal interpolation
      return running + (1 - distance) * bin.pop / 2;
    }
    if (bin.mean < value && bin.next >= value) {
      // value sits in the upper half: whole lower half plus a share of the upper.
      distance = (value - bin.mean) / (bin.next - bin.mean);
      // Optionally, square the distance for trapezoidal interpolation
      return running + (bin.pop / 2 + distance * bin.pop / 2);
    }
    // value has not reached this bin yet.
    return running;
  }, 0);
}
/* Loads the distribution into a convenient format.
 *
 * data: a field description whose `summary` holds either `bins`
 *       ([mean, population] pairs plus `minimum` and `maximum`) or
 *       `counts` ([value, population] pairs).
 *
 * Returns an array of {mean, pop, exact}. A histogram bin is exact when it
 * holds a single instance or sits on the minimum or maximum. Every other
 * bin also gets `prev` and `next`, the means of its neighbours (or the
 * minimum/maximum at the edges), which `sum` uses for interpolation.
 * Entries from `counts` are always exact. A summary with neither `bins`
 * nor `counts` yields an empty array.
 */
function loadRawBins(data) {
  var summary = data.summary;
  var bins = [];
  var sbins;
  var bin;
  var i;
  if ("bins" in summary) {
    sbins = summary.bins;
    // Declared locally: assigning these undeclared would leak globals and
    // throw in strict mode.
    var minimum = summary.minimum;
    var maximum = summary.maximum;
    var last = sbins.length - 1;
    for (i = 0; i < sbins.length; i++) {
      bin = {mean: sbins[i][0], pop: sbins[i][1]};
      bin.exact = bin.pop === 1 || bin.mean === minimum || bin.mean === maximum;
      if (!bin.exact) {
        bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
        bin.next = i < last ? sbins[i + 1][0] : maximum;
      }
      bins[i] = bin;
    }
  } else {
    sbins = summary.counts || [];
    for (i = 0; i < sbins.length; i++) {
      bins[i] = {mean: sbins[i][0], pop: sbins[i][1], exact: true};
    }
  }
  return bins;
}
/* Build equal-width bars for drawing from the raw bins and a range.
 *
 * rawBins:     distribution in the format produced by loadRawBins.
 * range:       [low, high] interval to display.
 * vizBinCount: number of bars to split the range into.
 *
 * Returns {bins, maxPop, range}: `bins` is an array of {range, pop}, where
 * `pop` is the rounded difference between the cumulative estimates from
 * `sum` at the bar's two edges; `maxPop` is the largest such `pop`; and
 * `range` is the array that was passed in.
 */
function buildViz(rawBins, range, vizBinCount) {
  // Using a slight shift to act like a closed interval
  var edgeShift = 0.0000001;
  var step = (range[1] - range[0]) / vizBinCount;
  var lowerEdge = range[0] - edgeShift;
  var lowerCumulative = sum(rawBins, lowerEdge);
  var bars = [];
  var tallest = 0;
  while (bars.length < vizBinCount) {
    var upperEdge = lowerEdge + step;
    var upperCumulative = sum(rawBins, upperEdge);
    var count = Math.round(upperCumulative - lowerCumulative);
    tallest = Math.max(tallest, count);
    bars.push({range: [lowerEdge + edgeShift, upperEdge], pop: count});
    // The upper edge of this bar is the lower edge of the next one.
    lowerEdge = upperEdge;
    lowerCumulative = upperCumulative;
  }
  return {bins: bars, maxPop: tallest, range: range};
}
/* Return `term` with its first character upper-cased; the rest is unchanged. */
function capitalize(term) {
  var initial = term.charAt(0);
  var remainder = term.slice(1);
  return initial.toUpperCase() + remainder;
}
/* Narrow a display range by cutting sparsely populated tails.
 *
 * fullRange: [low, high] interval to start from.
 * rawBins:   distribution in the format produced by loadRawBins.
 * buffer:    fraction of the total population allowed in each trimmed tail.
 * segments:  number of equal steps the range is divided into for scanning.
 *
 * The range is scanned inwards from both ends, one step at a time, until the
 * cumulative estimate from `sum` crosses the tail threshold; each end is then
 * moved back out by one step so the crossing step stays visible.
 *
 * Returns a new [start, end] array that never extends beyond `fullRange`.
 * When there is nothing to scan (non-positive `segments`, zero-width range)
 * or trimming would leave an empty or inverted range (e.g. an empty
 * distribution), the full range is returned.
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var lower = fullRange[0];
  var upper = fullRange[1];
  var interval = (upper - lower) / segments;
  // Also rejects NaN, which fails every comparison.
  if (!(segments > 0) || !(interval > 0)) {
    return [lower, upper];
  }

  var total = 0;
  var i;
  for (i = 0; i < rawBins.length; i++) {
    total += rawBins[i].pop;
  }

  var start = lower;
  var startThreshold = total * buffer;
  for (i = 0; i < segments; i++) {
    if (sum(rawBins, start) > startThreshold) break;
    start += interval;
  }

  var end = upper;
  var endThreshold = total - (total * buffer);
  for (i = 0; i < segments; i++) {
    if (sum(rawBins, end) < endThreshold) break;
    end -= interval;
  }

  // Step back out by one interval, but never past the range we were given.
  var trimmedStart = Math.max(lower, start - interval);
  var trimmedEnd = Math.min(upper, end + interval);
  if (!(trimmedStart < trimmedEnd)) {
    return [lower, upper];
  }
  return [trimmedStart, trimmedEnd];
}
// Plot geometry: a 960x500 drawing surface with room left for the axes.
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;

// Root <svg> sized to the full surface; `svg` refers to an inner group that
// is shifted by the top/left margins, so drawing code can use plot coordinates.
var svg = d3.select("body")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
/* Load the field summary, draw the histogram and wire up brush-to-zoom.
 * Dragging a brush zooms into the selected interval; a plain click
 * (zero-width brush) zooms back out to the initial range.
 */
d3.json('diabetes-000004.json'/*"plasma.json"*/, function(error, data) {
  // A failed request arrives as `error` with no data; stop here instead of
  // dereferencing `undefined` below.
  if (error || !data) {
    console.error("Unable to load the histogram data", error);
    svg.append("text")
        .attr("class", "fname")
        .attr("x", width / 2)
        .attr("y", height / 2)
        .style("text-anchor", "middle")
        .text("Unable to load the histogram data");
    return;
  }

  var field = data.model.fields['000004'];
  var rawBins = loadRawBins(field);
  var x, y;
  var xAxis, yAxis;
  var xAxisG, yAxisG;
  var vizBinCount = 32;
  // Pad the full range by 1% of its width on each side.
  var buffer = (field.summary.maximum - field.summary.minimum) * 0.01;
  var initRange = [field.summary.minimum - buffer, field.summary.maximum + buffer];
  // Remove to default to the entire range rather than trimming 0.75% from the edges
  initRange = trimRange(initRange, rawBins, 0.0075, 128);

  /* Rebuild the shared scales and axes for the given visualization bins. */
  function rescale(viz) {
    x = d3.scale.linear()
        .domain(viz.range)
        .range([0, width]);
    y = d3.scale.linear()
        .domain([0, viz.maxPop])
        .range([height, 0]);
    xAxis = d3.svg.axis()
        .ticks(Math.round(width / 60))
        .scale(x)
        .orient("bottom");
    yAxis = d3.svg.axis()
        .ticks(Math.round(height / 30))
        .scale(y)
        .orient("left");
  }

  /* First render: create the bars, both axes and their labels. */
  function init() {
    var viz = buildViz(rawBins, initRange, vizBinCount);
    rescale(viz);
    svg.selectAll(".bar")
        .data(viz.bins)
      .enter().append("rect")
        .attr("class", "bar")
        // One pixel of padding on each side separates neighbouring bars.
        .attr("x", function(d) { return x(d.range[0]) + 1; })
        .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });
    xAxisG = svg.append("g")
        .attr("class", "x axis")
        .attr("transform", "translate(0," + height + ")")
        .call(xAxis);
    xAxisG.append("text")
        .attr("class", "fname")
        .attr("y", -16)
        .attr("x", width)
        .attr("dy", ".71em")
        .style("text-anchor", "end")
        .text(capitalize(field.name));
    yAxisG = svg.append("g")
        .attr("class", "y axis")
        .call(yAxis);
    yAxisG.append("text")
        .attr("class", "fname")
        .attr("transform", "rotate(-90)")
        .attr("y", 6)
        .attr("dy", ".71em")
        .style("text-anchor", "end")
        .text("Population");
  }
  init();

  /* Re-bin the distribution for `range` and animate bars and axes to it.
   * Only bar heights change: the bar count is fixed, so each bar keeps its
   * pixel position while the x axis is relabelled.
   */
  function update(rawBins, range, vizBinCount) {
    var animationDuration = 700;
    var viz = buildViz(rawBins, range, vizBinCount);
    rescale(viz);
    svg.selectAll(".bar")
        .data(viz.bins)
      .transition().duration(animationDuration)
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });
    xAxisG.transition().duration(animationDuration).call(xAxis);
    yAxisG.transition().duration(animationDuration).call(yAxis);
  }

  // The brush works in pixel space; brushended maps pixels back to data
  // values through the current `x` scale.
  var brushX = d3.scale.identity().domain([0, width]);
  var brushExtent;
  var brush = d3.svg.brush()
      .x(brushX)
      .on("brush", brushed)
      .on("brushend", brushended);
  var gBrush = svg.append("g")
      .attr("class", "brush")
      .call(brush)
      .call(brush.event);
  gBrush.selectAll("rect")
      .attr("height", height);

  /* Remember the latest brush extent (in pixels) while brushing. */
  function brushed() {
    brushExtent = brush.extent();
  }

  /* Zoom to the brushed interval, or back to the initial range on a click. */
  function brushended() {
    if (!d3.event.sourceEvent) return; // only transition after input
    var start = x.invert(brushExtent[0]);
    var end = x.invert(brushExtent[1]);
    // Clear the selection; a one-dimensional brush takes a flat [x0, x1] extent.
    d3.select(this).call(brush.extent([0, 0]));
    if (start === end) {
      update(rawBins, initRange, vizBinCount);
    } else {
      update(rawBins, [start, end], vizBinCount);
    }
  }
});
</script> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment