-
-
Save pwolfram/87ce2fa5186b5df40a4b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import numpy as np | |
def fast_hist(data, bin_edges): | |
"""Fast 1-dimensional histogram. Comparable to numpy.histogram(), but careless. | |
'bin_edges' should encompass all values in 'data'; the first and last elements | |
in 'bin_edges' are ignored, and are effectively (-infinity, infinity). | |
Returns the histogram array only. | |
""" | |
# Yes, I've tested this against histogram(). | |
return np.bincount(np.digitize(data, bin_edges[1:-1]), minlength=len(bin_edges) - 1) | |
def fast_hist_2d(data, bin_edges): | |
"""Fast 2-dimensional histogram. Comparable to numpy.histogramdd(), but careless. | |
'data' is an Nx2 array. 'bin_edges' is used for both dimensions and should | |
encompass all values in 'data'; the first and last elements in 'bin_edges' | |
are ignored, and are effectively (-infinity, infinity). | |
Returns the histogram array only. | |
""" | |
# Yes, I've tested this against histogramdd(). | |
xassign = np.digitize(data[:,0], bin_edges[1:-1]) | |
yassign = np.digitize(data[:,1], bin_edges[1:-1]) | |
nbins = len(bin_edges) - 1 | |
flatcount = np.bincount(xassign + yassign * nbins, minlength=nbins*nbins) | |
return flatcount.reshape((nbins, nbins)).T | |
def fast_hist_weighted(data, weights, bin_edges): | |
"""Fast 1-dimensional histogram. Comparable to numpy.histogram(), but careless. | |
'bin_edges' should encompass all values in 'data'; the first and last elements | |
in 'bin_edges' are ignored, and are effectively (-infinity, infinity). | |
Returns the histogram array only with weighting. Non-weighted version at https://gist.github.com/nkeim/4455635 | |
""" | |
return np.bincount(np.digitize(data, bin_edges[1:-1]), minlength=len(bin_edges) - 1, weights=weights) | |
def test_hist(): | |
print 'Note: bins must contain the whole range of the data for fast and regular histograms to be equivalent!' | |
a = np.random.randn(1e7) | |
b = np.random.randn(1e7) | |
lvec = np.array([-1000, 0,1,4,16, 1000]) | |
pdf,_ = np.histogram(a,bins=lvec,weights=b,density=False) | |
pdfnew = fast_hist_weighted(a,b,lvec) | |
print 'Error between weighted fast histograms:' | |
print np.max(np.abs(pdf-pdfnew)) | |
print np.mean(np.sqrt((pdf-pdfnew)**2.0)) | |
pdf,_ = np.histogram(a,bins=lvec,density=False) | |
pdfnew = fast_hist(a,lvec) | |
print 'Error between fast histograms:' | |
print np.max(np.abs(pdf-pdfnew)) | |
print np.mean(np.sqrt((pdf-pdfnew)**2.0)) | |
if __name__ == "__main__": | |
test_hist() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment