Last active
September 3, 2015 12:39
-
-
Save glouppe/5f2fae95e80392e7a0e3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.cross_validation import train_test_split | |
from sklearn.datasets import fetch_mldata | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.ensemble import ExtraTreesClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from rep.estimators import TMVAClassifier | |
from functools import partial | |
try: | |
from time import process_time as bench_time | |
except ImportError: | |
from time import time as bench_time | |
# --- Dataset preparation -----------------------------------------------------
# Load MNIST and keep only the digits 3 and 8, turning the task into a binary
# classification problem.
data = fetch_mldata('MNIST original')
X = data.data
y = data.target

mask = (y == 3) | (y == 8)
X = X[mask]
X = X.astype(float)
# Add a tiny amount of Gaussian noise to every feature:
# TMVA crashes on constant features.
X += np.random.normal(loc=0.0, scale=0.001, size=X.shape)
# Function-call form so that the script parses on Python 3 as well as
# Python 2 (a single parenthesised argument prints identically on both).
print(X.dtype)

y = y[mask]
# Relabel the two remaining digits as 0/1:
# TMVA crashes on multi-class classification.
y[y == 3] = 0
y[y == 8] = 1

# Keep a random subsample of at most 1000 examples.
indices = np.random.permutation(len(X))[:1000]
X, y = X[indices], y[indices]

# Random train/test split using train_test_split's default proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# --- Hyper-parameters shared by the benchmarked estimators -------------------
n_estimators = 100
# Number of features considered per split: sqrt(number of features).
max_features = int(X.shape[1] ** 0.5)
# NOTE(review): presumably chosen large enough to act as "no depth limit"
# for both libraries -- confirm against the TMVA option semantics.
max_depth = 1000000
# Benchmark configurations: (label, estimator class, constructor keyword
# arguments). The two forest-like sklearn models are matched with a bagged,
# randomised TMVA BDT; sklearn gradient boosting is matched with TMVA's
# gradient-boosted BDT (both at depth 3).
benchmark_configs = [
    ("sklearn.RandomForestClassifier",
     RandomForestClassifier,
     dict(n_estimators=n_estimators,
          max_features=max_features,
          max_depth=max_depth)),
    ("sklearn.ExtraTreesClassifier",
     ExtraTreesClassifier,
     dict(n_estimators=n_estimators,
          max_features=max_features,
          max_depth=max_depth)),
    ("sklearn.GradientBoostingClassifier",
     GradientBoostingClassifier,
     dict(n_estimators=n_estimators,
          max_depth=3)),
    ("tmva.kBDT(BoostType=Bagging)",
     TMVAClassifier,
     dict(method="kBDT",
          BoostType="Bagging",
          NTrees=n_estimators,
          UseNvars=max_features,
          MaxDepth=max_depth,
          MinNodeSize=0.,
          UseBaggedBoost=True,
          UseRandomisedTrees=True,
          nCuts=-1,
          nEventsMin=1)),
    ("tmva.kBDT(BoostType=Grad)",
     TMVAClassifier,
     dict(method="kBDT",
          BoostType="Grad",
          NTrees=n_estimators,
          MaxDepth=3,
          MinNodeSize=0.,
          nCuts=-1,
          nEventsMin=1)),
]

for name, estimator_cls, estimator_params in benchmark_configs:
    est = estimator_cls(**estimator_params)

    # Only the call to fit() is timed; construction and scoring are excluded.
    fit_started = bench_time()
    est.fit(X_train, y_train)
    fit_elapsed = bench_time() - fit_started

    # Score on the held-out test split.
    test_score = est.score(X_test, y_test)
    print("%s \t train time=%s \t score=%f" % (name, fit_elapsed, test_score))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment