hyperopt tuning — staged hyperparameter tuning of an XGBoost classifier with Hyperopt's TPE search, optimizing one group of related hyperparameters at a time.
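The script at the bottom of the file references X_kag_train, y_kag_train, X_kag_test, and y_kag_test without defining them. As a rough illustration only, they could be produced by a split such as the one below; the file name 'kaggle_data.csv' and the column name 'target' are assumptions for this sketch, not part of the original gist.

# Hypothetical preparation of the train/test splits used below.
# The input file and target column name are assumptions for illustration.
import pandas as pd
from sklearn.model_selection import train_test_split

kag_df = pd.read_csv('kaggle_data.csv')   # assumed input file
y = kag_df.pop('target')                  # assumed binary target column
X_kag_train, X_kag_test, y_kag_train, y_kag_test = train_test_split(
    kag_df, y, test_size=0.3, random_state=42, stratify=y)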
from typing import Any, Callable, Dict, Union

import pandas as pd
import xgboost as xgb
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import accuracy_score, roc_auc_score
def hyperparameter_tuning(space: Dict[str, Union[float, int]],
                          X_train: pd.DataFrame, y_train: pd.Series,
                          X_test: pd.DataFrame, y_test: pd.Series,
                          early_stopping_rounds: int = 50,
                          metric: Callable = accuracy_score) -> Dict[str, Any]:
""" | |
Perform hyperparameter tuning for an XGBoost classifier. | |
This function takes a dictionary of hyperparameters, training | |
and test data, and an optional value for early stopping rounds, | |
and returns a dictionary with the loss and model resulting from | |
the tuning process. The model is trained using the training | |
data and evaluated on the test data. The loss is computed as | |
the negative of the accuracy score. | |
Parameters | |
---------- | |
space : Dict[str, Union[float, int]] | |
A dictionary of hyperparameters for the XGBoost classifier. | |
X_train : pd.DataFrame | |
The training data. | |
y_train : pd.Series | |
The training target. | |
X_test : pd.DataFrame | |
The test data. | |
y_test : pd.Series | |
The test target. | |
early_stopping_rounds : int, optional | |
The number of early stopping rounds to use. The default value | |
is 50. | |
metric : callable | |
Metric to maximize. Default is accuracy | |
Returns | |
------- | |
Dict[str, Any] | |
A dictionary with the loss and model resulting from the | |
tuning process. The loss is a float, and the model is an | |
XGBoost classifier. | |
""" | |
    # hp.quniform returns floats; cast the parameters that XGBoost
    # expects to be integers
    int_vals = ['max_depth', 'reg_alpha']
    space = {k: (int(val) if k in int_vals else val)
             for k, val in space.items()}
    space['early_stopping_rounds'] = early_stopping_rounds
    model = xgb.XGBClassifier(**space)
    evaluation = [(X_train, y_train),
                  (X_test, y_test)]
    model.fit(X_train, y_train,
              eval_set=evaluation,
              verbose=False)
    pred = model.predict(X_test)
    score = metric(y_test, pred)
    # Hyperopt minimizes the loss, so return the negated score
    return {'loss': -score, 'status': STATUS_OK, 'model': model}
params = {'random_state': 42}
rounds = [{'max_depth': hp.quniform('max_depth', 1, 9, 1),  # tree
           'min_child_weight': hp.loguniform('min_child_weight', -2, 3)},
          {'subsample': hp.uniform('subsample', 0.5, 1),  # stochastic
           'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1)},
          {'gamma': hp.loguniform('gamma', -10, 10)},  # regularization
          {'learning_rate': hp.loguniform('learning_rate', -7, 0)},  # boosting
         ]

# Tune one group of related hyperparameters at a time, carrying the
# best values found so far into the next round's search space.
for round in rounds:
    params = {**params, **round}
    trials = Trials()
    best = fmin(fn=lambda space: hyperparameter_tuning(
                    space, X_kag_train, y_kag_train, X_kag_test, y_kag_test,
                    metric=accuracy_score),
                space=params,
                algo=tpe.suggest,
                max_evals=40,
                trials=trials,
               )
    params = {**params, **best}
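After the last round, params holds concrete values for every tuned hyperparameter plus random_state. A minimal follow-up sketch, assuming the same X_kag_* splits as above, trains and evaluates a final model with those values; the int casts mirror the ones inside hyperparameter_tuning, since Hyperopt reports quniform values as floats.

# Train a final model with the tuned values (sketch; not part of the
# original gist). max_depth comes back from Hyperopt as a float.
final_params = {k: (int(v) if k in ('max_depth', 'reg_alpha') else v)
                for k, v in params.items()}
final_model = xgb.XGBClassifier(**final_params,
                                early_stopping_rounds=50)
final_model.fit(X_kag_train, y_kag_train,
                eval_set=[(X_kag_train, y_kag_train),
                          (X_kag_test, y_kag_test)],
                verbose=False)
print(accuracy_score(y_kag_test, final_model.predict(X_kag_test)))
print(roc_auc_score(y_kag_test, final_model.predict_proba(X_kag_test)[:, 1]))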