-
-
Save dmoisset/ad6cfdf4edee498f63f99bbc8bdcf906 to your computer and use it in GitHub Desktop.
Logistic Regression (with type annotations).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# An example implementation of Logistic Regression
# Originally by Alejandro Peralta
# Type annotations by Daniel Moisset
from typing import Tuple

import numpy as np
import scipy as sp
import scipy.optimize
import scipy.special
from sklearn import cross_validation
from sklearn.utils.fixes import expit as logistic_sigmoid
from sklearn.utils.extmath import log_logistic
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
class LR(object):
    """Binary logistic regression with an L2 penalty, fitted with L-BFGS.

    The model stores its parameters in a single vector ``weights`` whose
    last entry is the intercept and whose leading entries are the
    per-feature coefficients.  ``classes_`` holds the sorted unique labels
    seen by ``fit``; ``classes_[1]`` is treated as the positive class and
    every other label as the negative class.
    """

    def fit(self, X: "np.ndarray[float]", y: "np.ndarray[int]") -> 'LR':
        """Fit the model on ``X`` (n_samples, n_features) and labels ``y``.

        Sets ``self.classes_`` and ``self.weights``, prints the optimizer's
        diagnostic dictionary, and returns ``self``.

        Raises:
            ValueError: if ``y`` contains fewer than two distinct labels.
        """
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        if len(self.classes_) < 2:
            raise ValueError(
                "LR needs samples of at least 2 classes, got %d"
                % len(self.classes_))

        # The loss expects targets in {-1, +1}.  Build them from scratch
        # instead of patching ``y`` in place, so the positive label becomes
        # +1 whatever its original value or dtype (e.g. labels {1, 2} or
        # strings), not only when it happens to be 1 already.
        signed_y = np.where(y == self.classes_[1], 1.0, -1.0)

        initial_weights = np.zeros(X.shape[1] + 1)
        # ``likelihood`` returns (value, gradient), so ``fprime`` stays None.
        # The trailing 1.0 in ``args`` is the penalty strength ``C``.
        self.weights, _, info = sp.optimize.fmin_l_bfgs_b(
            func=self.likelihood,
            x0=initial_weights,
            fprime=None,
            args=(X, signed_y, 1.0),
            pgtol=0.00001,
            maxiter=100)
        print("Minimization information: %s" % info)
        return self

    def decision_function(self, X_test: "np.ndarray[float]") -> "np.ndarray[float]":
        """Return the linear score ``X_test . coef + intercept`` per sample.

        A result with a trailing axis of length one is flattened.
        """
        scores = np.dot(X_test, self.weights[:-1]) + self.weights[-1]
        if scores.ndim > 1 and scores.shape[1] == 1:
            return scores.ravel()
        return scores

    def predict(self, X_test: "np.ndarray[float]") -> "np.ndarray[int]":
        """Return the predicted label from ``classes_`` for each sample.

        For 1-D scores, a strictly positive score selects ``classes_[1]``
        and anything else selects ``classes_[0]``.  Multi-column scores
        select the arg-max column.
        """
        scores = self.decision_function(X_test)
        if scores.ndim == 1:
            # Builtin ``int``: the ``np.int`` alias no longer exists in
            # current NumPy releases.
            indices = (scores > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]

    def predict_proba(self, X_test: "np.ndarray[float]") -> "np.ndarray[float]":
        """Return the sigmoid of the linear score for each sample.

        This is the modelled probability of the positive class
        (``classes_[1]``) only; it is not a two-column matrix.
        """
        return sp.special.expit(
            np.dot(X_test, self.weights[:-1]) + self.weights[-1])

    def likelihood(self, weights: "np.ndarray[float]",
                   X: "np.ndarray[float]", y: "np.ndarray[int]",
                   C: float) -> Tuple[float, "np.ndarray[float]"]:
        """Return the penalised negative log-likelihood and its gradient.

        Args:
            weights: coefficients followed by the intercept (last entry).
            X: sample matrix, one row per sample.
            y: targets encoded as -1 / +1, one per row of ``X``.
            C: multiplier of the L2 penalty ``0.5 * w . w`` on the
                coefficients (the intercept is not penalised); a larger
                value means stronger regularisation.

        Returns:
            ``(L, grad)`` where ``L`` is the objective value and ``grad``
            has the same layout as ``weights``.
        """
        grad = np.empty_like(weights)
        _, n_features = X.shape
        c, w = weights[-1], weights[:-1]

        # z_i = sum(w[k] * X[i, k] for k in features) + c, i.e. the linear
        # score of sample i with the intercept acting as a constant feature.
        z = np.dot(X, w) + c
        yz = z * y

        # L(weights) = -sum(log(g(y[i] * z[i]))) + C/2 * |w|^2, g = sigmoid.
        # log(g(t)) == -log(1 + exp(-t)) == -logaddexp(0, -t), which stays
        # finite for large |t|.
        L = np.sum(np.logaddexp(0, -yz)) + C * 0.5 * np.dot(w, w)

        # Gradient: d/dz_i of -log(g(y_i z_i)) is (g(y_i z_i) - 1) * y_i.
        z0 = (sp.special.expit(yz) - 1) * y
        grad[:n_features] = np.dot(X.T, z0) + C * w
        grad[-1] = z0.sum()  # intercept: its "feature" is 1 for every sample
        return L, grad
if __name__ == '__main__':
    # Demo: fit the hand-written LR and scikit-learn's LogisticRegression
    # on the same random one-feature, two-class problem and print the
    # fitted parameters and predictions of each for side-by-side reading.
    train, tags = make_classification(
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(train, tags)  # type: np.ndarray[float], np.ndarray[float], np.ndarray[int], np.ndarray[int]

    # Hand-written model: the coefficients come first in ``weights`` and
    # the intercept is the last entry.
    own_model = LR()
    own_model.fit(X_train, y_train)
    n_features = X_train.shape[1]
    print("Coef:", own_model.weights[:n_features])
    print("Intercept:", own_model.weights[-1])
    print("Score:", own_model.decision_function(X_test))
    print("Prediction:", own_model.predict(X_test))
    print("Prediction prob:", own_model.predict_proba(X_test))

    print("--------------")

    # Reference model from scikit-learn with its default settings.
    reference_model = LogisticRegression()
    reference_model.fit(X_train, y_train)
    print("Coef:", reference_model.coef_)
    print("Intercept:", reference_model.intercept_)
    print("Score:", reference_model.decision_function(X_test))
    print("Prediction:", reference_model.predict(X_test))
    print("Prediction prob:", reference_model.predict_proba(X_test))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment