agramfort · July 25, 2013 10:31
diff --git a/problem_gcv.py b/problem_gcv.py
 import numpy as np
 from scipy import linalg
 from sklearn import linear_model

 def _gcv(X, y, alphas, fit_intercept):
    """Local gcv"""
    # singular values of the design matrix
    n1, p = X.shape
    _, s, _ = linalg.svd(X, 0)
    clf = linear_model.Ridge(fit_intercept=fit_intercept)
    best_gcv = np.inf
    for alpha in alphas:
        dof = np.sum(s ** 2 / (s ** 2 + alpha)) # ridge degrees of freedom
        clf.alpha = alpha
        clf.fit(X, y)
        sse = (1. - clf.score(X, y)) * np.var(y) * n1
        gcv_ = sse / n1 / (1. - dof / n1) ** 2
        if n1 > p:
            M = np.dot(np.dot(
                    X, linalg.inv(np.dot(X.T, X) + alpha * np.eye(p))), X.T)
        else:
            K = np.dot(X, X.T)
            M = (K - np.dot(np.dot(K, linalg.inv(K + np.eye(n1) * alpha)), K)) / alpha
        gcv_ = np.sum(((y - clf.predict(X)) / (1. - np.diag(M))) ** 2)
        if gcv_ < best_gcv:
            best_gcv = gcv_
            best_alpha = alpha
    return best_alpha, best_gcv

 # set the parameters
 alphas = np.logspace(-2, 6, 9)
 n1, p = 100, 200
 # n1: number of training samples
 # p: number of features
 np.random.seed([2])

 fit_intercept = False
 fit_intercept = True

 # generate the data
 y, X, beta = (np.random.randn(n1), np.random.randn(n1, p), np.random.randn(p))
 beta *= .1 # control the SNR
 X -= X.mean(0)
 y += np.dot(X, beta)
 alpha, _ = _gcv(X, y, alphas, fit_intercept)

 # try with sklearn's GCV
 gcv1 = linear_model.RidgeCV(alphas=alphas, fit_intercept=fit_intercept).fit(X, y)

 print 'sklearns, alpha: ', gcv1.alpha_, 'local implementation: ', alpha 

 gcv2 = linear_model.RidgeCV(alphas=[alpha], fit_intercept=fit_intercept).fit(X, y)

 gcv3 = linear_model.RidgeCV(alphas=[100], fit_intercept=fit_intercept).fit(X, y)

 # our estimate is actually much better than scikit learn's one 
 print 'error on beta: %f (skl) %f (local) %f (manual)' % \
        (np.sum((gcv1.coef_ - beta) ** 2),
         np.sum((gcv2.coef_ - beta) ** 2),
         np.sum((gcv3.coef_ - beta) ** 2))

 # this can easily be fixed by not applying the centering of the matrix X
 # sklearn.linear_model.ridge, line 564
	import numpy as np
	from scipy import linalg
	from sklearn import linear_model

	def _gcv(X, y, alphas, fit_intercept):
	"""Local gcv"""
	# singular values of the design matrix
	n1, p = X.shape
	_, s, _ = linalg.svd(X, 0)
	clf = linear_model.Ridge(fit_intercept=fit_intercept)
	best_gcv = np.inf
	for alpha in alphas:
	dof = np.sum(s 2 / (s 2 + alpha)) # ridge degrees of freedom
	clf.alpha = alpha
	clf.fit(X, y)
	sse = (1. - clf.score(X, y)) * np.var(y) * n1
	gcv_ = sse / n1 / (1. - dof / n1) ** 2
	if n1 > p:
	M = np.dot(np.dot(
	X, linalg.inv(np.dot(X.T, X) + alpha * np.eye(p))), X.T)
	else:
	K = np.dot(X, X.T)
	M = (K - np.dot(np.dot(K, linalg.inv(K + np.eye(n1) * alpha)), K)) / alpha
	gcv_ = np.sum(((y - clf.predict(X)) / (1. - np.diag(M))) ** 2)
	if gcv_ < best_gcv:
	best_gcv = gcv_
	best_alpha = alpha
	return best_alpha, best_gcv

	# set the parameters
	alphas = np.logspace(-2, 6, 9)
	n1, p = 100, 200
	# n1: number of training samples
	# p: number of features
	np.random.seed([2])

	fit_intercept = False
	fit_intercept = True

	# generate the data
	y, X, beta = (np.random.randn(n1), np.random.randn(n1, p), np.random.randn(p))
	beta *= .1 # control the SNR
	X -= X.mean(0)
	y += np.dot(X, beta)
	alpha, _ = _gcv(X, y, alphas, fit_intercept)

	# try with sklearn's GCV
	gcv1 = linear_model.RidgeCV(alphas=alphas, fit_intercept=fit_intercept).fit(X, y)

	print 'sklearns, alpha: ', gcv1.alpha_, 'local implementation: ', alpha

	gcv2 = linear_model.RidgeCV(alphas=[alpha], fit_intercept=fit_intercept).fit(X, y)

	gcv3 = linear_model.RidgeCV(alphas=[100], fit_intercept=fit_intercept).fit(X, y)

	# our estimate is actually much better than scikit learn's one
	print 'error on beta: %f (skl) %f (local) %f (manual)' % \
	(np.sum((gcv1.coef_ - beta) ** 2),
	np.sum((gcv2.coef_ - beta) ** 2),
	np.sum((gcv3.coef_ - beta) ** 2))

	# this can easily be fixed by not applying the centering of the matrix X
	# sklearn.linear_model.ridge, line 564