|
import pylab as pl
import numpy as np
from sklearn import mixture

# choose the mixture model class (uncomment one of the alternatives below)
GMM = mixture.GMM
#GMM = mixture.DPGMM
#GMM = mixture.VBGMM
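
# Note (added): GMM, DPGMM and VBGMM are the old scikit-learn mixture
# classes, deprecated in 0.18 and removed in 0.20.  On modern versions,
# mixture.GaussianMixture and mixture.BayesianGaussianMixture are the
# rough replacements, but they expose score_samples() rather than eval(),
# so this script assumes a version that still ships the old classes.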

# make some data: three 2D Gaussian clusters of Npts points each
Npts = 100
np.random.seed(0)
centers = np.array([[0.25, 0.75],
                    [0.75, 0.75],
                    [0.75, 0.25]])
covars = 0.1
X = np.random.normal(centers, covars, size=(Npts, 3, 2)) * 10
X = X.reshape(-1, 2)
colors = (np.ones((Npts, 1)) * np.arange(3)).reshape(-1)
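
# sanity check (added): the reshape stacks the clusters point-by-point,
# so X should now be (3 * Npts, 2) = (300, 2)
assert X.shape == (3 * Npts, 2)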

pl.figure()
pl.scatter(X[:, 0], X[:, 1], c=colors, s=16, lw=0)
pl.title('input data')

# evaluate the information criteria for a range of model sizes
n_components = np.arange(1, 16)
BIC = np.zeros(n_components.shape)
AIC = np.zeros(n_components.shape)

for i, n in enumerate(n_components):
    clf = GMM(n_components=n,
              covariance_type='diag')
    clf.fit(X)
    if isinstance(clf, (mixture.VBGMM, mixture.DPGMM)):
        # the variational models lack bic()/aic(); as an added fallback,
        # use -2 * log-likelihood for both curves (no complexity penalty)
        logL = clf.eval(X)[0].sum()
        BIC[i] = -2 * logL
        AIC[i] = -2 * logL
    else:
        BIC[i] = clf.bic(X)
        AIC[i] = clf.aic(X)
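
# for reference (added): AIC = 2 p - 2 ln L and BIC = p ln N - 2 ln L,
# where L is the maximized likelihood, p the number of free parameters,
# and N the number of samples; the model minimizing each criterion wins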

pl.figure()
pl.plot(n_components, AIC, label='AIC')
pl.plot(n_components, BIC, label='BIC')
pl.legend(loc=0)
pl.xlabel('n_components')
pl.ylabel('AIC / BIC')
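
# (added) with three well-separated true clusters, both criteria usually
# bottom out near n_components = 3, though this is data-dependent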

# refit with the number of components that minimizes BIC, using the same
# covariance model as in the selection loop
i_n = np.argmin(BIC)

clf = GMM(n_components[i_n], covariance_type='diag')
clf.fit(X)
label = clf.predict(X)
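
# report the selected model size (added for convenience)
print("min(BIC) at n_components = %d" % n_components[i_n])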

pl.figure()
pl.scatter(X[:, 0], X[:, 1], c=label, s=16, lw=0)
pl.title('classification at min(BIC)')
pl.show()