Last active
February 14, 2019 03:46
-
-
Save ryuzakyl/12c221ff0e54d8b1ac171c69ea552c0a to your computer and use it in GitHub Desktop.
Andrews curves for high-dimensional multivariate data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env | |
# -*- coding: utf-8 -*- | |
from math import ceil, sqrt, pi | |
import pylab | |
import numpy as np | |
import pandas as pd | |
from sklearn import datasets | |
# ------------------------------------------------- | |
def andrews_curves(M, m=100):
    """Evaluate the Andrews curve of each data vector on a grid of angles.

    A vector ``x = (x_0, x_1, ..., x_{n-1})`` is mapped to the finite
    Fourier series::

        f_x(t) = x_0 / sqrt(2) + x_1 sin(t) + x_2 cos(t)
                 + x_3 sin(2t) + x_4 cos(2t) + ...

    sampled at ``m`` evenly spaced angles ``t`` in ``[-pi, pi]``.

    Parameters
    ----------
    M : numpy.ndarray
        Either a single data vector (1-D, length ``n``) or a collection of
        data vectors stored row-wise (2-D, shape ``(rows, n)``).  The input
        is never modified.
    m : int, optional
        Number of angles at which every curve is sampled (default 100).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(rows, m)`` with one sampled curve per input row.
        When there is exactly one curve (1-D input, or 2-D input with a
        single row) the result is flattened to shape ``(m,)``.
        A vector with no components yields the empty sum, i.e. zeros.

    Raises
    ------
    ValueError
        If ``M`` is not a ``numpy.ndarray`` or is neither 1-D nor 2-D.
    """
    # validating samples data
    if not isinstance(M, np.ndarray):
        raise ValueError('Unsupported format for samples.')

    # validating data dimensions
    if M.ndim not in (1, 2):
        raise ValueError("Only data vectors (1D) and collections of data vectors (2D) arrays supported")

    # view the data as rows of vectors; no copy is needed because the data
    # is only read (the result of the matrix product is a fresh array)
    X = M.reshape(1, -1) if M.ndim == 1 else M
    n = X.shape[1]

    # angles 'theta' at which the curves are sampled
    t = np.linspace(-pi, pi, m)

    # basis matrix A (m x n): row = angle, column = Fourier component
    A = np.empty((m, n))

    # constant term; guarded so that vectors without components do not
    # trigger an out-of-range column access
    if n > 0:
        A[:, 0] = 1.0 / sqrt(2.0)

    for i in range(1, n):
        # columns 1, 2 share frequency 1, columns 3, 4 frequency 2, ...
        # (pure integer arithmetic, independent of '/' division semantics)
        c = (i + 1) // 2

        # odd columns carry the sine term, even columns the cosine term
        A[:, i] = np.sin(c * t) if i % 2 == 1 else np.cos(c * t)

    # one curve per row of X
    curves = np.dot(A, X.T).T

    # a lone curve is returned as a flat vector
    return np.ravel(curves) if curves.shape[0] == 1 else curves
# ------------------------------------------------- | |
# Demo: plot the Andrews curves of the iris samples, one colour per class.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# one row per sample, one column per sampled angle, plus the class label last
df = pd.DataFrame(andrews_curves(X))
df['class'] = Y

# overlay the 3 classes on a single set of axes: the first plot call creates
# the axes (ax=None), the following ones draw onto the axes it returned
ax = None
for label, color in ((0, 'b'), (1, 'g'), (2, 'r')):
    # drop the trailing 'class' column and transpose so each sample is a line
    curves = df[df['class'] == label].iloc[:, :-1].T
    ax = curves.plot(ax=ax, color=color, legend=None)

# showing plot
pylab.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment