Last active
October 4, 2016 01:21
-
-
Save MartinBodocky/90a18cc4cba0c575f7aa to your computer and use it in GitHub Desktop.
PCA with Accord.Net and FSharp.Charting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Inspired by http://arxiv.org/abs/1210.7463 | |
// reference accord framework | |
#r "../packages/Accord.3.0.2/lib/net45/Accord.dll" | |
#r "../packages/Accord.Controls.3.0.2/lib/net45/Accord.Controls.dll" | |
#r "../packages/Accord.IO.3.0.2/lib/net45/Accord.IO.dll" | |
#r "../packages/Accord.Math.3.0.2/lib/net45/Accord.Math.dll" | |
#r "../packages/Accord.Statistics.3.0.2/lib/net45/Accord.Statistics.dll" | |
//reference deelde with fsharp charting | |
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll" | |
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll" | |
#I "../packages/FSharp.Charting.0.90.12" | |
#load "FSharp.Charting.fsx" | |
open System | |
open Accord | |
open Accord.Controls | |
open Accord.Math | |
open Accord.Math.Comparers | |
open Accord.Math.Decompositions | |
open Accord.Statistics | |
open Accord.Statistics.Analysis | |
open FSharp.Charting | |
let X : double array = [| 1.0; 2.0; 4.0; 6.0; 12.0; 15.0; 25.0; 45.0; 68.0; 67.0; 65.0; 98.0 |] | |
let xSum = X.Sum() / (float X.Length) | |
let xMean = X.Mean() | |
let x1 : double array = [| 0.0; 8.0; 12.0; 20.0 |] | |
let x2 : double array = [| 8.0; 9.0; 11.0; 12.0 |] | |
let mean1 = x1.Mean() | |
let mean2 = x2.Mean() | |
let stdDev1 = x1.StandardDeviation() | |
let stdDev2 = x2.StandardDeviation() | |
printfn "%A - %G" mean1 stdDev1 | |
printfn "%A - %G" mean2 stdDev2 | |
// covariance measure of relationship between two variables | |
let cov = x1.Covariance(x2) | |
let data : double [] [] = | |
[| [| 9.0; 39.0 |] | |
[| 15.0; 56.0 |] | |
[| 25.0; 93.0 |] | |
[| 14.0; 61.0 |] | |
[| 10.0; 50.0 |] | |
[| 18.0; 75.0 |] | |
[| 0.0; 32.0 |] | |
[| 16.0; 85.0 |] | |
[| 5.0; 42.0 |] | |
[| 19.0; 70.0 |] | |
[| 16.0; 66.0 |] | |
[| 20.0; 80.0 |] |] | |
let convarianceMatrix = data.Covariance() | |
printfn "Convariance matrix %A" (convarianceMatrix.ToString(" 000.00")) | |
Chart.Point(data |> Array.map (fun arr -> (arr.[0], arr.[1]))) | |
// playing with matrixes | |
let A : double [] [] = | |
[| [| 2.0; 3.0 |] | |
[| 2.0; 1.0 |] |] | |
// create vector | |
let u : double [] = [| 1.0; 3.0 |] | |
// Multiply them | |
let Au = A.Multiply(u) | |
//Eigenvectors, Eigenvalues and the Eigendecomposition | |
let M = | |
array2D [| [| 3.0; 2.0; 4.0 |] | |
[| 2.0; 0.0; 2.0 |] | |
[| 4.0; 2.0; 3.0 |] |] | |
// create an Eigenvale composition | |
let evd = new EigenvalueDecomposition(M) | |
// store the eigenvalues and eigenvectors | |
let delta = evd.RealEigenvalues | |
let V = evd.Eigenvectors | |
// Reconstruct | |
let R = V.MultiplyByDiagonal(delta).MultiplyByTranspose(V) | |
printfn "Delta %A , Vector %A" delta V | |
printfn "%A" R | |
(* PCA is just another name for projecting the data into the first | |
orthogonal vectors obtained by Eigendecomposing its covariance matrix *) | |
let data2 = | |
[| [| 2.5; 2.4 |] | |
[| 0.5; 0.7 |] | |
[| 2.2; 2.9 |] | |
[| 1.9; 2.2 |] | |
[| 3.1; 3.0 |] | |
[| 2.3; 2.7 |] | |
[| 2.0; 1.6 |] | |
[| 1.0; 1.1 |] | |
[| 1.5; 1.6 |] | |
[| 1.1; 0.9 |] |] | |
Chart.Point(data2 |> Array.map (fun arr -> (arr.[0], arr.[1]))) | |
// Substract mean | |
let mean = data2.Mean() | |
let dataAdjust = array2D(data2.Subtract(mean)) | |
// calculate the covariance matrix | |
let covAdjustData = dataAdjust.Covariance() | |
// calculate Eigenvectors pf covariance matrix | |
let evdAdjustData = new EigenvalueDecomposition(covAdjustData) | |
let eigenValues = evdAdjustData.RealEigenvalues; | |
let eigenVectors = evdAdjustData.Eigenvectors; | |
// Sort eigenvalues and values in descending order | |
let eigenVectorsSorted = Matrix.Sort(eigenValues, eigenVectors,new GeneralComparer(ComparerDirection.Descending, true)) | |
(* corresponding pairs are defined that each index from eigen values | |
corresponding to index column/vector from eigen vectors *) | |
// multiply adjust data with eigen vectors | |
let finalData = dataAdjust.Multiply(eigenVectors) | |
printfn "%A" (finalData.ToString(" 0.0000000000; -0.0000000000;")) | |
(* | |
SVD - Singular Value Decomposition | |
*) | |
let dataSVD = | |
[| [| 2.5; 2.4 |] | |
[| 0.5; 0.7 |] | |
[| 2.2; 2.9 |] | |
[| 1.9; 2.2 |] | |
[| 3.1; 3.0 |] | |
[| 2.3; 2.7 |] | |
[| 2.0; 1.6 |] | |
[| 1.0; 1.1 |] | |
[| 1.5; 1.6 |] | |
[| 1.1; 0.9 |] |] | |
Chart.Point(dataSVD |> Array.map (fun arr -> (arr.[0], arr.[1]))) | |
// Substract mean | |
let meanSVD = dataSVD.Mean() | |
let dataAdjustSVD = array2D(dataSVD.Subtract(meanSVD)) | |
// We are skipping calculation of covariance matrix and execute SVD | |
let svd = new SingularValueDecomposition(dataAdjustSVD) | |
let singularValues = svd.Diagonal | |
let svdEigenVectors = svd.RightSingularVectors | |
// Calculate the eigen values as the square of the singular values | |
let svdEigenValuesTempStep = singularValues.ElementwisePower(2.0) | |
// because of SVD we need divide eigen values by n - 1 to get the same as from EigenValueDecomposition | |
let svdEigenValues = svdEigenValuesTempStep.Divide(float (dataSVD.GetLength(0)) - 1.0) | |
printfn "Feature vector %A" (svdEigenVectors.ToString(" +0.0000000000; -0.0000000000;")) | |
// create final data for SVD | |
let svdFinalData = dataAdjustSVD.Multiply(svdEigenVectors) | |
printfn "%A" (svdFinalData.ToString(" 0.0000000000; -0.0000000000;")) | |
(* Now we can see how to use built-in PCA fuctionality at once *) | |
let dataPCA = | |
[| [| 2.5; 2.4 |] | |
[| 0.5; 0.7 |] | |
[| 2.2; 2.9 |] | |
[| 1.9; 2.2 |] | |
[| 3.1; 3.0 |] | |
[| 2.3; 2.7 |] | |
[| 2.0; 1.6 |] | |
[| 1.0; 1.1 |] | |
[| 1.5; 1.6 |] | |
[| 1.1; 0.9 |] |] | |
let pca = new PrincipalComponentAnalysis(dataPCA) | |
// Also we can set to use the analysis by correlation, which is more indicated when analysing data with high different measurement units | |
pca.Method = AnalysisMethod.Standardize | |
// and just compute | |
pca.Compute(); | |
// transform data | |
let pcaFinalData = pca.Transform(dataPCA) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment