[TOC]
Imports System
Imports Microsoft.VisualBasic
Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Namespace CenterSpace.NMath.Stats.Examples.VisualBasic
' A .NET example in VB.NET showing how to perform a principal component
' analysis (PCA) on a data set using the NMath Stats DoublePCA class.
Module PrincipalComponentExample

  ' Entry point. Loads the air-pollution data set from a tab-delimited file,
  ' runs a principal component analysis on it, and writes the data and the
  ' analysis results to the console. Blocks on console input before returning.
  Sub Main()

    ' Read in data from a file. These data give air pollution and related values
    ' for 41 U.S. cities.
    '   SO2:      Sulfur dioxide content of air in micrograms per cubic meter
    '   Temp:     Average annual temperature in degrees Fahrenheit
    '   Man:      Number of manufacturing enterprises employing 20 or more workers
    '   Pop:      Population size in thousands from the 1970 census
    '   Wind:     Average annual wind speed in miles per hour
    '   Rain:     Average annual precipitation in inches
    '   RainDays: Average number of days with precipitation per year
    ' Source: http://lib.stat.cmu.edu/DASL/Datafiles/AirPollution.html
    '
    ' The path is relative to the working directory (two levels up).
    ' VB.NET string literals have no backslash escape sequences, so each path
    ' separator is written as a single backslash.
    ' NOTE(review): the three Boolean arguments presumably select column
    ' headers, row keys, and value parsing - confirm against DataFrame.Load.
    Dim DF As DataFrame = DataFrame.Load("..\..\PrincipalComponentExample.dat", True, True, ControlChars.Tab, True)

    Console.WriteLine()
    Console.WriteLine(DF)
    Console.WriteLine()

    ' Class DoublePCA performs a double-precision principal component
    ' analysis on a given data set. The data may optionally be centered and
    ' scaled before analysis takes place. By default, variables are centered
    ' but not scaled.
    Dim PCA As New DoublePCA(DF)

    ' Once your data is analyzed, you can retrieve information about the data.
    ' If centering was specified, the column means are subtracted from
    ' the column values before analysis takes place. If scaling was specified,
    ' column values are scaled to have unit variance before analysis by dividing
    ' by the column norm.
    Console.WriteLine("Number of Observations = " & PCA.NumberOfObservations)
    Console.WriteLine("Number of Variables = " & PCA.NumberOfVariables)
    Console.WriteLine()

    Console.WriteLine("Column Means = " & PCA.Means.ToString())
    Console.WriteLine()

    Console.WriteLine("Column Norms = " & PCA.Norms.ToString())
    Console.WriteLine()

    Console.WriteLine("Data was centered? = " & PCA.IsCentered)
    Console.WriteLine("Data was scaled? = " & PCA.IsScaled)
    Console.WriteLine()

    ' The Loadings property gets the loading matrix. Each column is a
    ' principal component.
    Console.WriteLine("Loadings = " & PCA.Loadings.ToString())
    Console.WriteLine()

    ' You can retrieve a particular principal component using the indexer
    ' (zero-based: index 0 is the first component).
    Console.WriteLine("First principal component = " & PCA(0).ToString())
    Console.WriteLine()

    Console.WriteLine("Second principal component = " & PCA(1).ToString())
    Console.WriteLine()

    ' The first principal component accounts for as much of the variability in
    ' the data as possible, and each succeeding component accounts for as much
    ' of the remaining variability as possible.
    Console.WriteLine("Variance Proportions = " & PCA.VarianceProportions.ToString())
    Console.WriteLine()

    Console.WriteLine("Cumulative Variance Proportions = " & PCA.CumulativeVarianceProportions.ToString())
    Console.WriteLine()

    ' You can also get the number of principal components required to account
    ' for a given proportion of the total variance. In this case, a plane fit
    ' to the original 7-dimensional space accounts for 99% of the variance.
    Console.WriteLine("PCs that account for 99% of the variance = " & PCA.Threshold(0.99))
    Console.WriteLine()

    ' The Score matrix is the data formed by transforming the original data
    ' into the space of the principal components.
    Console.WriteLine("Scores = " & PCA.Scores.ToString())
    Console.WriteLine()

    ' Keep the console window open until the user responds.
    Console.WriteLine("Press Enter Key")
    Console.Read()

  End Sub

End Module
End Namespace
[TOC]