# VB Principal Component Example

← All NMath Stats Code Examples

```vb
Imports System
Imports Microsoft.VisualBasic

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Imports System.IO

Namespace CenterSpace.NMath.Stats.Examples.VisualBasic

' A .NET example in Visual Basic showing how to perform a principal component analysis on a data set.
Module PrincipalComponentExample

Sub Main()

' Read in data from a file. These data give air pollution and related values
' for 41 U.S. cities.
'   SO2: Sulfur dioxide content of air in micrograms per cubic meter
'   Temp: Average annual temperature in degrees Fahrenheit
'   Man: Number of manufacturing enterprises employing 20 or more workers
'   Pop: Population size in thousands from the 1970 census
'   Wind: Average annual wind speed in miles per hour
'   Rain: Average annual precipitation in inches
'   RainDays: Average number of days with precipitation per year
' Source: http://lib.stat.cmu.edu/DASL/Datafiles/AirPollution.html

Dim DF As DataFrame = DataFrame.Load("PrincipalComponentExample.dat", True, True, ControlChars.Tab, True)

Console.WriteLine()
Console.WriteLine(DF)
Console.WriteLine()

' Class DoublePCA performs a double-precision principal component
' analysis on a given data set. The data may optionally be centered and
' scaled before analysis takes place. By default, variables are centered
' but not scaled.
Dim PCA As New DoublePCA(DF)

' Once your data is analyzed, you can retrieve information about the data.
' If centering was specified, the column means are subtracted from
' the column values before analysis takes place. If scaling was specified,
' column values are scaled to have unit variance before analysis by dividing
' by the column norm.
Console.WriteLine("Number of Observations = " & PCA.NumberOfObservations)
Console.WriteLine("Number of Variables = " & PCA.NumberOfVariables)
Console.WriteLine()
Console.WriteLine("Column Means = " & PCA.Means.ToString("G5"))
Console.WriteLine()
Console.WriteLine("Column Norms = " & PCA.Norms.ToString("G5"))
Console.WriteLine()
Console.WriteLine("Data was centered? = " & PCA.IsCentered)
Console.WriteLine("Data was scaled? = " & PCA.IsScaled)
Console.WriteLine()

Console.WriteLine()

' You can retrieve a particular principal component using the indexer.
Console.WriteLine("First principal component = " & PCA(0).ToString("G5"))
Console.WriteLine()
Console.WriteLine("Second principal component = " & PCA(1).ToString("G5"))
Console.WriteLine()

' The first principal component accounts for as much of the variability in the
' data as possible, and each succeeding component accounts for as much of the
' remaining variability as possible.
Console.WriteLine("Variance Proportions = " & PCA.VarianceProportions.ToString("G5"))
Console.WriteLine()
Console.WriteLine("Cumulative Variance Proportions = " & PCA.CumulativeVarianceProportions.ToString("G9"))
Console.WriteLine()

' You can also get the number of principal components required to account for
' a given proportion of the total variance. In this case, a plane fit to the
' original 7-dimensional space accounts for 99% of the variance.
Console.WriteLine("PCs that account for 99% of the variance = " & PCA.Threshold(0.99))
Console.WriteLine()

' The Score matrix is the data formed by transforming the original data into
' the space of the principal components.
Console.WriteLine("Scores =")
Console.WriteLine(PCA.Scores.ToTabDelimited("G9"))

Console.WriteLine()
Console.WriteLine("Press Enter Key")