VB Principal Component Example

← All NMath Stats Code Examples


Imports System
Imports Microsoft.VisualBasic

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Imports System.IO

Namespace CenterSpace.NMath.Stats.Examples.VisualBasic

  ' A .NET example in Visual Basic showing how to perform a principal component analysis on a data set.
  Module PrincipalComponentExample

    ' Entry point. Loads the air-pollution data set, runs a principal component
    ' analysis on it, and writes a summary of the analysis, the loading matrix,
    ' and the score matrix to the console.
    Sub Main()

      ' Read in data from a file. These data give air pollution and related values
      ' for 41 U.S. cities.
      '   SO2: Sulfur dioxide content of air in micrograms per cubic meter 
      '   Temp: Average annual temperature in degrees Fahrenheit 
      '   Man: Number of manufacturing enterprises employing 20 or more workers 
      '   Pop: Population size in thousands from the 1970 census 
      '   Wind: Average annual wind speed in miles per hour 
      '   Rain: Average annual precipitation in inches 
      '   RainDays: Average number of days with precipitation per year 
      ' Source: http://lib.stat.cmu.edu/DASL/Datafiles/AirPollution.html
      '
      ' The file is tab-delimited and is resolved relative to the current working
      ' directory. NOTE(review): the Boolean arguments presumably select column
      ' headers, row keys, and type parsing - confirm against DataFrame.Load.
      Dim DF As DataFrame = DataFrame.Load("PrincipalComponentExample.dat", True, True, ControlChars.Tab, True)


      ' Class DoublePCA performs a double-precision principal component
      ' analysis on a given data set. The data may optionally be centered and
      ' scaled before analysis takes place. By default, variables are centered
      ' but not scaled.
      Dim PCA As New DoublePCA(DF)

      ' Once your data is analyzed, you can retrieve information about the data.
      ' If centering was specified, the column means are subtracted from
      ' the column values before analysis takes place. If scaling was specified,
      ' column values are scaled to have unit variance before analysis by dividing
      ' by the column norm.
      Console.WriteLine("Number of Observations = " & PCA.NumberOfObservations)
      Console.WriteLine("Number of Variables = " & PCA.NumberOfVariables)
      Console.WriteLine("Column Means = " & PCA.Means.ToString("G5"))
      Console.WriteLine("Column Norms = " & PCA.Norms.ToString("G5"))
      Console.WriteLine("Data was centered? = " & PCA.IsCentered)
      Console.WriteLine("Data was scaled? = " & PCA.IsScaled)

      ' The Loadings property gets the loading matrix. Each column is a principal component.
      ' Print the matrix itself after the label so the heading is not left dangling.
      Console.WriteLine("Loadings =")
      Console.WriteLine(PCA.Loadings.ToTabDelimited("G9"))

      ' You can retrieve a particular principal component using the indexer.
      Console.WriteLine("First principal component = " & PCA(0).ToString("G5"))
      Console.WriteLine("Second principal component = " & PCA(1).ToString("G5"))

      ' The first principal component accounts for as much of the variability in the
      ' data as possible, and each succeeding component accounts for as much of the
      ' remaining variability as possible.
      Console.WriteLine("Variance Proportions = " & PCA.VarianceProportions.ToString("G5"))
      Console.WriteLine("Cumulative Variance Proportions = " & PCA.CumulativeVarianceProportions.ToString("G9"))

      ' You can also get the number of principal components required to account for
      ' a given proportion of the total variance. In this case, a plane fit to the
      ' original 7-dimensional space accounts for 99% of the variance.
      Console.WriteLine("PCs that account for 99% of the variance = " & PCA.Threshold(0.99))

      ' The Score matrix is the data formed by transforming the original data into
      ' the space of the principal components.
      Console.WriteLine("Scores =")
      Console.WriteLine(PCA.Scores.ToTabDelimited("G9"))

      ' Keep the console window open until the user presses Enter, as the prompt promises.
      Console.WriteLine("Press Enter Key")
      Console.ReadLine()

    End Sub

  End Module

End Namespace

← All NMath Stats Code Examples