using System;
using CenterSpace.NMath.Core;
using System.IO;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to perform a principal component analysis on a data set.
  /// </summary>
  public class PrincipalComponentExample
  {
    /// <summary>
    /// Loads the air pollution data set, runs a principal component analysis on it,
    /// and writes the results to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main( string[] args )
    {
      // Read in data from a file. These data give air pollution and related values
      // for 41 U.S. cities.
      //   SO2: Sulfur dioxide content of air in micrograms per cubic meter
      //   Temp: Average annual temperature in degrees Fahrenheit
      //   Man: Number of manufacturing enterprises employing 20 or more workers
      //   Pop: Population size in thousands from the 1970 census
      //   Wind: Average annual wind speed in miles per hour
      //   Rain: Average annual precipitation in inches
      //   RainDays: Average number of days with precipitation per year
      // Source: http://lib.stat.cmu.edu/DASL/Datafiles/AirPollution.html
      DataFrame df = DataFrame.Load( "PrincipalComponentExample.dat", true, true, "\t", true );

      Console.WriteLine();
      Console.WriteLine( df );
      Console.WriteLine();

      // Class DoublePCA performs a double-precision principal component
      // analysis on a given data set. The data may optionally be centered and
      // scaled before analysis takes place. By default, variables are centered
      // but not scaled.
      var pca = new DoublePCA( df );

      // Once your data is analyzed, you can retrieve information about the data.
      // If centering was specified, the column means are subtracted from
      // the column values before analysis takes place. If scaling was specified,
      // column values are scaled to have unit variance before analysis by dividing
      // by the column norm.
      Console.WriteLine( "Number of Observations = " + pca.NumberOfObservations );
      Console.WriteLine( "Number of Variables = " + pca.NumberOfVariables );
      Console.WriteLine();

      Console.WriteLine( "Column Means = " + pca.Means.ToString( "G5" ) );
      Console.WriteLine();

      Console.WriteLine( "Column Norms = " + pca.Norms.ToString( "G5" ) );
      Console.WriteLine();

      Console.WriteLine( "Data was centered? = " + pca.IsCentered );
      Console.WriteLine( "Data was scaled? = " + pca.IsScaled );
      Console.WriteLine();

      // The Loadings property gets the loading matrix. Each column is a principal component.
      Console.WriteLine( "Loadings =" );
      Console.WriteLine( pca.Loadings.ToTabDelimited( "G9" ));
      Console.WriteLine();

      // You can retrieve a particular principal component using the indexer.
      Console.WriteLine( "First principal component = " + pca[0].ToString( "G5" ) );
      Console.WriteLine();

      Console.WriteLine( "Second principal component = " + pca[1].ToString( "G5" ) );
      Console.WriteLine();

      // The first principal component accounts for as much of the variability in the
      // data as possible, and each succeeding component accounts for as much of the
      // remaining variability as possible.
      Console.WriteLine( "Variance Proportions = " + pca.VarianceProportions.ToString( "G5" ) );
      Console.WriteLine();

      Console.WriteLine( "Cumulative Variance Proportions = " + pca.CumulativeVarianceProportions.ToString( "G9" ) );
      Console.WriteLine();

      // You can also get the number of principal components required to account for
      // a given proportion of the total variance. In this case, a plane fit to the
      // original 7-dimensional space accounts for 99% of the variance.
      Console.WriteLine( "PCs that account for 99% of the variance = " + pca.Threshold( .99 ) );
      Console.WriteLine();

      // The Score matrix is the data formed by transforming the original data into
      // the space of the principal components.
      Console.WriteLine( "Scores =" );
      Console.WriteLine( pca.Scores.ToTabDelimited( "G9" ) );

      // Keep the console window open until the user responds.
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    } // Main

  } // class

} // namespace