using System;
using System.IO;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Examples.CSharp
{
  /// <summary>
  /// Example showing how to run a factor analysis on the covariance matrix of
  /// case data, with explicit control over the factor extraction and factor
  /// rotation algorithms.
  /// </summary>
  class FactorAnalysisAdvancedExample
  {
    /// <summary>
    /// Loads the survey data, performs the factor analysis and writes the
    /// resulting tables to the console.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {
      // NMath Stats provide classes for performing a factor analysis on a set of case data.
      // Case data should be provided to these classes in matrix form - the variable values
      // in columns and each row representing a case. In this example we look at
      // a hypothetical sample of 300 responses on 6 items from a survey of college students
      // favorite subject matter. The items range in value from 1 to 5, which represent a scale
      // from Strongly Dislike to Strongly Like. Our 6 items asked students to rate their liking
      // of different college subject matter areas, including biology (BIO), geology (GEO),
      // chemistry (CHEM), algebra (ALG), calculus (CALC), and statistics (STAT).

      // First load the data, which is in a comma delimited form.
      DataFrame favoriteSubject = DataFrame.Load( "advanced_factor_analysis.csv", true, false, ", ", true ).CleanRows();

      // NMath Stats provides three classes for
      // performing factor analysis. All will perform analysis on the correlation matrix
      // or the covariance matrix of case data. In addition each of these classes has
      // two class parameters, one specifying the algorithm used to extract the factors,
      // and the other specifying a factor rotation method. Here we use the class
      // FactorAnalysisCovariance, which analyzes the covariance matrix of the case data,
      // with principal components extraction and varimax rotation.
      // The other two factor analysis classes are FactorAnalysisCorrelation, for analyzing
      // the correlation matrix, and DoubleFactorAnalysis which can be used if you don't
      // have access to the original case data, just the correlation or covariance matrix
      // (DoubleFactorAnalysis is a base class for FactorAnalysisCorrelation and
      // FactorAnalysisCovariance).

      // Construct the factor analysis object we use for our analysis. Here we
      // first construct instances of the factor extraction and rotation classes
      // and use them in the factor analysis object construction. This gives
      // us control of the parameters affecting these algorithms.

      // Construct a principal components factor extraction object specifying the
      // function object for determining the number of factors to extract. The
      // type of this argument is Func<DoubleVector, DoubleMatrix, int>, it
      // takes as arguments the vector of eigenvalues and the matrix of eigenvectors
      // and returns the number of factors to extract. The class NumberOfFactors
      // contains static methods for creating functors for several common
      // strategies. Here we extract factors whose eigenvalues are greater
      // than 1.2 times the mean of the eigenvalues.
      var factorExtraction = new PCFactorExtraction( NumberOfFactors.EigenvaluesGreaterThanMean( 1.2 ) );

      // Next construct an instance of the rotation algorithm we want to use,
      // which is the varimax algorithm. Here we specify convergence criteria
      // by setting the tolerance to 1e-6. Iteration will stop when the relative
      // change in the sum of the singular values is less than this number.
      // We also specify that we do NOT want Kaiser normalization to be performed.
      var factorRotation = new VarimaxRotation { Tolerance = 1e-6, Normalize = false };

      // We now construct our factor analysis object. We provide the case data as a matrix (columns
      // correspond to variables and rows correspond to cases), the bias type - variances will be
      // computed as biased, and our extraction and rotation objects.
      var factorAnalysis = new FactorAnalysisCovariance<PCFactorExtraction, VarimaxRotation>(
        favoriteSubject.ToDoubleMatrix(), BiasType.Biased, factorExtraction, factorRotation );

      Console.WriteLine();
      Console.WriteLine( "Number of factors extracted: " + factorAnalysis.NumberOfFactors );
      // Looks like we will retain two factors.

      // Extracted communalities are estimates of the proportion of variance in each variable
      // accounted for by the factors.
      DoubleVector extractedCommunalities = factorAnalysis.ExtractedCommunalities;
      Console.WriteLine();
      Console.WriteLine( "Predictor\tExtracted Communality" );
      Console.WriteLine( "-------------------------------------" );
      for ( int i = 0; i < favoriteSubject.Cols; i++ )
      {
        Console.Write( favoriteSubject[i].Name + "\t\t" );
        Console.WriteLine( extractedCommunalities[i].ToString( "G3" ) );
      }
      Console.WriteLine();

      // We can get a little better picture of the communalities by looking at their
      // rescaled values. The FactorAnalysisCovariance class provides many rescaled
      // results for calculations involving the extracted factors. In the rescaled
      // version the factors are first rescaled by dividing by the standard deviations
      // of the case variables before being used in the calculation.
      //
      // The rescaled communalities have values between 0 and 1. Most of the values
      // are close to 1, except for STAT. Maybe we should extract another factor?
      DoubleVector rescaledCommunalities = factorAnalysis.RescaledExtractedCommunalities;
      Console.WriteLine( "Predictor\tRescaled Communality" );
      Console.WriteLine( "-------------------------------------" );
      for ( int i = 0; i < favoriteSubject.Cols; i++ )
      {
        Console.Write( favoriteSubject[i].Name + "\t\t" );
        Console.WriteLine( rescaledCommunalities[i].ToString( "G3" ) );
      }
      Console.WriteLine();

      // Next we look at the variance explained by the initial solution
      // by printing out a table of these values.
      // The first column will just be the extracted factor number.
      //
      // The second, Total, column gives the eigenvalue, or amount of
      // variance in the original variables accounted for by each factor.
      // Note that only the first two factors will be kept because their
      // value is greater than 1.2 times the mean of the eigenvalues.
      //
      // The % of Variance column gives the ratio, expressed as a percentage,
      // of the variance accounted for by each factor to the total
      // variance in all of the variables.
      //
      // The Cumulative % column gives the percentage of variance accounted
      // for by the first n factors. For example, the cumulative percentage
      // for the second factor is the sum of the percentage of variance
      // for the first and second factors.
      Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
      Console.WriteLine( "----------------------------------------------------" );
      for ( int i = 0; i < factorAnalysis.VarianceProportions.Length; i++ )
      {
        Console.Write( i );
        Console.Write( "\t" + factorAnalysis.FactorExtraction.Eigenvalues[i].ToString( "G4" ) + "\t" );
        Console.Write( factorAnalysis.VarianceProportions[i].ToString( "P4" ) + "\t" );
        Console.WriteLine( factorAnalysis.CumulativeVarianceProportions[i].ToString( "P4" ) );
      }
      // Looks like we retain over 75% of the variance with just two factors.

      // Next we look at the percentages of variance explained by the
      // extracted rotated factors. Comparing this table with the first
      // two rows of the previous one (two factors are extracted)
      // we see that the cumulative percentage of variation explained by the
      // extracted factors is maintained by the rotated factors,
      // but that variation is now spread more evenly over the factors,
      // but not by a lot. Maybe we could skip rotation, or try a
      // different rotation type.
      double eigenValueSum = NMathFunctions.Sum( factorAnalysis.FactorExtraction.Eigenvalues );

      // Each rotated sum of squared loadings expressed as a fraction of the
      // sum of all eigenvalues.
      DoubleVector rotatedSSLoadingsVarianceProportions = factorAnalysis.RotatedSumOfSquaredLoadings / eigenValueSum;
      Console.WriteLine();
      Console.WriteLine( "Rotated Extraction Sums of Squared Loadings" );
      Console.WriteLine();
      Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
      Console.WriteLine( "----------------------------------------------------" );
      double cumulative = 0;
      for ( int i = 0; i < factorAnalysis.NumberOfFactors; i++ )
      {
        cumulative += rotatedSSLoadingsVarianceProportions[i];
        Console.Write( i );
        Console.Write( "\t" + factorAnalysis.RotatedSumOfSquaredLoadings[i].ToString( "G4" ) );
        Console.Write( "\t" + rotatedSSLoadingsVarianceProportions[i].ToString( "P4" ) );
        Console.WriteLine( "\t" + cumulative.ToString( "P4" ) );
      }
      Console.WriteLine();

      // The rotated factor matrix helps you to determine what the factors represent.
      // NOTE: the table below prints exactly two factor columns (matrix columns
      // 0 and 1), so it relies on two factors having been extracted above.
      DoubleMatrix rotatedComponentMatrix = factorAnalysis.RotatedFactors;
      Console.WriteLine( "Rotated Factor Matrix" );
      Console.WriteLine();
      Console.WriteLine( "Predictor\tFactor" );
      Console.WriteLine( "\t\t1\t\t2" );
      Console.WriteLine( "-------------------------------------" );
      for ( int i = 0; i < favoriteSubject.Cols; i++ )
      {
        Console.Write( favoriteSubject[i].Name + "\t\t" );
        Console.Write( rotatedComponentMatrix[i, 0].ToString( "G4" ) + "\t\t" );
        Console.WriteLine( rotatedComponentMatrix[i, 1].ToString( "G4" ) );
      }

      // The first factor is most highly correlated with BIO, GEO, CHEM.
      // CHEM is a better representative, however, because it is less correlated
      // with the other factor.
      //
      // The second factor is most highly correlated with ALG, CALC, and STAT.

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();
    }
  }
}