// ← All NMath Code Examples
using System;
using System.IO;
using CenterSpace.NMath.Core;
namespace CenterSpace.NMath.Examples.CSharp
{
/// <summary>
/// Console example that runs a factor analysis on survey case data using
/// principal components extraction and varimax rotation, then prints the
/// communalities, explained variance, and rotated factor matrix.
/// </summary>
class FactorAnalysisAdvancedExample
{
    // Separator line printed under the two-column communality and factor matrix tables.
    private const string NarrowRule = "-------------------------------------";

    // Separator line printed under the four-column variance tables.
    private const string WideRule = "----------------------------------------------------";

    /// <summary>
    /// Program entry point. Loads the case data, performs the factor analysis,
    /// and writes the results to the console.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {
        // NMath Stats provide classes for performing a factor analysis on a set of case data.
        // Case data should be provided to these classes in matrix form - the variable values
        // in columns and each row representing a case. In this example we look at
        // a hypothetical sample of 300 responses on 6 items from a survey of college students'
        // favorite subject matter. The items range in value from 1 to 5, which represent a scale
        // from Strongly Dislike to Strongly Like. Our 6 items asked students to rate their liking
        // of different college subject matter areas, including biology (BIO), geology (GEO),
        // chemistry (CHEM), algebra (ALG), calculus (CALC), and statistics (STAT).

        // First load the data, which is in a comma delimited form.
        // NOTE(review): the delimiter passed here is ", " (comma followed by a space);
        // confirm it matches the separator actually used in the data file.
        DataFrame favoriteSubject = DataFrame.Load( "advanced_factor_analysis.csv", true, false, ", ", true ).CleanRows();

        // NMath Stats provides three classes for
        // performing factor analysis. All will perform analysis on the correlation matrix
        // or the covariance matrix of case data. In addition each of these classes has
        // two class parameters, one specifying the algorithm used to extract the factors,
        // and the other specifying a factor rotation method. Here we use the class
        // FactorAnalysisCovariance, which analyzes the covariance matrix of the case data,
        // with principal components extraction and varimax rotation.
        // The other two factor analysis classes are FactorAnalysisCorrelation, for analyzing
        // the correlation matrix, and DoubleFactorAnalysis which can be used if you don't
        // have access to the original case data, just the correlation or covariance matrix
        // (DoubleFactorAnalysis is a base class for FactorAnalysisCorrelation and
        // FactorAnalysisCovariance).

        // Construct the factor analysis object we use for our analysis. Here we
        // first construct instances of the factor extraction and rotation classes
        // and use them in the factor analysis object construction. This gives
        // us control of the parameters affecting these algorithms.

        // Construct a principal components factor extraction object specifying the
        // function object for determining the number of factors to extract. The
        // type of this argument is Func<DoubleVector, DoubleMatrix, int>; it
        // takes as arguments the vector of eigenvalues and the matrix of eigenvectors
        // and returns the number of factors to extract. The class NumberOfFactors
        // contains static methods for creating functors for several common
        // strategies. Here we extract factors whose eigenvalues are greater
        // than 1.2 times the mean of the eigenvalues.
        var factorExtraction = new PCFactorExtraction( NumberOfFactors.EigenvaluesGreaterThanMean( 1.2 ) );

        // Next construct an instance of the rotation algorithm we want to use,
        // which is the varimax algorithm. Here we specify convergence criteria
        // by setting the tolerance to 1e-6. Iteration will stop when the relative
        // change in the sum of the singular values is less than this number.
        // We also specify that we do NOT want Kaiser normalization to be performed.
        var factorRotation = new VarimaxRotation
        {
            Tolerance = 1e-6,
            Normalize = false
        };

        // We now construct our factor analysis object. We provide the case data as a matrix (columns
        // correspond to variables and rows correspond to cases), the bias type - variances will be
        // computed as biased, and our extraction and rotation objects.
        var factorAnalysis = new FactorAnalysisCovariance<PCFactorExtraction, VarimaxRotation>(
            favoriteSubject.ToDoubleMatrix(), BiasType.Biased, factorExtraction, factorRotation );

        Console.WriteLine();
        Console.WriteLine( "Number of factors extracted: " + factorAnalysis.NumberOfFactors );
        // Looks like we will retain two factors.

        // Extracted communalities are estimates of the proportion of variance in each variable
        // accounted for by the factors.
        DoubleVector extractedCommunalities = factorAnalysis.ExtractedCommunalities;
        Console.WriteLine();
        WriteCommunalityTable( favoriteSubject, extractedCommunalities, "Extracted Communality" );

        // We can get a little better picture of the communalities by looking at their
        // rescaled values. The FactorAnalysisCovariance class provides many rescaled
        // results for calculations involving the extracted factors. In the rescaled
        // version the factors are first rescaled by dividing by the standard deviations
        // of the case variables before being used in the calculation.
        //
        // The rescaled communalities have values between 0 and 1. Most of the values
        // are close to 1, except for STAT. Maybe we should extract another factor?
        DoubleVector rescaledCommunalities = factorAnalysis.RescaledExtractedCommunalities;
        WriteCommunalityTable( favoriteSubject, rescaledCommunalities, "Rescaled Communality" );

        // Next we look at the variance explained by the initial solution
        // by printing out a table of these values.
        // The first column will just be the extracted factor number.
        //
        // The second column, Total, gives the eigenvalue, or amount of
        // variance in the original variables accounted for by each factor.
        // Note that only the first two factors will be kept because their
        // value is greater than 1.2 times the mean of the eigenvalues.
        //
        // The % of Variance column gives the ratio, expressed as a percentage,
        // of the variance accounted for by each factor to the total
        // variance in all of the variables.
        //
        // The Cumulative % column gives the percentage of variance accounted
        // for by the first n factors. For example, the cumulative percentage
        // for the second factor is the sum of the percentage of variance
        // for the first and second factors.
        Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
        Console.WriteLine( WideRule );
        for ( int i = 0; i < factorAnalysis.VarianceProportions.Length; i++ )
        {
            Console.Write( i );
            Console.Write( "\t" + factorAnalysis.FactorExtraction.Eigenvalues[i].ToString( "G4" ) + "\t" );
            Console.Write( factorAnalysis.VarianceProportions[i].ToString( "P4" ) + "\t" );
            Console.WriteLine( factorAnalysis.CumulativeVarianceProportions[i].ToString( "P4" ) );
        }
        // Looks like we retain over 75% of the variance with just two factors.

        // Next we look at the percentages of variance explained by the
        // extracted rotated factors. Comparing this table with the first
        // two rows of the previous one (two factors are extracted)
        // we see that the cumulative percentage of variation explained by the
        // extracted factors is maintained by the rotated factors,
        // but that variation is now spread more evenly over the factors,
        // but not by a lot. Maybe we could skip rotation, or try a
        // different rotation type.
        double eigenValueSum = NMathFunctions.Sum( factorAnalysis.FactorExtraction.Eigenvalues );
        DoubleVector rotatedVarianceProportions = factorAnalysis.RotatedSumOfSquaredLoadings / eigenValueSum;
        Console.WriteLine();
        Console.WriteLine( "Rotated Extraction Sums of Squared Loadings" );
        Console.WriteLine();
        Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
        Console.WriteLine( WideRule );
        double cumulative = 0;
        for ( int i = 0; i < factorAnalysis.NumberOfFactors; i++ )
        {
            // Running total of the per-factor proportions gives the cumulative column.
            cumulative += rotatedVarianceProportions[i];
            Console.Write( i );
            Console.Write( "\t" + factorAnalysis.RotatedSumOfSquaredLoadings[i].ToString( "G4" ) );
            Console.Write( "\t" + rotatedVarianceProportions[i].ToString( "P4" ) );
            Console.WriteLine( "\t" + cumulative.ToString( "P4" ) );
        }
        Console.WriteLine();

        // The rotated factor matrix helps you to determine what the factors represent.
        DoubleMatrix rotatedComponentMatrix = factorAnalysis.RotatedFactors;
        Console.WriteLine( "Rotated Factor Matrix" );
        Console.WriteLine();
        Console.WriteLine( "Predictor\tFactor" );
        Console.WriteLine( "\t\t1\t\t2" );
        Console.WriteLine( NarrowRule );
        for ( int i = 0; i < favoriteSubject.Cols; i++ )
        {
            // Only the first two factor columns are printed, matching the table heading.
            Console.Write( favoriteSubject[i].Name + "\t\t" );
            Console.Write( rotatedComponentMatrix[i, 0].ToString( "G4" ) + "\t\t" );
            Console.WriteLine( rotatedComponentMatrix[i, 1].ToString( "G4" ) );
        }
        // The first factor is most highly correlated with BIO, GEO, CHEM.
        // CHEM is a better representative, however, because it is less correlated
        // with the other factor.
        //
        // The second factor is most highly correlated with ALG, CALC, and STAT.

        Console.WriteLine();
        Console.WriteLine( "Press Enter Key" );
        Console.Read();
    }

    /// <summary>
    /// Writes a two-column table pairing the name of each column in
    /// <paramref name="data"/> with the value at the same index in
    /// <paramref name="communalities"/>, followed by a blank line.
    /// </summary>
    /// <param name="data">Data frame whose column names label the rows.</param>
    /// <param name="communalities">Values to print, indexed like the data frame columns.</param>
    /// <param name="valueHeading">Heading text for the value column.</param>
    private static void WriteCommunalityTable( DataFrame data, DoubleVector communalities, string valueHeading )
    {
        Console.WriteLine( "Predictor\t" + valueHeading );
        Console.WriteLine( NarrowRule );
        for ( int i = 0; i < data.Cols; i++ )
        {
            Console.Write( data[i].Name + "\t\t" );
            Console.WriteLine( communalities[i].ToString( "G3" ) );
        }
        Console.WriteLine();
    }
}
}
// ← All NMath Code Examples