C# FA Advanced Example

← All NMath Core Code Examples

 

using System;
using System.IO;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Core.Examples.CSharp
{
  /// <summary>
  /// Advanced factor analysis example. Runs a factor analysis on the covariance
  /// matrix of survey case data using explicitly configured factor extraction
  /// and factor rotation objects, then prints the results to the console.
  /// </summary>
  class FactorAnalysisAdvancedExample
  {
    /// <summary>
    /// Loads the case data from "advanced_factor_analysis.csv", performs the
    /// factor analysis, and writes communalities, explained variance and the
    /// rotated factor matrix to the console.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {
      // NMath Stats provides classes for performing a factor analysis on a set of case data.
      // Case data should be provided to these classes in matrix form - the variable values
      // in columns and each row representing a case. In this example we look at
      // a hypothetical sample of 300 responses on 6 items from a survey of college students'
      // favorite subject matter. The items range in value from 1 to 5, which represent a scale
      // from Strongly Dislike to Strongly Like. Our 6 items asked students to rate their liking
      // of different college subject matter areas, including biology (BIO), geology (GEO),
      // chemistry (CHEM), algebra (ALG), calculus (CALC), and statistics (STAT).

      // First load the data, which is in a comma delimited form.
      DataFrame favoriteSubject = DataFrame.Load( "advanced_factor_analysis.csv", true, false, ", ", true ).CleanRows();

      // NMath Stats provides three classes for
      // performing factor analysis. All will perform analysis on the correlation matrix
      // or the covariance matrix of case data. In addition each of these classes has
      // two class parameters, one specifying the algorithm used to extract the factors,
      // and the other specifying a factor rotation method. Here we use the class
      // FactorAnalysisCovariance, which analyzes the covariance matrix of the case data,
      // with principal components extraction and varimax rotation.
      // The other two factor analysis classes are FactorAnalysisCorrelation, for analyzing
      // the correlation matrix, and DoubleFactorAnalysis which can be used if you don't
      // have access to the original case data, just the correlation or covariance matrix
      // (DoubleFactorAnalysis is a base class for FactorAnalysisCorrelation and
      // FactorAnalysisCovariance).

      // Construct the factor analysis object we use for our analysis. Here we
      // first construct instances of the factor extraction and rotation classes
      // and use them in the factor analysis object construction. This gives
      // us control of the parameters affecting these algorithms.

      // Construct a principal components factor extraction object specifying the
      // function object for determining the number of factors to extract. The
      // type of this argument is Func<DoubleVector, DoubleMatrix, int>; it
      // takes as arguments the vector of eigenvalues and the matrix of eigenvectors
      // and returns the number of factors to extract. The class NumberOfFactors
      // contains static methods for creating functors for several common
      // strategies. Here we extract factors whose eigenvalues are greater
      // than 1.2 times the mean of the eigenvalues.
      var factorExtraction = new PCFactorExtraction( NumberOfFactors.EigenvaluesGreaterThanMean( 1.2 ) );

      // Next construct an instance of the rotation algorithm we want to use,
      // which is the varimax algorithm. Here we specify the convergence criterion
      // by setting the tolerance to 1e-6. Iteration will stop when the relative
      // change in the sum of the singular values is less than this number.
      // We also specify that we do NOT want Kaiser normalization to be performed.
      var factorRotation = new VarimaxRotation
      {
        Tolerance = 1e-6,
        Normalize = false
      };

      // We now construct our factor analysis object. We provide the case data as a matrix (columns
      // correspond to variables and rows correspond to cases), the bias type - variances will be
      // computed as biased, and our extraction and rotation objects.
      var factorAnalysis = new FactorAnalysisCovariance<PCFactorExtraction, VarimaxRotation>( favoriteSubject.ToDoubleMatrix(),
        BiasType.Biased, factorExtraction, factorRotation );

      // The number of extracted factors is decided at run time by the extraction
      // criterion above, so every table below is sized from this value.
      int factorCount = factorAnalysis.NumberOfFactors;

      Console.WriteLine();
      Console.WriteLine( "Number of factors extracted: " + factorCount );
      // Looks like we will retain two factors.

      // Extracted communalities are estimates of the proportion of variance in each variable
      // accounted for by the factors.
      Console.WriteLine();
      PrintCommunalities( favoriteSubject, "Extracted Communality", factorAnalysis.ExtractedCommunalities );

      Console.WriteLine();

      // We can get a little better picture of the communalities by looking at their
      // rescaled values. The FactorAnalysisCovariance class provides many 'rescaled'
      // results for calculations involving the extracted factors. In the rescaled
      // version the factors are first rescaled by dividing by the standard deviations
      // of the case variables before being used in the calculation.
      //
      // The rescaled communalities have values between 0 and 1. Most of the values
      // are close to 1, except for STAT. Maybe we should extract another factor?
      PrintCommunalities( favoriteSubject, "Rescaled Communality", factorAnalysis.RescaledExtractedCommunalities );

      Console.WriteLine();

      // Next we look at the variance explained by the initial solution
      // by printing out a table of these values.
      // The first column is just the factor number (starting at 1, matching
      // the column labels of the rotated factor matrix printed further down).
      //
      // The second 'Total' column gives the eigenvalue, or amount of
      // variance in the original variables accounted for by each factor.
      // Note that only the first two factors will be kept because their
      // value is greater than 1.2 times the mean of the eigenvalues.
      //
      // The % of Variance column gives the ratio, expressed as a percentage,
      // of the variance accounted for by each factor to the total
      // variance in all of the variables.
      //
      // The Cumulative % column gives the percentage of variance accounted
      // for by the first n factors. For example, the cumulative percentage
      // for the second factor is the sum of the percentage of variance
      // for the first and second factors.
      DoubleVector eigenvalues = factorAnalysis.FactorExtraction.Eigenvalues;
      DoubleVector varianceProportions = factorAnalysis.VarianceProportions;
      DoubleVector cumulativeVarianceProportions = factorAnalysis.CumulativeVarianceProportions;
      Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
      Console.WriteLine( "----------------------------------------------------" );
      for ( int i = 0; i < varianceProportions.Length; i++ )
      {
        Console.Write( i + 1 );
        Console.Write( '\t' + eigenvalues[i].ToString( "G4" ) + '\t' );
        Console.Write( varianceProportions[i].ToString( "P4" ) + '\t' );
        Console.WriteLine( cumulativeVarianceProportions[i].ToString( "P4" ) );
      }
      // Looks like we retain over 75% of the variance with just two factors.

      // Next we look at the percentages of variance explained by the
      // extracted rotated factors. Comparing this table with the first
      // rows of the previous one (one row per extracted factor, i.e. the
      // first two rows when two factors are extracted)
      // we see that the cumulative percentage of variation explained by the
      // extracted factors is maintained by the rotated factors,
      // but that variation is now spread more evenly over the factors,
      // but not by a lot. Maybe we could skip rotation, or try a
      // different rotation type.
      double eigenValueSum = NMathFunctions.Sum( eigenvalues );
      DoubleVector rotatedSumOfSquaredLoadings = factorAnalysis.RotatedSumOfSquaredLoadings;
      DoubleVector rotatedVarianceProportions = rotatedSumOfSquaredLoadings / eigenValueSum;
      Console.WriteLine();
      Console.WriteLine( "Rotated Extraction Sums of Squared Loadings" );
      Console.WriteLine();
      Console.WriteLine( "Factor\tTotal\tVariance\tCumulative" );
      Console.WriteLine( "----------------------------------------------------" );
      double cumulative = 0;

      for ( int i = 0; i < factorCount; i++ )
      {
        cumulative += rotatedVarianceProportions[i];
        Console.Write( i + 1 );
        Console.Write( '\t' + rotatedSumOfSquaredLoadings[i].ToString( "G4" ) );
        Console.Write( '\t' + rotatedVarianceProportions[i].ToString( "P4" ) );
        Console.WriteLine( '\t' + cumulative.ToString( "P4" ) );
      }

      Console.WriteLine();

      // The rotated factor matrix helps you to determine what the factors represent.
      // One column is printed per extracted factor rather than assuming exactly two.
      DoubleMatrix rotatedComponentMatrix = factorAnalysis.RotatedFactors;
      Console.WriteLine( "Rotated Factor Matrix" );
      Console.WriteLine();
      Console.WriteLine( "Predictor\tFactor" );

      var factorLabels = new string[factorCount];
      for ( int j = 0; j < factorCount; j++ )
      {
        factorLabels[j] = ( j + 1 ).ToString();
      }
      Console.WriteLine( "\t\t" + string.Join( "\t\t", factorLabels ) );
      Console.WriteLine( "-------------------------------------" );

      for ( int i = 0; i < favoriteSubject.Cols; i++ )
      {
        var loadings = new string[factorCount];
        for ( int j = 0; j < factorCount; j++ )
        {
          loadings[j] = rotatedComponentMatrix[i, j].ToString( "G4" );
        }
        Console.WriteLine( favoriteSubject[i].Name + "\t\t" + string.Join( "\t\t", loadings ) );
      }

      // The first factor is most highly correlated with BIO, GEO, CHEM.
      // CHEM is a better representative, however, because it is less correlated
      // with the other factor.
      //
      // The second factor is most highly correlated with ALG, CALC, and STAT.

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();
    }

    /// <summary>
    /// Prints a two column table listing, for every column of the data frame,
    /// the column name and the corresponding communality value (3 significant digits).
    /// </summary>
    /// <param name="data">Data frame whose column names label the rows of the table.</param>
    /// <param name="valueHeader">Heading printed above the value column.</param>
    /// <param name="communalities">Values to print; indexed in step with the columns of <paramref name="data"/>.</param>
    private static void PrintCommunalities( DataFrame data, string valueHeader, DoubleVector communalities )
    {
      Console.WriteLine( "Predictor\t" + valueHeader );
      Console.WriteLine( "-------------------------------------" );
      for ( int i = 0; i < data.Cols; i++ )
      {
        Console.Write( data[i].Name + "\t\t" );
        Console.WriteLine( communalities[i].ToString( "G3" ) );
      }
    }
  }
}

← All NMath Stats Code Examples
Top