C# Factor Example

← All NMath Stats Code Examples

 

using System;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
using System.IO;

namespace FactorExample
{
  /// <summary>
  /// A .NET example in C# that demonstrates building factors from data frame
  /// columns and using them to group data.
  /// </summary>
  /// <remarks>
  /// A Factor is a categorical vector whose elements all come from a finite set
  /// of factor levels. It therefore holds two pieces: a string array of levels,
  /// and an integer array of categorical data in which every element is an index
  /// into the levels array.
  /// </remarks>
  public class FactorExample
  {

    static void Main( string[] args )
    {
      // Load the sample data. It holds reading-test scores for 17 children,
      // together with each child's gender ( "male" or "female" ) and grade
      // (4, 5, or 6).
      DataFrame frame = DataFrame.Load( "FactorExample.dat", true, false, "\t", true );

      Console.WriteLine();
      Console.WriteLine( frame + "\n" );

      // The usual way to obtain a Factor is DataFrame.GetFactor(), which builds
      // the levels from the sorted, unique values found in the named column.
      Factor genderFactor = frame.GetFactor( "Gender" );

      // Show the gender factor, its levels, and its categorical data.
      Console.WriteLine( "Gender factor: " + genderFactor );
      Console.WriteLine( "Gender levels: " + genderFactor.LevelsToString() );
      Console.WriteLine( "Gender data: " + genderFactor.DataToString() );
      Console.WriteLine();

      // Same again for the grade column.
      Factor gradeFactor = frame.GetFactor( "Grade" );

      // Show the grade factor, its levels, and its categorical data.
      Console.WriteLine( "Grade factor: " + gradeFactor );
      Console.WriteLine( "Grade levels: " + gradeFactor.LevelsToString() );
      Console.WriteLine( "Grade data: " + gradeFactor.DataToString() );
      Console.WriteLine();

      // Factors are mainly used with Subset.GetGroupings(). The single-factor
      // overload returns one Subset per level; element k is indexed in step
      // with Levels[k] below.
      Subset[] genderGroups = Subset.GetGroupings( genderFactor );
      Subset[] gradeGroups = Subset.GetGroupings( gradeFactor );

      // Mean over every row of the Score column.
      Console.WriteLine( "Grand mean = {0}", StatsFunctions.Mean( frame["Score"] ) );
      Console.WriteLine();

      // Mean score within each gender level, then within each grade level.
      Console.WriteLine( "Marginal Means" );
      for ( int g = 0; g < genderFactor.NumberOfLevels; g++ )
      {
        // Gender means are formatted to exactly two decimals ("F2").
        string formatted = MeanScore( frame, genderGroups[g] ).ToString( "F2" );
        Console.WriteLine( "Mean for gender {0} = {1}", genderFactor.Levels[g], formatted );
      }
      for ( int g = 0; g < gradeFactor.NumberOfLevels; g++ )
      {
        // Grade means are rounded to two decimals rather than format-padded.
        double rounded = System.Math.Round( MeanScore( frame, gradeGroups[g] ), 2 );
        Console.WriteLine( "Mean for grade {0} = {1}", gradeFactor.Levels[g], rounded );
      }
      Console.WriteLine();

      // The two-factor overload of GetGroupings() returns a rectangular
      // two-dimensional array of subsets, one per combination of levels
      // (first index: gender level, second index: grade level).
      Console.WriteLine( "Cell Means" );
      Subset[,] cellGroups = Subset.GetGroupings( genderFactor, gradeFactor );
      for ( int row = 0; row < genderFactor.NumberOfLevels; row++ )
      {
        for ( int col = 0; col < gradeFactor.NumberOfLevels; col++ )
        {
          double rounded = System.Math.Round( MeanScore( frame, cellGroups[row, col] ), 2 );
          Console.WriteLine( "Mean for gender {0} in grade {1} = {2}",
            genderFactor.Levels[row], gradeFactor.Levels[col], rounded );
        }
      }
      Console.WriteLine();

      // Pairing DataFrame.GetFactor() with Subset.GetGroupings() to reach the
      // "cells" is so common that DataFrame offers Tabulate() as a shortcut.
      // It takes one or two grouping columns, a data column, and a delegate
      // to apply to each subset of the data column. The calls below print the
      // same marginal and cell means as above with far less code.
      Func<IDFColumn, double> columnMean = StatsFunctions.Mean;
      Console.WriteLine( "Same results using cross-tabulation:\n" );
      Console.WriteLine( frame.Tabulate( "Grade", "Score", columnMean ) + "\n" );
      Console.WriteLine( frame.Tabulate( "Gender", "Score", columnMean ) + "\n" );
      Console.WriteLine( frame.Tabulate( "Grade", "Gender", "Score", columnMean ) + "\n" );

      // The ANOVA classes use factors internally to group data. The column
      // indices are looked up in the same order as the constructor arguments.
      int genderColumn = frame.IndexOfColumn( "Gender" );
      int gradeColumn = frame.IndexOfColumn( "Grade" );
      int scoreColumn = frame.IndexOfColumn( "Score" );
      var twoWay = new TwoWayAnova( frame, genderColumn, gradeColumn, scoreColumn );
      Console.WriteLine( twoWay );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    }  // Main

    /// <summary>
    /// Computes the mean of the "Score" column of <paramref name="data"/>,
    /// restricted to the rows selected by <paramref name="rows"/>.
    /// </summary>
    /// <param name="data">Data frame containing a "Score" column.</param>
    /// <param name="rows">Subset of row indices to include.</param>
    /// <returns>The value returned by StatsFunctions.Mean for that selection.</returns>
    private static double MeanScore( DataFrame data, Subset rows )
    {
      return StatsFunctions.Mean( data[data.IndexOfColumn( "Score" ), rows] );
    }

  }  // class

}  // namespace

← All NMath Stats Code Examples
Top