C# Factor Example

[TOC]

using System;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;

namespace FactorExample
{
  /// <summary>
  /// A .NET example in C# showing how to create and manipulate factors.
  /// </summary>
  /// <remarks>
  /// The Factor class represents a categorical vector in which all
  /// elements are drawn from a finite number of factor levels. Thus, a Factor contains
  /// two parts: a string array of factor levels, and an integer array of
  /// categorical data, of which each element is an index into the array of levels.
  /// </remarks>
  public class FactorExample
  {

    /// <summary>
    /// Number of decimal places kept when a mean is printed.
    /// </summary>
    private const int MeanDecimals = 2;

    /// <summary>
    /// Loads the example data set, builds factors for the "Gender" and "Grade"
    /// columns, and prints the grand, marginal, and cell means of the "Score"
    /// column, followed by the same figures obtained through cross-tabulation
    /// and a two-way ANOVA.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
      // Read in data from the file. The data show test scores for 17 children on a
      // simple reading test. Each child's gender ("male" or "female") and grade
      // (4, 5, or 6) are also recorded.
      //
      // The path is relative, so it is resolved against the current working
      // directory. Path.Combine is used rather than a literal "..\\..\\" so that
      // the separator is correct on every platform.
      string dataPath = System.IO.Path.Combine("..", "..", "FactorExample.dat");
      DataFrame df = DataFrame.Load(dataPath, true, false, "\t", true);

      Console.WriteLine();
      Console.WriteLine(df + "\n");

      // Factors are usually constructed from a data frame column using the
      // GetFactor() method, which creates a Factor with levels for the sorted, unique
      // values in the column.
      Factor gender = df.GetFactor("Gender");

      // Display the levels and categorical data for the gender factor.
      Console.WriteLine("Gender factor: " + gender);
      Console.WriteLine("Gender levels: " + gender.LevelsToString());
      Console.WriteLine("Gender data: " + gender.DataToString());
      Console.WriteLine();

      // Construct a factor for grade level.
      Factor grade = df.GetFactor("Grade");

      // Display the levels and categorical data for the grade factor.
      Console.WriteLine("Grade factor: " + grade);
      Console.WriteLine("Grade levels: " + grade.LevelsToString());
      Console.WriteLine("Grade data: " + grade.DataToString());
      Console.WriteLine();

      // The principal use of factors is in conjunction with the
      // GetGroupings() methods on Subset. One overload of this method accepts
      // a single Factor and returns an array of subsets containing the indices
      // for each level of the given factor.
      Subset[] genders = Subset.GetGroupings(gender);
      Subset[] grades = Subset.GetGroupings(grade);

      // Display overall mean
      Console.WriteLine("Grand mean = {0}", StatsFunctions.Mean(df["Score"]));
      Console.WriteLine();

      // The score column index does not change, so look it up once instead of
      // on every loop iteration.
      int scoreColumn = df.IndexOfColumn("Score");

      // Display mean for each level of the Gender and Grade factors.
      Console.WriteLine("Marginal Means");
      for (int i = 0; i < gender.NumberOfLevels; i++)
      {
        double mean = RoundedMean(df, scoreColumn, genders[i]);
        Console.WriteLine("Mean for gender {0} = {1}", gender.Levels[i], mean);
      }
      for (int i = 0; i < grade.NumberOfLevels; i++)
      {
        double mean = RoundedMean(df, scoreColumn, grades[i]);
        Console.WriteLine("Mean for grade {0} = {1}", grade.Levels[i], mean);
      }
      Console.WriteLine();

      // Another overload of GetGroupings() accepts two Factor objects and returns
      // a two-dimensional (rectangular) array of subsets, indexed as
      // [first factor level, second factor level], containing the indices for
      // each combination of levels in the two factors.
      Console.WriteLine("Cell Means");
      Subset[,] cells = Subset.GetGroupings(gender, grade);
      for (int i = 0; i < gender.NumberOfLevels; i++)
      {
        for (int j = 0; j < grade.NumberOfLevels; j++)
        {
          double mean = RoundedMean(df, scoreColumn, cells[i, j]);
          Console.WriteLine("Mean for gender {0} in grade {1} = {2}",
            gender.Levels[i], grade.Levels[j], mean);
        }
      }
      Console.WriteLine();

      // Combining DataFrame.GetFactor() with Subset.GetGroupings() to access "cells"
      // is such a common operation that class DataFrame also provides the Tabulate()
      // method as a convenience. This method accepts one or two grouping columns, a
      // data column, and a delegate to apply to each data column subset. This code
      // displays the same marginal and cell means shown above, but with far fewer
      // lines of code:
      StatsFunctions.DoubleIDFColumnFunction meanFunction =
        new StatsFunctions.DoubleIDFColumnFunction(StatsFunctions.Mean);
      Console.WriteLine("Same results using cross-tabulation:\n");
      Console.WriteLine(df.Tabulate("Grade", "Score", meanFunction) + "\n");
      Console.WriteLine(df.Tabulate("Gender", "Score", meanFunction) + "\n");
      Console.WriteLine(df.Tabulate("Grade", "Gender", "Score", meanFunction) + "\n");

      // Factors are used internally by ANOVA classes for grouping data.
      TwoWayAnova anova = new TwoWayAnova(df, df.IndexOfColumn("Gender"),
        df.IndexOfColumn("Grade"), scoreColumn);
      Console.WriteLine(anova);

      Console.WriteLine();
      Console.WriteLine("Press Enter Key");
      // ReadLine consumes the whole line up to Enter, matching the prompt above;
      // Read would take a single character and leave the rest unread.
      Console.ReadLine();

    }  // Main

    /// <summary>
    /// Computes the mean of the rows of one data frame column selected by a
    /// subset, rounded to <see cref="MeanDecimals"/> decimal places.
    /// </summary>
    /// <param name="df">Data frame holding the column.</param>
    /// <param name="column">Index of the column to average.</param>
    /// <param name="rows">Subset selecting which rows of the column to include.</param>
    /// <returns>The rounded mean of the selected values.</returns>
    private static double RoundedMean(DataFrame df, int column, Subset rows)
    {
      // System.Math is fully qualified here, as in the rest of this example.
      return System.Math.Round(StatsFunctions.Mean(df[column, rows]), MeanDecimals);
    }

  }  // class

}  // namespace

[TOC]