C# Cross Tabulation Example

[TOC]

using System;
using System.Collections;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;

namespace CenterSpace.NMath.Stats.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to use the cross-tabulation functionality of DataFrame.
  /// </summary>
  /// <remarks>
  /// As illustrated in the FactorExample, the DataFrame.GetFactor() method can be used
  /// in conjunction with Subset.GetGroupings() to access "cells" of data based on one
  /// or two grouping factors. This is such a common operation that class DataFrame also
  /// provides the Tabulate() methods as a convenience. These methods accept one or two
  /// grouping columns, a data column, and a delegate to apply to each data column subset.
  /// The results are returned in a new data frame.
  /// </remarks>
  public class CrossTabulationExample
  {
    /// <summary>
    /// Application entry point. Loads the reading-test data set, then prints a series of
    /// cross-tabulations of the scores to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used by this example.</param>
    [STAThread]
    static void Main(string[] args)
    {
      // Load the sample data. According to the example's description, the file holds
      // reading-test scores for 18 children together with each child's gender
      // ("male" or "female") and grade (4, 5, or 6).
      DataFrame scores = DataFrame.Load( "..\\..\\CrossTabulationExample.dat", true, false, "\t", true );

      Console.WriteLine();
      Console.WriteLine( scores + "\n" );

      // Wrap the static StatsFunctions.Mean() function in a
      // StatsFunctions.DoubleIDFColumnFunction delegate (via method group conversion),
      // then print the average test score for each grade.
      StatsFunctions.DoubleIDFColumnFunction averageOf = StatsFunctions.Mean;
      Console.WriteLine( scores.Tabulate( "Grade", "Score", averageOf ) );
      Console.WriteLine();

      // Two grouping columns: average test score for every grade/gender combination.
      DataFrame cellMeans = scores.Tabulate( "Grade", "Gender", "Score", averageOf );
      Console.WriteLine( cellMeans );
      Console.WriteLine();

      // Individual results can be read straight out of the returned data frame with its
      // two-argument indexer. NOTE(review): "Overall" appears to address the marginal
      // row/column that Tabulate() adds - confirm against the NMath documentation.
      Console.WriteLine( "Average score for boys in grade 5 = {0}", cellMeans[5, "male"] );
      Console.WriteLine( "Average score for grade 5 = {0}", cellMeans[5, "Overall"] );
      Console.WriteLine( "Average score for boys = {0}", cellMeans["Overall", "male"] );
      Console.WriteLine( "Grand average = {0}", cellMeans["Overall", "Overall"] );
      Console.WriteLine();

      // Most of the static descriptive statistics functions on class StatsFunctions take
      // an IDFColumn and return a double, but a few return integers. Here
      // StatsFunctions.Count(), which returns the number of items in a column, is wrapped
      // in a StatsFunctions.IntIDFColumnFunction to print the number of subjects per cell.
      StatsFunctions.IntIDFColumnFunction itemCount = StatsFunctions.Count;
      Console.WriteLine( scores.Tabulate( "Grade", "Gender", "Score", itemCount ) );
      Console.WriteLine();

      // A delegate that returns a generic object is especially handy when several
      // summary statistics should be tabulated in a single pass.
      StatsFunctions.GenericIDFColumnFunction summarize = GetSummary;
      DataFrame cellSummaries = scores.Tabulate( "Grade", "Gender", "Score", summarize );

      Console.WriteLine( "Summary Statistics for Boys in Grade 6" );
      Console.WriteLine( cellSummaries[6, "male"] );

      // Keep the console window open until the user responds.
      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    }  // Main

    /// <summary>
    /// Collects several descriptive statistics for a single column of data.
    /// </summary>
    /// <param name="data">The column to summarize.</param>
    /// <returns>
    /// A MySummary instance (typed as object) holding the count, mean, standard
    /// deviation, minimum, and maximum that StatsFunctions reports for the column.
    /// </returns>
    static private object GetSummary( IDFColumn data )
    {
      // Initializers run top to bottom, matching the field declaration order.
      return new MySummary
      {
        N = StatsFunctions.Count( data ),
        Mean = StatsFunctions.Mean( data ),
        StDev = StatsFunctions.StandardDeviation( data ),
        Min = StatsFunctions.MinValue( data ),
        Max = StatsFunctions.MaxValue( data )
      };
    }

    /// <summary>
    /// Plain holder for a set of descriptive statistics, with a multi-line
    /// text rendering suitable for console output.
    /// </summary>
    private class MySummary
    {
      /// <summary>Number of items (printed as "Size").</summary>
      public int N;

      /// <summary>Mean value.</summary>
      public double Mean;

      /// <summary>Standard deviation.</summary>
      public double StDev;

      /// <summary>Minimum value.</summary>
      public double Min;

      /// <summary>Maximum value.</summary>
      public double Max;

      /// <summary>
      /// Renders the statistics as one "label = value" pair per line.
      /// </summary>
      /// <returns>
      /// Five lines, each terminated by the environment's newline sequence
      /// (including the last one).
      /// </returns>
      public override string ToString()
      {
        System.Text.StringBuilder text = new System.Text.StringBuilder();

        // AppendLine() with no argument emits Environment.NewLine.
        text.Append( "Size = " ).Append( N ).AppendLine();
        text.Append( "Mean = " ).Append( Mean ).AppendLine();
        text.Append( "Standard Deviation = " ).Append( StDev ).AppendLine();
        text.Append( "Minimum = " ).Append( Min ).AppendLine();
        text.Append( "Maximum = " ).Append( Max ).AppendLine();

        return text.ToString();
      }
    }

  }  // class

}  // namespace


[TOC]