// [TOC]
using System;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace FactorExample
{
    /// <summary>
    /// A .NET example in C# showing how to create and manipulate factors.
    /// </summary>
    /// <remarks>
    /// The Factor class represents a categorical vector in which all
    /// elements are drawn from a finite number of factor levels. Thus, a Factor contains
    /// two parts: a string array of factor levels, and an integer array of
    /// categorical data, of which each element is an index into the array of levels.
    /// </remarks>
    public class FactorExample
    {
        /// <summary>
        /// Loads the example data set, builds factors for the "Gender" and "Grade"
        /// columns, and prints the grand, marginal, and cell means of "Score",
        /// followed by the same figures via cross-tabulation and a two-way ANOVA.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Read in data from the file. The data show test scores for 17 children on a
            // simple reading test. The child's gender ("male" or "female") and grade
            // (4, 5, or 6) are also recorded.
            DataFrame df = DataFrame.Load("..\\..\\FactorExample.dat", true, false, "\t", true);
            Console.WriteLine();
            Console.WriteLine(df + "\n");

            // The score column is addressed by index in every subset lookup below,
            // so resolve the index once instead of on each loop iteration.
            int scoreColumn = df.IndexOfColumn("Score");

            // Factors are usually constructed from a data frame column using the
            // GetFactor() method, which creates a Factor with levels for the sorted, unique
            // values in the column.
            Factor gender = df.GetFactor("Gender");

            // Display the levels and categorical data for the gender factor.
            Console.WriteLine("Gender factor: " + gender);
            Console.WriteLine("Gender levels: " + gender.LevelsToString());
            Console.WriteLine("Gender data: " + gender.DataToString());
            Console.WriteLine();

            // Construct a factor for grade level.
            Factor grade = df.GetFactor("Grade");

            // Display the levels and categorical data for the grade factor.
            Console.WriteLine("Grade factor: " + grade);
            Console.WriteLine("Grade levels: " + grade.LevelsToString());
            Console.WriteLine("Grade data: " + grade.DataToString());
            Console.WriteLine();

            // The principal use of factors is in conjunction with the
            // GetGroupings() methods on Subset. One overload of this method accepts
            // a single Factor and returns an array of subsets containing the indices
            // for each level of the given factor.
            Subset[] genders = Subset.GetGroupings(gender);
            Subset[] grades = Subset.GetGroupings(grade);

            // Display overall mean
            Console.WriteLine("Grand mean = {0}", StatsFunctions.Mean(df["Score"]));
            Console.WriteLine();

            // Display mean for each level of the Gender and Grade factors.
            Console.WriteLine("Marginal Means");
            for (int i = 0; i < gender.NumberOfLevels; i++)
            {
                double mean = RoundedMean(df, scoreColumn, genders[i]);
                Console.WriteLine("Mean for gender {0} = {1}", gender.Levels[i], mean);
            }
            for (int i = 0; i < grade.NumberOfLevels; i++)
            {
                double mean = RoundedMean(df, scoreColumn, grades[i]);
                Console.WriteLine("Mean for grade {0} = {1}", grade.Levels[i], mean);
            }
            Console.WriteLine();

            // Another overload of GetGroupings() accepts two Factor objects and returns
            // a two-dimensional (rectangular) array of subsets containing the indices for
            // each combination of levels in the two factors.
            Console.WriteLine("Cell Means");
            Subset[,] cells = Subset.GetGroupings(gender, grade);
            for (int i = 0; i < gender.NumberOfLevels; i++)
            {
                for (int j = 0; j < grade.NumberOfLevels; j++)
                {
                    double mean = RoundedMean(df, scoreColumn, cells[i, j]);
                    Console.WriteLine("Mean for gender {0} in grade {1} = {2}",
                        gender.Levels[i], grade.Levels[j], mean);
                }
            }
            Console.WriteLine();

            // Combining DataFrame.GetFactor() with Subset.GetGroupings() to access cells
            // is such a common operation that class DataFrame also provides the Tabulate()
            // method as a convenience. This method accepts one or two grouping columns, a
            // data column, and a delegate to apply to each data column subset. This code
            // displays the same marginal and cell means shown above, but with far fewer
            // lines of code:
            StatsFunctions.DoubleIDFColumnFunction meanFunction =
                new StatsFunctions.DoubleIDFColumnFunction(StatsFunctions.Mean);
            Console.WriteLine("Same results using cross-tabulation:\n");
            Console.WriteLine(df.Tabulate("Grade", "Score", meanFunction) + "\n");
            Console.WriteLine(df.Tabulate("Gender", "Score", meanFunction) + "\n");
            Console.WriteLine(df.Tabulate("Grade", "Gender", "Score", meanFunction) + "\n");

            // Factors are used internally by ANOVA classes for grouping data.
            TwoWayAnova anova = new TwoWayAnova(df, df.IndexOfColumn("Gender"),
                df.IndexOfColumn("Grade"), scoreColumn);
            Console.WriteLine(anova);

            Console.WriteLine();
            Console.WriteLine("Press Enter Key");
            Console.Read();
        } // Main

        /// <summary>
        /// Computes the mean of one data frame column restricted to the given rows,
        /// rounded to two decimal places.
        /// </summary>
        /// <param name="df">The data frame holding the column.</param>
        /// <param name="column">Index of the column to average.</param>
        /// <param name="rows">Subset of row indices to include.</param>
        /// <returns>The subset mean rounded to two decimal places.</returns>
        private static double RoundedMean(DataFrame df, int column, Subset rows)
        {
            double mean = StatsFunctions.Mean(df[column, rows]);
            return System.Math.Round(mean, 2);
        }
    } // class
} // namespace
// [TOC]