// ← All NMath Code Examples
using System;
using System.IO;
using CenterSpace.NMath.Core;
namespace CenterSpace.NMath.Examples.CSharp
{
/// <summary>
/// A .NET example in C# demonstrating how factors are built from data frame
/// columns and then used to group data.
/// </summary>
/// <remarks>
/// A Factor is a categorical vector whose elements are all drawn from a finite
/// set of factor levels. It therefore has two parts: a string array holding the
/// levels, and an integer array of categorical data in which every element is an
/// index into the array of levels.
/// </remarks>
public class FactorExample
{
    // Name of the data frame column that holds the test scores.
    private const string ScoreColumn = "Score";

    /// <summary>
    /// Computes the mean of the score column restricted to the given rows.
    /// </summary>
    /// <param name="df">Data frame containing the score column.</param>
    /// <param name="rows">Subset of row indices to include in the mean.</param>
    /// <returns>The mean score over the selected rows.</returns>
    private static double MeanScore( DataFrame df, Subset rows )
    {
        return StatsFunctions.Mean( df[df.IndexOfColumn( ScoreColumn ), rows] );
    }

    static void Main( string[] args )
    {
        // Load the data set. It holds the scores of 17 children on a simple
        // reading test, together with each child's gender ( "male" or "female" )
        // and grade (4, 5, or 6).
        DataFrame df = DataFrame.Load( "FactorExample.dat", true, false, "\t", true );
        Console.WriteLine();
        Console.WriteLine( df + "\n" );

        // The usual way to obtain a Factor is DataFrame.GetFactor(), which builds
        // one from a column, with levels for the sorted, unique values found there.
        Factor gender = df.GetFactor( "Gender" );

        // Show the gender factor, its levels, and its categorical data.
        Console.WriteLine( "Gender factor: " + gender );
        Console.WriteLine( "Gender levels: " + gender.LevelsToString() );
        Console.WriteLine( "Gender data: " + gender.DataToString() );
        Console.WriteLine();

        // Do the same for grade level.
        Factor grade = df.GetFactor( "Grade" );
        Console.WriteLine( "Grade factor: " + grade );
        Console.WriteLine( "Grade levels: " + grade.LevelsToString() );
        Console.WriteLine( "Grade data: " + grade.DataToString() );
        Console.WriteLine();

        // Factors are mainly used together with Subset.GetGroupings(). Given a
        // single Factor, it returns an array of subsets holding the row indices
        // for each level of that factor.
        Subset[] genders = Subset.GetGroupings( gender );
        Subset[] grades = Subset.GetGroupings( grade );

        // Overall mean across every row.
        Console.WriteLine( "Grand mean = {0}", StatsFunctions.Mean( df[ScoreColumn] ) );
        Console.WriteLine();

        // Mean score for each level of the Gender factor, then of the Grade factor.
        Console.WriteLine( "Marginal Means" );
        for ( int g = 0; g < gender.NumberOfLevels; g++ )
        {
            double genderMean = MeanScore( df, genders[g] );
            Console.WriteLine( "Mean for gender {0} = {1}", gender.Levels[g], genderMean.ToString( "F2" ) );
        }
        for ( int r = 0; r < grade.NumberOfLevels; r++ )
        {
            double gradeMean = System.Math.Round( MeanScore( df, grades[r] ), 2 );
            Console.WriteLine( "Mean for grade {0} = {1}", grade.Levels[r], gradeMean );
        }
        Console.WriteLine();

        // Given two Factor objects, GetGroupings() returns a two-dimensional
        // array of subsets holding the row indices for every combination of
        // levels in the two factors.
        Console.WriteLine( "Cell Means" );
        Subset[,] cells = Subset.GetGroupings( gender, grade );
        for ( int g = 0; g < gender.NumberOfLevels; g++ )
        {
            for ( int r = 0; r < grade.NumberOfLevels; r++ )
            {
                double cellMean = System.Math.Round( MeanScore( df, cells[g, r] ), 2 );
                Console.WriteLine( "Mean for gender {0} in grade {1} = {2}",
                    gender.Levels[g], grade.Levels[r], cellMean );
            }
        }
        Console.WriteLine();

        // Pairing DataFrame.GetFactor() with Subset.GetGroupings() to reach the
        // cells is so common that DataFrame offers Tabulate() as a shortcut. It
        // takes one or two grouping columns, a data column, and a delegate to
        // apply to each subset of the data column. The calls below print the same
        // marginal and cell means as above with far less code.
        Func<IDFColumn, double> meanFunction = StatsFunctions.Mean;
        Console.WriteLine( "Same results using cross-tabulation:\n" );
        Console.WriteLine( df.Tabulate( "Grade", ScoreColumn, meanFunction ) + "\n" );
        Console.WriteLine( df.Tabulate( "Gender", ScoreColumn, meanFunction ) + "\n" );
        Console.WriteLine( df.Tabulate( "Grade", "Gender", ScoreColumn, meanFunction ) + "\n" );

        // The ANOVA classes rely on factors internally to group their data.
        int genderIdx = df.IndexOfColumn( "Gender" );
        int gradeIdx = df.IndexOfColumn( "Grade" );
        int scoreIdx = df.IndexOfColumn( ScoreColumn );
        var anova = new TwoWayAnova( df, genderIdx, gradeIdx, scoreIdx );
        Console.WriteLine( anova );

        Console.WriteLine();
        Console.WriteLine( "Press Enter Key" );
        Console.Read();
    } // Main
} // class
} // namespace
// ← All NMath Code Examples