C# Advanced Two Way Anova Example

[TOC]

using System;

namespace CenterSpace.NMath.Stats.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing some of the advanced features of the class
  /// TwoWayAnova.
  /// </summary>
  class AdvancedTwoWayAnovaExample
  {
    private static string materialColumnName_ = "Material";
    private static string tempColumnName_ = "Temperature";
    private static string lifetimeColumnName_ = "Lifetime";

    static void Main(string[] args)
    {
      // Create the DataFrame that will hold the data to analyze. The variable
      // being measured is battery lifetime. The two factors are material and 
      // temperature. The data must have at least three columns. One column
      // must be numeric and contain the values of the variable being measured,
      // battery life in this case, the other two columns must contain the values
      // of the two factors that correspond to the measurement. Thus a row of the
      // the DataFrame that we are going to use contains a value for the battery
      // material (M1, M2, or M3), a temperature (15, 70, or 175), and a lifetime
      // (in hours). The method CreateBatteryData() builds the DataFrame.
      DataFrame batteryData = CreateBatteryData();

      // Now create a TwoWayAnova object from the battery data. The three
      // integer arguments indicate the following column indicies, respectively:
      // index of the column containing factor A, index of the column containing
      /// factor B, index of the column containing the measured values.
      TwoWayAnova anova = new TwoWayAnova(batteryData, 0, 1, 2);

      // Class TwoWayAnova provides access to the data in a particular
      // cell. A cell is defined by the values of the two factors. For 
      // example, the following code prints out all the battery lifetimes
      // for batteries made from material M2 at temperature 70.
      DFNumericColumn mTwoSeventyValues = anova.GetCellData(materialColumnName_, "M2",
                                                             tempColumnName_, "70");

      Console.WriteLine();
      Console.WriteLine("Lifetimes of M2 batteries at 70 degrees: {0}", mTwoSeventyValues);

      // You can also get the means for each cell:
      double mean = anova.GetMeanForCell(materialColumnName_, "M2", tempColumnName_, "70");
      Console.WriteLine("Mean lifetime of M2 batteries at 70 degrees: {0}", mean);

      // Means for a given factor level can also be accessed. For example, the 
      // following code gets the mean lifetime for all batteries made from material
      // M1
      mean = anova.GetMeanForFactorLevel(materialColumnName_, "M1");
      Console.WriteLine("Mean lifetime for M1 batteries = {0}", mean);

      // You can also get the grand mean.
      Console.WriteLine("Overall mean lifetime is {0}", anova.GrandMean);

      // The TwoWayAnovaTable class is used to access all the traditional 
      // two way ANOVA data.
      TwoWayAnovaTable anovaTable = anova.AnovaTable;
      Console.WriteLine(Environment.NewLine + "Source: " + materialColumnName_);
      Console.WriteLine("  Degrees of Freedom: {0}", anovaTable.DegreesOfFreedom(materialColumnName_));
      Console.WriteLine("  Sum of Squares    : {0}", anovaTable.SumOfSquares(materialColumnName_));
      Console.WriteLine("  Mean Square       : {0}", anovaTable.MeanSquare(materialColumnName_));
      Console.WriteLine("  F                 : {0}", anovaTable.Fstatistic(materialColumnName_));
      Console.WriteLine("  P                 : {0}", anovaTable.FstatisticPvalue(materialColumnName_));

      Console.WriteLine(Environment.NewLine + "Source: " + tempColumnName_);
      Console.WriteLine("  Degrees of Freedom: {0}", anovaTable.DegreesOfFreedom(tempColumnName_));
      Console.WriteLine("  Sum of Squares    : {0}", anovaTable.SumOfSquares(tempColumnName_));
      Console.WriteLine("  Mean Square       : {0}", anovaTable.MeanSquare(tempColumnName_));
      Console.WriteLine("  F                 : {0}", anovaTable.Fstatistic(tempColumnName_));
      Console.WriteLine("  P                 : {0}", anovaTable.FstatisticPvalue(tempColumnName_));

      Console.WriteLine(Environment.NewLine + "Source: Interaction");
      Console.WriteLine("  Degrees of Freedom: {0}", anovaTable.InteractionDegreesOfFreedom);
      Console.WriteLine("  Sum of Squares    : {0}", anovaTable.InteractionSumOfSquares);
      Console.WriteLine("  Mean Square       : {0}", anovaTable.InteractionMeanSquare);
      Console.WriteLine("  F                 : {0}", anovaTable.InteractionFstatistic);
      Console.WriteLine("  P                 : {0}", anovaTable.InteractionFstatisticPvalue);

      Console.WriteLine(Environment.NewLine + "Source: Error");
      Console.WriteLine("  Degrees of Freedom: {0}", anovaTable.ErrorDegreesOfFreedom);
      Console.WriteLine("  Sum of Squares    : {0}", anovaTable.ErrorSumOfSquares);
      Console.WriteLine("  Mean Square       : {0}", anovaTable.ErrorMeanSquare);

      Console.WriteLine(Environment.NewLine + "Total");
      Console.WriteLine("  Degrees of Freedom: {0}", anovaTable.TotalDegreesOfFreedom);
      Console.WriteLine("  Sum of Squares    : {0}", anovaTable.TotalSumOfSquares);

      // Class TwoWayAnova computes a two way ANOVA using a multiple linear
      // regression. The following function prints out details of the regression.
      // See the function WriteRegressionInfo() for more information.
      Console.WriteLine(Environment.NewLine + "-------- Regression Information -----------");

      WriteRegressionInfo(anova);

      Console.WriteLine();
      Console.WriteLine("Press Enter Key");
      Console.Read();
    }

    /// <summary>
    /// Two way ANOVAs are computed using a multiple linear regression. The
    /// details of this regression are available from the TwoWayAnova class.
    /// This functions shows how to access these details and sends the 
    /// information to the Console.
    /// </summary>
    /// <param name="anova">ANOVA to process.</param>
    private static void WriteRegressionInfo(TwoWayAnova anova)
    {
      // A multiple linear regression is used to solve a two way ANOVA
      // problem by creating dummy variables using an encoding. The
      // TwoWayAnova class uses "effects" encoding to accomplish this.
      // In effects encoding we define k - 1 dummy variables to encode
      // k levels of the factor in interest. A dummy variable di for the
      // the ith level is then defined as
      // 
      // di =  1 if ith level,
      // di = -1 if kth level,
      // di =  0 otherwise
      //
      // If factor A has k levels, and factor B has m levels, there will also
      // be (k - 1) * (m - 1) interaction variables in the regression. It
      // follows that there will not be a regression parameter for every factor
      // level, or interaction. In the code below you will notice that we check
      // for a null return value when attempting to retrieve a regression 
      // parameter object for a particular factor level or interaction of
      // factor levels. This is the reason.


      // First print out information for the intercept parameter. The class
      // AnovaRegressionParameter is derived from LinearRegressionParameter.
      // It merely adds the SumOfSquares property.
      AnovaRegressionParameter interceptParam = anova.RegressionInterceptParameter;
      Console.WriteLine(Environment.NewLine + "Parameter: Intercept");
      Console.WriteLine("  Estimate              : {0}", interceptParam.Value);
      Console.WriteLine("  Standard Error        : {0}", interceptParam.StandardError);
      Console.WriteLine("  T for H0 Parameter = 0: {0}", interceptParam.TStatistic(0));
      Console.WriteLine("  Prob > |T|            : {0}", interceptParam.TStatisticPValue(0));
      Console.WriteLine("  Type I SS             : {0}", interceptParam.SumOfSquares);
      Console.WriteLine("  Variable Label: INTERCEPT" + Environment.NewLine);

      // Next print out information for the Material factor parameters. Since there
      // is one less paramters than there are levels, we'll have to check for null
      // return values from GetRegressionFactorParameter().
      // AnovaRegressionFactorParam is derived from AnovaRegressionParameter, and
      // hence from LinearRegressionParameter, adding the Encoding property. 
      AnovaRegressionFactorParam factorParam;
      string[] materialLevels = { "M1", "M2", "M3" };
      for (int i = 0; i < materialLevels.Length; ++i)
      {
        factorParam = anova.GetRegressionFactorParameter(materialColumnName_, materialLevels[i]);
        if (factorParam == null)
        {
          continue;
        }

        Console.WriteLine(string.Format("Parameter: {0}", materialLevels[i]));
        Console.WriteLine("  Estimate              : {0}", factorParam.Value);
        Console.WriteLine("  Standard Error        : {0}", factorParam.StandardError);
        Console.WriteLine("  T for H0 Parameter = 0: {0}", factorParam.TStatistic(0));
        Console.WriteLine("  Prob > |T|            : {0}", factorParam.TStatisticPValue(0));
        Console.WriteLine("  Type I SS             : {0}", factorParam.SumOfSquares);
        string encoding = string.Format("  Variable Label: dummy variable = {0} if {1} = {2}",
                                         factorParam.Encoding, materialColumnName_, materialLevels[i]);
        Console.WriteLine(encoding + Environment.NewLine);
      }

      // Now, do the same for the temperature factor parameter.
      string[] tempLevels = { "15", "70", "125" };
      for (int i = 0; i < materialLevels.Length; ++i)
      {
        factorParam = anova.GetRegressionFactorParameter(tempColumnName_, tempLevels[i]);
        if (factorParam == null)
        {
          continue;
        }

        Console.WriteLine(string.Format("Parameter: {0}", tempLevels[i]));
        Console.WriteLine("  Estimate              : {0}", factorParam.Value);
        Console.WriteLine("  Standard Error        : {0}", factorParam.StandardError);
        Console.WriteLine("  T for H0 Parameter = 0: {0}", factorParam.TStatistic(0));
        Console.WriteLine("  Prob > |T|            : {0}", factorParam.TStatisticPValue(0));
        Console.WriteLine("  Type I SS             : {0}", factorParam.SumOfSquares);
        string encoding = string.Format("  Variable Label: dummy variable = {0} if {1} = {2}",
          factorParam.Encoding, tempColumnName_, tempLevels[i]);
        Console.WriteLine(encoding + Environment.NewLine);

      }

      // Finally, print out information for the interaction parameters. 
      // AnovaRegressionInteractionParam is derived from AnovaRegressionParameter, and
      // hence from LinearRegressionParameter. It does not contain an Encoding 
      // property. The value of the interaction variables is just the product of
      // the values of their corresponding factor variables.
      AnovaRegressionInteractionParam interactionParam;
      for (int i = 0; i < materialLevels.Length; ++i)
      {
        for (int j = 0; j < tempLevels.Length; ++j)
        {
          interactionParam = anova.GetRegressionInteractionParameter(materialColumnName_, materialLevels[i],
            tempColumnName_, tempLevels[j]);
          if (interactionParam == null)
          {
            continue;
          }

          Console.WriteLine(string.Format("Parameter: {0}, {1}", materialLevels[i], tempLevels[j]));
          Console.WriteLine("  Estimate              : {0}", interactionParam.Value);
          Console.WriteLine("  Standard Error        : {0}", interactionParam.StandardError);
          Console.WriteLine("  T for H0 Parameter = 0: {0}", interactionParam.TStatistic(0));
          Console.WriteLine("  Prob > |T|            : {0}", interactionParam.TStatisticPValue(0));
          Console.WriteLine("  Type I SS             : {0}", interactionParam.SumOfSquares);
          string encoding = string.Format("  Variable Label: interaction of {0} and {1}",
            materialLevels[i], tempLevels[j]);
          Console.WriteLine(encoding + Environment.NewLine);
        }
      }
    }

    private static DataFrame CreateBatteryData()
    {
      DataFrame data = new DataFrame();
      data.AddColumn(new DFStringColumn(materialColumnName_));
      data.AddColumn(new DFStringColumn(tempColumnName_));
      DFIntColumn batteryLifetimeColumn = new DFIntColumn(lifetimeColumnName_);
      data.AddColumn(batteryLifetimeColumn);

      int rowNumber = 0;
      data.AddRow(++rowNumber, "M1", "15", 130);
      data.AddRow(++rowNumber, "M1", "15", 155);
      data.AddRow(++rowNumber, "M1", "15", 74);
      data.AddRow(++rowNumber, "M1", "15", 180);
      data.AddRow(++rowNumber, "M1", "70", 34);
      data.AddRow(++rowNumber, "M1", "70", 40);
      data.AddRow(++rowNumber, "M1", "70", 80);
      data.AddRow(++rowNumber, "M1", "70", 75);
      data.AddRow(++rowNumber, "M1", "125", 20);
      data.AddRow(++rowNumber, "M1", "125", 70);
      data.AddRow(++rowNumber, "M1", "125", 82);
      data.AddRow(++rowNumber, "M1", "125", 58);
      data.AddRow(++rowNumber, "M2", "15", 150);
      data.AddRow(++rowNumber, "M2", "15", 188);
      data.AddRow(++rowNumber, "M2", "15", 159);
      data.AddRow(++rowNumber, "M2", "15", 126);
      data.AddRow(++rowNumber, "M2", "70", 136);
      data.AddRow(++rowNumber, "M2", "70", 122);
      data.AddRow(++rowNumber, "M2", "70", 106);
      data.AddRow(++rowNumber, "M2", "70", 115);
      data.AddRow(++rowNumber, "M2", "125", 25);
      data.AddRow(++rowNumber, "M2", "125", 70);
      data.AddRow(++rowNumber, "M2", "125", 58);
      data.AddRow(++rowNumber, "M2", "125", 45);
      data.AddRow(++rowNumber, "M3", "15", 138);
      data.AddRow(++rowNumber, "M3", "15", 110);
      data.AddRow(++rowNumber, "M3", "15", 168);
      data.AddRow(++rowNumber, "M3", "15", 160);
      data.AddRow(++rowNumber, "M3", "70", 174);
      data.AddRow(++rowNumber, "M3", "70", 120);
      data.AddRow(++rowNumber, "M3", "70", 150);
      data.AddRow(++rowNumber, "M3", "70", 139);
      data.AddRow(++rowNumber, "M3", "125", 96);
      data.AddRow(++rowNumber, "M3", "125", 104);
      data.AddRow(++rowNumber, "M3", "125", 82);
      data.AddRow(++rowNumber, "M3", "125", 60);

      return data;
    }

  }  // class

}  // namespace


[TOC]