C# Data Frame Example

[TOC]

using System;
using System.Collections;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;

namespace CenterSpace.NMath.Stats.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# that walks through common operations on the DataFrame
  /// class: building, reordering, sorting, trimming, updating, querying, and
  /// exporting data.
  /// </summary>
  /// <remarks>
  /// The statistical functions in NMath Stats accept the NMath Core types
  /// DoubleVector and DoubleMatrix, as well as simple arrays of doubles, and in
  /// many cases those types are all you need to store and manipulate statistical
  /// data. They do have two limitations: they can hold numeric data only, and they
  /// offer limited support for adding, inserting, removing, and reordering data.
  /// For that reason NMath Stats provides the DataFrame class, which represents a
  /// two-dimensional data object consisting of a list of columns of the same
  /// length. Columns are themselves lists of data of different types: numeric,
  /// string, boolean, generic, and so on.
  /// </remarks>
  public class DataFrameExample
  {
    // Names of the columns used throughout the example.
    private const string RegionColumn = "Region";
    private const string AcornSizeColumn = "AcornSize";
    private const string TreeHeightColumn = "TreeHeight";
    private const string ThreatenedColumn = "Threatened";

    static void Main(string[] args)
    {
      DataFrame oaks = BuildOakData();

      // Show the data frame as initially populated.
      Console.WriteLine();
      ShowFrame(oaks);

      // Rearrange the columns so that AcornSize moves to the end.
      oaks.PermuteColumns(0, 3, 1, 2);
      ShowFrame(oaks);

      // Order the rows by AcornSize ascending, breaking ties by TreeHeight
      // descending. Column positions are not assumed; when you don't know the
      // index of a column you can look it up by name.
      int[] sortColumns =
      {
        oaks.IndexOfColumn(AcornSizeColumn),
        oaks.IndexOfColumn(TreeHeightColumn)
      };
      SortingType[] sortOrders = { SortingType.Ascending, SortingType.Descending };
      oaks.SortRows(sortColumns, sortOrders);
      ShowFrame(oaks);

      // Drop a column by name, one row by key, and another row by index.
      oaks.RemoveColumn(ThreatenedColumn);
      oaks.RemoveRow("Quercus nigra L.");
      oaks.RemoveRow(2);
      ShowFrame(oaks);

      // Overwrite a single cell, addressed by row and column index.
      int targetRow = oaks.IndexOfKey("Quercus chrysolepis Liebm.");
      int targetColumn = oaks.IndexOfColumn(AcornSizeColumn);
      oaks[targetRow, targetColumn] = 17.2;

      // Row dictionary for one species of oak tree: the keys are the column
      // names and the values are that row's data.
      const string species = "Quercus palustris Muenchh.";
      PrintDictionary(species, oaks.GetRowDictionary(species));

      // Column dictionary for TreeHeight: the keys are the row keys and the
      // values are that column's data.
      PrintDictionary(TreeHeightColumn, oaks.GetColumnDictionary(TreeHeightColumn));

      // A couple of descriptive statistics computed directly on a column.
      Console.WriteLine("Acorn Size:");
      Console.WriteLine("Mean = " + StatsFunctions.Mean(oaks[AcornSizeColumn]));
      Console.WriteLine("Var = " + StatsFunctions.Variance(oaks[AcornSizeColumn]));
      Console.WriteLine();

      // Export the data to a DoubleMatrix. Non-numeric columns are ignored.
      DoubleMatrix numericData = oaks.ToDoubleMatrix();
      Console.WriteLine(numericData);
      Console.WriteLine();

      // Pull the AcornSize values out as a DoubleVector.
      DoubleVector acornSizes = oaks[AcornSizeColumn].ToDoubleVector();
      Console.WriteLine(acornSizes);
      Console.WriteLine();

      // Prompt, then wait for console input before exiting.
      Console.WriteLine();
      Console.WriteLine("Press Enter Key");
      Console.Read();
    }

    /// <summary>
    /// Creates the data frame used by the example. These data describe the
    /// relationship between the size of acorns and various oak tree species.
    /// </summary>
    /// <returns>A data frame with four columns and eighteen keyed rows.</returns>
    private static DataFrame BuildOakData()
    {
      // Start from an empty data frame.
      DataFrame frame = new DataFrame();

      // Columns in a data frame can be accessed by numeric index (0...n-1) or
      // by the name supplied when the column is constructed.
      frame.AddColumn(new DFStringColumn(RegionColumn));
      frame.AddColumn(new DFNumericColumn(AcornSizeColumn));
      frame.AddColumn(new DFNumericColumn(TreeHeightColumn));
      frame.AddColumn(new DFBoolColumn(ThreatenedColumn));

      // Rows can be accessed by numeric index (0...n-1) or by a key object.
      // The first argument to AddRow() is the row key, here the name of the
      // oak tree species; the remaining arguments are the row's values.
      frame.AddRow("Quercus alba L.", "Atlantic", 1.4, 27, false);
      frame.AddRow("Quercus bicolor Willd.", "Atlantic", 3.4, 21, false);
      frame.AddRow("Quercus macrocarpa Michx.", "Atlantic", 9.1, 25, false);
      frame.AddRow("Quercus Chapmanii Sarg.", "Atlantic", 0.9, 15, false);
      frame.AddRow("Quercus Durandii Buckl.", "Atlantic", 0.8, 23, true);
      frame.AddRow("Quercus laurifolia Michx.", "Atlantic", 1.1, 27, false);
      frame.AddRow("Quercus marilandica Muenchh.", "Atlantic", 3.6, 9, false);
      frame.AddRow("Quercus nigra L.", "Atlantic", 1.1, 24, true);
      frame.AddRow("Quercus palustris Muenchh.", "Atlantic", 1.1, 23, false);
      frame.AddRow("Quercus texana Buckl.", "Atlantic", 1.1, 9, false);
      frame.AddRow("Quercus coccinea Muenchh.", "Atlantic", 1.2, 4, false);
      frame.AddRow("Quercus Douglasii Hook. & Arn", "California", 4.1, 18, false);
      frame.AddRow("Quercus dumosa Nutt.", "California", 1.6, 6, false);
      frame.AddRow("Quercus Engelmannii Greene", "California", 2.0, 17, false);
      frame.AddRow("Quercus Garryana Hook.", "California", 5.5, 20, true);
      frame.AddRow("Quercus chrysolepis Liebm.", "California", 17.1, 15, false);
      frame.AddRow("Quercus vaccinifolia Engelm.", "California", 0.4, 1, false);
      frame.AddRow("Quercus tomentella Engelm", "California", 7.1, 18, true);

      return frame;
    }

    /// <summary>
    /// Writes the given data frame to the console, followed by a blank line.
    /// </summary>
    /// <param name="frame">The data frame to display.</param>
    private static void ShowFrame(DataFrame frame)
    {
      Console.WriteLine(frame);
      Console.WriteLine();
    }

    /// <summary>
    /// Writes a heading, then one "key: value" line per dictionary entry, then
    /// a blank line.
    /// </summary>
    /// <param name="heading">The text printed above the entries.</param>
    /// <param name="entries">The dictionary to list; its keys are read as strings.</param>
    private static void PrintDictionary(string heading, IDictionary entries)
    {
      Console.WriteLine(heading);
      foreach (string key in entries.Keys)
      {
        object value = entries[key];
        Console.WriteLine(key + ": " + value);
      }
      Console.WriteLine();
    }

  }

}  // namespace


[TOC]