C# Data Frame Example

← All NMath Core Code Examples

 

using System;
using System.Collections;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Core.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing how to manipulate data using the DataFrame class.
  /// </summary>
  /// <remarks>
  /// The statistical functions in NMath support the NMath Core types
  /// DoubleVector and DoubleMatrix, as well as simple arrays of doubles. In many
  /// cases, these types are sufficient for storing and manipulating your
  /// statistical data. However, they suffer from two limitations: they can only
  /// store numeric data, and they have limited support for adding, inserting, removing,
  /// and reordering data. Therefore, NMath provides the DataFrame class, which
  /// represents a two-dimensional data object consisting of a list of columns of the
  /// same length. Columns are themselves lists of different types of data: numeric,
  /// string, boolean, generic, and so on.
  /// </remarks>
  public class DataFrameExample
  {

    /// <summary>
    /// Builds a small data frame of oak tree species, then reorders, sorts, trims,
    /// updates, and exports it, writing each intermediate result to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
      // Create an empty data frame.
      var df = new DataFrame();

      // Add some columns. These data describe the relationship between
      // the size of acorns and various oak tree species. Columns in a data frame
      // can be accessed by numeric index (0...n-1) or by a name supplied at
      // construction time.
      df.AddColumn( new DFStringColumn( "Region" ) );
      df.AddColumn( new DFNumericColumn( "AcornSize" ) );
      df.AddColumn( new DFNumericColumn( "TreeHeight" ) );
      df.AddColumn( new DFBoolColumn( "Threatened" ) );

      // Add some rows of data. Rows can be accessed by numeric index (0...n-1)
      // or by a key object. The first parameter to the AddRow() method, in this
      // case the name of the oak tree species, is the row key.
      df.AddRow( "Quercus alba L.", "Atlantic", 1.4, 27, false );
      df.AddRow( "Quercus bicolor Willd.", "Atlantic", 3.4, 21, false );
      df.AddRow( "Quercus macrocarpa Michx.", "Atlantic", 9.1, 25, false );
      df.AddRow( "Quercus Chapmanii Sarg.", "Atlantic", 0.9, 15, false );
      df.AddRow( "Quercus Durandii Buckl.", "Atlantic", 0.8, 23, true );
      df.AddRow( "Quercus laurifolia Michx.", "Atlantic", 1.1, 27, false );
      df.AddRow( "Quercus marilandica Muenchh.", "Atlantic", 3.7, 9, false );
      df.AddRow( "Quercus nigra L.", "Atlantic", 1.1, 24, true );
      df.AddRow( "Quercus palustris Muenchh.", "Atlantic", 1.1, 23, false );
      df.AddRow( "Quercus texana Buckl.", "Atlantic", 1.1, 9, false );
      df.AddRow( "Quercus coccinea Muenchh.", "Atlantic", 1.2, 4, false );
      df.AddRow( "Quercus Douglasii Hook. & Arn", "California", 4.1, 18, false );
      df.AddRow( "Quercus dumosa Nutt.", "California", 1.6, 6, false );
      df.AddRow( "Quercus Engelmannii Greene", "California", 2.0, 17, false );
      df.AddRow( "Quercus Garryana Hook.", "California", 5.5, 20, true );
      df.AddRow( "Quercus chrysolepis Liebm.", "California", 17.1, 15, false );
      df.AddRow( "Quercus vaccinifolia Engelm.", "California", 0.4, 1, false );
      df.AddRow( "Quercus tomentella Engelm", "California", 7.1, 18, true );

      // Display the entire, original data frame.
      Console.WriteLine();
      Console.WriteLine( df );
      Console.WriteLine();

      // Reorder some columns. Let's move the AcornSize column to the end.
      df.PermuteColumns( 0, 3, 1, 2 );
      Console.WriteLine( df );
      Console.WriteLine();

      // If you don't know the index of a column you can query for it by name.
      int acornSizeCol = df.IndexOfColumn( "AcornSize" );
      int treeHeightCol = df.IndexOfColumn( "TreeHeight" );

      // Sort the rows. Let's sort the rows by AcornSize in ascending order, and secondarily
      // by TreeHeight in descending order.
      int[] colIndices = { acornSizeCol, treeHeightCol };
      SortingType[] sortingTypes = { SortingType.Ascending,
                                     SortingType.Descending };
      df.SortRows( colIndices, sortingTypes );
      Console.WriteLine( df );
      Console.WriteLine();

      // Remove some columns and rows.
      df.RemoveColumn( "Threatened" );
      df.RemoveRow( "Quercus nigra L." );
      df.RemoveRow( 2 );
      Console.WriteLine( df );
      Console.WriteLine();

      // Update a value by row and column index. The indices are queried afresh
      // rather than reusing acornSizeCol, because rows were sorted and a column
      // was removed after that value was obtained.
      int rowIndex = df.IndexOfKey( "Quercus chrysolepis Liebm." );
      int colIndex = df.IndexOfColumn( "AcornSize" );
      df[rowIndex, colIndex] = 17.2;

      // Get a row dictionary for one species of oak tree. The keys are the column names,
      // and the values are the row data.
      PrintDictionary( "Quercus palustris Muenchh.",
                       df.GetRowDictionary( "Quercus palustris Muenchh." ) );

      // Get a column dictionary for the TreeHeight column. The keys are the row keys, and
      // values are the column data.
      PrintDictionary( "TreeHeight", df.GetColumnDictionary( "TreeHeight" ) );

      // Compute some descriptive statistics
      Console.WriteLine( "Acorn Size:" );
      Console.WriteLine( "Mean = " + StatsFunctions.Mean( df["AcornSize"] ) );
      Console.WriteLine( "Var = " + StatsFunctions.Variance( df["AcornSize"] ) );
      Console.WriteLine();

      // Export data to a DoubleMatrix. Non-numeric columns are ignored.
      DoubleMatrix A = df.ToDoubleMatrix();
      Console.WriteLine( A );
      Console.WriteLine();

      // Get a DoubleVector for the values in the AcornSize column.
      DoubleVector v = df["AcornSize"].ToDoubleVector();
      Console.WriteLine( v );
      Console.WriteLine();

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    }  // Main

    /// <summary>
    /// Writes a heading line, then one "key: value" line for each entry in the
    /// given dictionary, then a blank line.
    /// </summary>
    /// <param name="heading">Text written on its own line before the entries.</param>
    /// <param name="dict">Dictionary whose entries are written to the console.</param>
    private static void PrintDictionary( string heading, IDictionary dict )
    {
      Console.WriteLine( heading );

      // Enumerate the entries themselves instead of casting each key to string:
      // row keys are arbitrary objects, so a string cast would throw for any
      // non-string key. This also avoids a second lookup per key.
      foreach ( DictionaryEntry entry in dict )
      {
        Console.WriteLine( entry.Key + ": " + entry.Value );
      }
      Console.WriteLine();
    }

  }  // class

}  // namespace


← All NMath Core Code Examples
Top