C# Subset Example

← All NMath Core Code Examples

 

using System;

using CenterSpace.NMath.Core;
using System.IO;

namespace SubsetExample
{
  /// <summary>
  /// A .NET example in C# showing how to access arbitrary subsets of a data frame.
  /// </summary>
  /// <remarks>
  /// In addition to accessors for individual elements, columns, or rows in
  /// a data frame, class DataFrame provides indexers and member functions for
  /// accessing sub-frames containing an arbitrary subset of rows, columns, or
  /// both. This example drives those members with the Subset class. A Subset
  /// represents a collection of indices that can be used to view part of
  /// another data structure. The indices need not be contiguous, or even
  /// ordered: a Subset is simply an arbitrary collection of indices.
  /// </remarks>
  public class SubsetExample
  {

    /// <summary>
    /// Loads the sample data set and prints a series of sub-frames selected
    /// with Subset objects built from index arrays, boolean masks, and
    /// combinations of other subsets.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    private static void Main( string[] args )
    {
      // Read in data from the file. The data comes from The Data and Story
      // Library (http://lib.stat.cmu.edu/DASL) and is described there as
      // protein consumption in twenty-five European countries for nine
      // food groups.
      DataFrame df = DataFrame.Load( "SubsetExample.dat" );

      Console.WriteLine();
      PrintFrame( "COMPLETE DATA SET", df );

      // One Subset constructor simply accepts an array of integer indices.
      // The indices do not need to be ordered.
      var sub = new Subset( new int[] { 5, 4, 0, 3 } );

      // Rows 5, 4, 0, and 3 of the original frame, in that order, with all columns.
      PrintFrame( "ARBITRARY SUBSET OF ROWS", df.GetRows( sub ) );

      // The same subset applied to columns: columns 5, 4, 0, and 3, in that
      // order, with all rows.
      PrintFrame( "ARBITRARY SUBSET OF COLUMNS", df.GetColumns( sub ) );

      // The two-argument indexer subsets rows and columns simultaneously.
      PrintFrame( "ARBITRARY SUBSET OF ROWS AND COLUMNS", df[sub, sub] );

      // Another constructor takes an array of boolean values and builds a
      // Subset containing the indices of all true elements. Here the mask marks
      // the rows where protein from Milk exceeds protein from Fish.
      // The two columns are looked up by name once, outside the loop, rather
      // than on every iteration.
      var milk = df["Milk"];
      var fish = df["Fish"];
      var milkExceedsFish = new bool[df.Rows];
      for ( int row = 0; row < milkExceedsFish.Length; row++ )
      {
        milkExceedsFish[row] = (double) milk[row] > (double) fish[row];
      }
      var milkGTfish = new Subset( milkExceedsFish );
      PrintFrame( "ROWS WHERE MILK > FISH", df.GetRows( milkGTfish ) );

      // StatsFunctions.If() applies a logical function delegate to a data set
      // and returns an array of boolean values. This mask marks the countries
      // where protein consumption from Nuts exceeds 3.0; see GT3() below.
      // The delegate type is spelled out explicitly so the same If() overload
      // is always selected.
      bool[] nutsExceeds3 = StatsFunctions.If( df["Nuts"],
        new Func<double, bool>( GT3 ) );
      var nutsGT3 = new Subset( nutsExceeds3 );
      PrintFrame( "ROWS WHERE NUTS > 3.0", df.GetRows( nutsGT3 ) );

      // Subsets can be combined with operators, including operator& for
      // intersections and operator| for unions.
      PrintFrame( "ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)",
        df.GetRows( milkGTfish & nutsGT3 ) );
      PrintFrame( "ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)",
        df.GetRows( milkGTfish | nutsGT3 ) );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();

    }  // Main

    /// <summary>
    /// Writes a section to the console: the title, a newline, the string form
    /// of the frame, and a trailing blank line.
    /// </summary>
    /// <param name="title">Heading printed above the frame.</param>
    /// <param name="frame">Data frame whose string form is printed.</param>
    private static void PrintFrame( string title, DataFrame frame )
    {
      Console.WriteLine( title + "\n" + frame + "\n" );
    }

    /// <summary>
    /// Logical function used as the predicate for the Nuts column.
    /// </summary>
    /// <param name="x">Value to test.</param>
    /// <returns>True if <paramref name="x"/> is strictly greater than 3.0.</returns>
    private static bool GT3( double x )
    {
      return x > 3.0;
    }

  }  // class

}  // namespace


← All NMath Stats Code Examples
Top