C# Subset Example

[TOC]

using System;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;

namespace SubsetExample
{
  /// <summary>
  /// A .NET example in C# showing how to access arbitrary subsets of a data frame.
  /// </summary>
  /// <remarks>
  /// In addition to accessors for individual elements, columns, or rows in
  /// a data frame, class DataFrame provides a large number of indexers and
  /// member functions for accessing sub-frames containing any arbitrary subset
  /// of rows, columns, or both. Such indexers and methods accept the NMath Core
  /// types Slice and Range to indicate sets of row or column indices with constant
  /// spacing. In addition, NMath Stats introduces a new class called Subset.
  /// Like a Slice or Range, a Subset represents a collection of indices that can be
  /// used to view a subset of data from another data structure. Unlike a Slice or
  /// Range, however, a Subset need not be evenly spaced, or even ordered. It is
  /// simply an arbitrary collection of indices.
  /// </remarks>
  public class SubsetExample
  {

    /// <summary>
    /// Loads the example data frame and prints several sub-frames selected
    /// with Subset instances: an explicit index list, two boolean masks, and
    /// the intersection and union of those masks.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {

      // Read in data from the file. The data comes from The Data and Story 
      // Library (http://lib.stat.cmu.edu/DASL) and is described below:
      //
      // These data measure protein consumption in twenty-five European
      // countries for nine food groups. 
      //
      // The relative path is assembled with Path.Combine so that the platform's
      // own directory separator is used instead of a hard-coded backslash.
      string dataFile = System.IO.Path.Combine("..", "..", "SubsetExample.dat");
      DataFrame df = DataFrame.Load(dataFile);
      Console.WriteLine();
      Console.WriteLine("COMPLETE DATA SET\n" + df + "\n");

      // Subset instances can be constructed in a variety of ways. One constructor
      // simply accepts an array of integers. The integers do not need to be ordered.
      Subset sub = new Subset(new int[] { 5, 4, 0, 3 });

      // Let's use this subset to get a sub-frame of the data. This code gets rows
      // 5, 4, 0, and 3 from the original data frame, in that order, and all columns.
      DataFrame df2 = df.GetRows(sub);
      Console.WriteLine("ARBITRARY SUBSET OF ROWS\n" + df2 + "\n");

      // This code uses the same subset to get columns 5, 4, 0, and 3 from the
      // original data frame, in that order, and all rows.
      df2 = df.GetColumns(sub);
      Console.WriteLine("ARBITRARY SUBSET OF COLUMNS\n" + df2 + "\n");

      // Indexers enable you to subset both rows and columns simultaneously.
      df2 = df[sub, sub];
      Console.WriteLine("ARBITRARY SUBSET OF ROWS AND COLUMNS\n" + df2 + "\n");

      // A very useful constructor takes an array of boolean values and constructs a
      // Subset containing the indices of all true elements in the array. Let's create
      // a subset of row indices containing those rows where protein from Milk exceeds
      // protein from Fish.
      //
      // The two columns are looked up by name once, outside the loop, rather than
      // on every iteration.
      DFColumn milk = df["Milk"];
      DFColumn fish = df["Fish"];
      int rowCount = df.Rows;
      bool[] bArray = new bool[rowCount];
      for (int i = 0; i < rowCount; i++)
      {
        bArray[i] = ((double)milk[i] > (double)fish[i]);
      }
      Subset milkGTfish = new Subset(bArray);
      df2 = df.GetRows(milkGTfish);
      Console.WriteLine("ROWS WHERE MILK > FISH\n" + df2 + "\n");

      // The StatsFunctions.If() method applies a given logical function delegate to 
      // a data set and returns an array of boolean values. Let's create a subset for
      // countries where protein consumption from Nuts exceeds 3.0. See below for
      // the definition of logical function GT3().
      bArray = StatsFunctions.If(df["Nuts"],
        new StatsFunctions.LogicalDoubleFunction(GT3));
      Subset nutsGT3 = new Subset(bArray);
      df2 = df.GetRows(nutsGT3);
      Console.WriteLine("ROWS WHERE NUTS > 3.0\n" + df2 + "\n");

      // The Subset class provides a variety of operators for combining subsets, including
      // operator& for intersections and operator| for unions. 
      df2 = df.GetRows(milkGTfish & nutsGT3);
      Console.WriteLine("ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)\n" + df2 + "\n");
      df2 = df.GetRows(milkGTfish | nutsGT3);
      Console.WriteLine("ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)\n" + df2 + "\n");

      // Keep the console window open until the user presses Enter. ReadLine
      // consumes the whole line, matching the prompt, whereas Read would take
      // only a single character and leave the rest of the line unread.
      Console.WriteLine();
      Console.WriteLine("Press Enter Key");
      Console.ReadLine();

    }  // Main

    /// <summary>
    /// Logical function passed to StatsFunctions.If() to select values above 3.0.
    /// </summary>
    /// <param name="x">The value to test.</param>
    /// <returns>true if <paramref name="x"/> is strictly greater than 3.0; otherwise false.</returns>
    private static bool GT3(double x)
    {
      return (x > 3.0);
    }

  }  // class

}  // namespace


[TOC]