[TOC]
using System;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace SubsetExample
{
/// <summary>
/// A .NET example in C# that demonstrates selecting arbitrary subsets of the
/// rows and columns of a data frame.
/// </summary>
/// <remarks>
/// Besides accessors for single elements, columns, and rows, class DataFrame
/// offers many indexers and member functions that return sub-frames holding
/// any chosen subset of rows, columns, or both. These indexers and methods
/// accept the NMath Core types Slice and Range, which describe sets of row or
/// column indices with constant spacing. NMath Stats adds a further class,
/// Subset. Like a Slice or Range, a Subset is a collection of indices used to
/// view part of the data held in another data structure. Unlike a Slice or
/// Range, a Subset does not have to be continuous, or even ordered: it is
/// simply an arbitrary collection of indices.
/// </remarks>
public class SubsetExample
{
    /// <summary>
    /// Loads the sample data set and prints a series of sub-frames selected
    /// with Subset instances built in different ways.
    /// </summary>
    /// <param name="args">Command-line arguments; not used by this example.</param>
    static void Main(string[] args)
    {
        // The input file comes from The Data and Story Library
        // (http://lib.stat.cmu.edu/DASL), which describes it as follows:
        //
        //   These data measure protein consumption in twenty-five European
        //   countries for nine food groups.
        DataFrame data = DataFrame.Load("..\\..\\SubsetExample.dat");

        Console.WriteLine();
        Show("COMPLETE DATA SET\n", data);

        // A Subset can be built in several ways. The simplest constructor takes
        // an array of integer indices, which do not have to be in order.
        Subset picks = new Subset(new int[] { 5, 4, 0, 3 });

        // Select rows 5, 4, 0, and 3 of the original frame, in that order,
        // keeping every column.
        Show("ARBITRARY SUBSET OF ROWS\n", data.GetRows(picks));

        // Reuse the same subset to select columns 5, 4, 0, and 3 of the
        // original frame, in that order, keeping every row.
        Show("ARBITRARY SUBSET OF COLUMNS\n", data.GetColumns(picks));

        // The two-argument indexer subsets rows and columns in a single step.
        Show("ARBITRARY SUBSET OF ROWS AND COLUMNS\n", data[picks, picks]);

        // Another useful constructor takes an array of booleans and builds a
        // Subset holding the index of every true element. Here the flags mark
        // the rows where protein from Milk exceeds protein from Fish.
        int rowCount = data.Rows;
        bool[] milkBeatsFish = new bool[rowCount];
        for (int row = 0; row < rowCount; row++)
        {
            double milk = (double)data["Milk"][row];
            double fish = (double)data["Fish"][row];
            milkBeatsFish[row] = milk > fish;
        }

        Subset milkGTfish = new Subset(milkBeatsFish);
        Show("ROWS WHERE MILK > FISH\n", data.GetRows(milkGTfish));

        // StatsFunctions.If() applies a logical function delegate to a data set
        // and returns an array of booleans. Use it with GT3() (defined later in
        // this class) to find the countries whose protein consumption from
        // Nuts exceeds 3.0.
        bool[] nutsAboveThree = StatsFunctions.If(
            data["Nuts"],
            new StatsFunctions.LogicalDoubleFunction(GT3));

        Subset nutsGT3 = new Subset(nutsAboveThree);
        Show("ROWS WHERE NUTS > 3.0\n", data.GetRows(nutsGT3));

        // Subsets can be combined with operators, including operator& for
        // intersections and operator| for unions.
        Show("ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)\n", data.GetRows(milkGTfish & nutsGT3));
        Show("ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)\n", data.GetRows(milkGTfish | nutsGT3));

        // Wait for console input before returning.
        Console.WriteLine();
        Console.WriteLine("Press Enter Key");
        Console.Read();
    }

    /// <summary>
    /// Writes a heading immediately followed by the text form of a data frame
    /// and a trailing newline to the console.
    /// </summary>
    /// <param name="heading">Heading text, including its own trailing newline.</param>
    /// <param name="frame">The data frame to print.</param>
    private static void Show(string heading, DataFrame frame)
    {
        Console.WriteLine(heading + frame + "\n");
    }

    /// <summary>
    /// Logical function passed to StatsFunctions.If() in Main.
    /// </summary>
    /// <param name="x">The value to test.</param>
    /// <returns>True if <paramref name="x"/> is greater than 3.0; otherwise false.</returns>
    private static bool GT3(double x)
    {
        return x > 3.0;
    }
}
} // namespace
[TOC]