// ← All NMath Code Examples
using System;
using System.IO;
using CenterSpace.NMath.Core;
namespace CenterSpace.NMath.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing how to access arbitrary subsets of a data frame.
/// </summary>
/// <remarks>
/// In addition to accessors for individual elements, columns, or rows in
/// a data frame, class DataFrame provides a large number of indexers and
/// member functions for accessing sub-frames containing any arbitrary subset
/// of rows, columns, or both. Such indexers and methods accept the NMath Core
/// types Slice and Range to indicate sets of row or column indices with constant
/// spacing. In addition, NMath Stats introduces a new class called Subset.
/// Like a Slice or Range, a Subset represents a collection of indices that can be
/// used to view a subset of data from another data structure. Unlike a Slice or
/// Range, however, a Subset need not be contiguous, or even ordered. It is
/// simply an arbitrary collection of indices.
/// </remarks>
public class SubsetExample
{
    /// <summary>
    /// Entry point. Loads the protein-consumption data set from
    /// "SubsetExample.dat" and prints a series of sub-frames, each one
    /// selected with a Subset built in a different way.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
        // The data comes from The Data and Story Library
        // (http://lib.stat.cmu.edu/DASL). It measures protein consumption in
        // twenty-five European countries for nine food groups.
        DataFrame df = DataFrame.Load( "SubsetExample.dat" );

        Console.WriteLine();
        Show( "COMPLETE DATA SET\n", df );

        // The simplest Subset constructor takes an array of integer indices.
        // The indices do not have to be in order.
        int[] picks = { 5, 4, 0, 3 };
        var arbitrary = new Subset( picks );

        // Rows 5, 4, 0, and 3 of the original frame, in that order, with all columns.
        DataFrame pickedRows = df.GetRows( arbitrary );
        Show( "ARBITRARY SUBSET OF ROWS\n", pickedRows );

        // The same subset applied to columns: columns 5, 4, 0, and 3, in that
        // order, with all rows.
        DataFrame pickedColumns = df.GetColumns( arbitrary );
        Show( "ARBITRARY SUBSET OF COLUMNS\n", pickedColumns );

        // The two-argument indexer subsets rows and columns in a single step.
        DataFrame pickedBoth = df[arbitrary, arbitrary];
        Show( "ARBITRARY SUBSET OF ROWS AND COLUMNS\n", pickedBoth );

        // Another constructor takes an array of booleans and builds a Subset
        // holding the indices of the true elements. Here each flag records
        // whether protein from Milk exceeds protein from Fish in that row.
        int rowCount = df.Rows;
        var milkColumn = df["Milk"];
        var fishColumn = df["Fish"];
        var milkBeatsFish = new bool[rowCount];
        for ( int row = 0; row < rowCount; row++ )
        {
            double milk = (double) milkColumn[row];
            double fish = (double) fishColumn[row];
            milkBeatsFish[row] = milk > fish;
        }
        var milkGTfish = new Subset( milkBeatsFish );
        Show( "ROWS WHERE MILK > FISH\n", df.GetRows( milkGTfish ) );

        // StatsFunctions.If() applies a logical function delegate to a data set
        // and returns an array of booleans. GT3() (defined below) selects the
        // countries where protein consumption from Nuts exceeds 3.0.
        var exceedsThree = new Func<double, bool>( GT3 );
        bool[] nutsFlags = StatsFunctions.If( df["Nuts"], exceedsThree );
        var nutsGT3 = new Subset( nutsFlags );
        Show( "ROWS WHERE NUTS > 3.0\n", df.GetRows( nutsGT3 ) );

        // Subsets can be combined with operators: operator& gives the
        // intersection and operator| gives the union.
        DataFrame inBoth = df.GetRows( milkGTfish & nutsGT3 );
        Show( "ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)\n", inBoth );

        DataFrame inEither = df.GetRows( milkGTfish | nutsGT3 );
        Show( "ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)\n", inEither );

        Console.WriteLine();
        Console.WriteLine( "Press Enter Key" );
        Console.Read();
    } // Main

    /// <summary>
    /// Writes a heading followed by a data frame and a trailing newline to the console.
    /// </summary>
    /// <param name="heading">Heading text, including its own line terminator.</param>
    /// <param name="frame">The data frame to print.</param>
    private static void Show( string heading, DataFrame frame )
    {
        Console.WriteLine( heading + frame + "\n" );
    }

    /// <summary>
    /// Logical function that tests whether a value is greater than 3.0.
    /// </summary>
    /// <param name="x">The value to test.</param>
    /// <returns>true if <paramref name="x"/> is greater than 3.0; otherwise false.</returns>
    private static bool GT3( double x )
    {
        bool isGreater = x > 3.0;
        return isGreater;
    }
} // class
} // namespace
// ← All NMath Code Examples