[TOC]
Imports System
Imports System.Collections
Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Namespace CenterSpace.NMath.Stats.Examples.VisualBasic
' A .NET example in VB.NET showing how to access arbitrary subsets of a data frame.
' In addition to accessors for individual elements, columns, or rows in
' a data frame, class DataFrame provides a large number of indexers and
' member functions for accessing sub-frames containing any arbitrary subset
' of rows, columns, or both. Such indexers and methods accept the NMath Core
' types Slice and Range to indicate sets of row or column indices with constant
' spacing. In addition, NMath Stats introduces a new class called Subset.
' Like a Slice or Range, a Subset represents a collection of indices that can be
' used to view a subset of data from another data structure. Unlike a Slice or
' Range, however, a Subset need not be contiguous, or even ordered. It is
' simply an arbitrary collection of indices.
Module SubsetExample
' Entry point of the example. Loads a data frame from SubsetExample.dat and
' prints, in turn: the complete data set, sub-frames selected by an explicit
' index Subset (rows, columns, and both), sub-frames selected by Subsets built
' from boolean arrays, and finally the intersection and union of two Subsets.
' Blocks on Console.Read() at the end so the output stays visible.
Sub Main()

    ' Read in data from the file. The data comes from The Data and Story
    ' Library (http://lib.stat.cmu.edu/DASL) and is described below:
    '
    ' These data measure protein consumption in twenty-five European
    ' countries for nine food groups.
    '
    ' NOTE(review): VB string literals do not treat the backslash as an escape
    ' character, so this path literally contains doubled backslashes. It is
    ' left unchanged here - confirm the loader/file system tolerates it.
    Dim DF As DataFrame = DataFrame.Load("..\\..\\SubsetExample.dat")

    Console.WriteLine()
    Console.WriteLine("COMPLETE DATA SET")
    Console.WriteLine(DF)
    Console.WriteLine()

    ' Subset instances can be constructed in a variety of ways. One constructor
    ' simply accepts an array of integers. The integers do not need to be ordered.
    Dim Subset1 As Subset = New Subset(New Integer() {5, 4, 0, 3})

    ' Let's use this subset to get a sub-frame of the data. This code gets rows
    ' 5, 4, 0, and 3 from the original data frame, in that order, and all columns.
    Console.WriteLine("ARBITRARY SUBSET OF ROWS")
    Console.WriteLine(DF.GetRows(Subset1))
    Console.WriteLine()

    ' This code uses the same subset to get columns 5, 4, 0, and 3 from the
    ' original data frame, in that order, and all rows.
    Console.WriteLine("ARBITRARY SUBSET OF COLUMNS")
    Console.WriteLine(DF.GetColumns(Subset1))
    Console.WriteLine()

    ' Indexers enable you to subset both rows and columns simultaneously.
    Console.WriteLine("ARBITRARY SUBSET OF ROWS AND COLUMNS")
    Console.WriteLine(DF(Subset1, Subset1))
    Console.WriteLine()

    ' A very useful constructor takes an array of boolean values and constructs a
    ' Subset containing the indices of all true elements in the array. Let's create
    ' a subset of row indices containing those rows where protein from Milk exceeds
    ' protein from Fish.
    '
    ' A VB array declaration specifies the UPPER BOUND, not the length, so the
    ' bound must be DF.Rows - 1 to get exactly one flag per row.
    Dim BArray(DF.Rows - 1) As Boolean
    For I As Integer = 0 To DF.Rows - 1
        BArray(I) = CType(DF("Milk")(I), Double) > CType(DF("Fish")(I), Double)
    Next
    Dim MilkGTFish As Subset = New Subset(BArray)
    Console.WriteLine("ROWS WHERE MILK > FISH")
    Console.WriteLine(DF.GetRows(MilkGTFish))
    Console.WriteLine()

    ' The StatsFunctions.If() method applies a given logical function delegate to
    ' a data set and returns an array of boolean values. Let's create a subset for
    ' countries where protein consumption from Nuts exceeds 3.0. See below for
    ' the definition of logical function GT3().
    BArray = StatsFunctions.If(DF("Nuts"), New StatsFunctions.LogicalDoubleFunction(AddressOf GT3))
    Dim NutsGT3 As Subset = New Subset(BArray)
    Console.WriteLine("ROWS WHERE NUTS > 3.0")
    Console.WriteLine(DF.GetRows(NutsGT3))
    Console.WriteLine()

    ' The Subset class provides a variety of ways to combine subsets. This
    ' example calls the shared methods Subset.Intersection() and Subset.Union()
    ' to select rows matching both conditions and rows matching either one.
    Console.WriteLine("ROWS WHERE (MILK > FISH) AND (NUTS > 3.0)")
    Console.WriteLine(DF.GetRows(Subset.Intersection(MilkGTFish, NutsGT3)))
    Console.WriteLine()

    Console.WriteLine("ROWS WHERE (MILK > FISH) OR (NUTS > 3.0)")
    Console.WriteLine(DF.GetRows(Subset.Union(MilkGTFish, NutsGT3)))
    Console.WriteLine()

    ' Wait for input before exiting so the console output can be read.
    Console.WriteLine()
    Console.WriteLine("Press Enter Key")
    Console.Read()

End Sub
' Logical predicate handed to StatsFunctions.If() as a LogicalDoubleFunction.
' Returns True when X is strictly greater than 3.0, and False otherwise
' (including when X equals 3.0).
Function GT3(ByVal X As Double) As Boolean
    Const Threshold As Double = 3.0
    If X > Threshold Then
        Return True
    End If
    Return False
End Function
End Module
End Namespace
[TOC]