// [TOC]
using System;
using System.Collections;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace CenterSpace.NMath.Stats.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing how to manipulate data using the DataFrame class.
/// </summary>
/// <remarks>
/// The statistical functions in NMath Stats support the NMath Core types
/// DoubleVector and DoubleMatrix, as well as simple arrays of doubles. In many
/// cases, these types are sufficient for storing and manipulating your
/// statistical data. However, they suffer from two limitations: they can only
/// store numeric data, and they have limited support for adding, inserting, removing,
/// and reordering data. Therefore, NMath Stats provides the DataFrame class which
/// represents a two-dimensional data object consisting of a list of columns of the
/// same length. Columns are themselves lists of different types of data: numeric,
/// string, boolean, generic, and so on.
/// </remarks>
public class DataFrameExample
{
    // Names that are referenced by more than one step of the walkthrough.
    private const string AcornSizeColumn = "AcornSize";
    private const string TreeHeightColumn = "TreeHeight";
    private const string ThreatenedColumn = "Threatened";
    private const string PalustrisKey = "Quercus palustris Muenchh.";

    static void Main(string[] args)
    {
        // Begin with an empty data frame and define its columns. The data relate
        // acorn size to various oak tree species. A column can later be addressed
        // either by its numeric index (0...n-1) or by the name given to its
        // constructor.
        DataFrame oaks = new DataFrame();
        oaks.AddColumn(new DFStringColumn("Region"));
        oaks.AddColumn(new DFNumericColumn(AcornSizeColumn));
        oaks.AddColumn(new DFNumericColumn(TreeHeightColumn));
        oaks.AddColumn(new DFBoolColumn(ThreatenedColumn));

        // Fill in the rows. A row can be addressed by numeric index (0...n-1) or by
        // its key object; here the species name serves as the row key.
        AddOak(oaks, "Quercus alba L.", "Atlantic", 1.4, 27, false);
        AddOak(oaks, "Quercus bicolor Willd.", "Atlantic", 3.4, 21, false);
        AddOak(oaks, "Quercus macrocarpa Michx.", "Atlantic", 9.1, 25, false);
        AddOak(oaks, "Quercus Chapmanii Sarg.", "Atlantic", 0.9, 15, false);
        AddOak(oaks, "Quercus Durandii Buckl.", "Atlantic", 0.8, 23, true);
        AddOak(oaks, "Quercus laurifolia Michx.", "Atlantic", 1.1, 27, false);
        AddOak(oaks, "Quercus marilandica Muenchh.", "Atlantic", 3.6, 9, false);
        AddOak(oaks, "Quercus nigra L.", "Atlantic", 1.1, 24, true);
        AddOak(oaks, PalustrisKey, "Atlantic", 1.1, 23, false);
        AddOak(oaks, "Quercus texana Buckl.", "Atlantic", 1.1, 9, false);
        AddOak(oaks, "Quercus coccinea Muenchh.", "Atlantic", 1.2, 4, false);
        AddOak(oaks, "Quercus Douglasii Hook. & Arn", "California", 4.1, 18, false);
        AddOak(oaks, "Quercus dumosa Nutt.", "California", 1.6, 6, false);
        AddOak(oaks, "Quercus Engelmannii Greene", "California", 2.0, 17, false);
        AddOak(oaks, "Quercus Garryana Hook.", "California", 5.5, 20, true);
        AddOak(oaks, "Quercus chrysolepis Liebm.", "California", 17.1, 15, false);
        AddOak(oaks, "Quercus vaccinifolia Engelm.", "California", 0.4, 1, false);
        AddOak(oaks, "Quercus tomentella Engelm", "California", 7.1, 18, true);

        // Print the whole data frame.
        Console.WriteLine();
        ShowFrame(oaks);

        // Rearrange the columns so that AcornSize ends up last.
        oaks.PermuteColumns(0, 3, 1, 2);
        ShowFrame(oaks);

        // Column positions can be looked up by name when they are not known.
        int acornSizeIndex = oaks.IndexOfColumn(AcornSizeColumn);
        int treeHeightIndex = oaks.IndexOfColumn(TreeHeightColumn);

        // Order the rows by AcornSize ascending, breaking ties by TreeHeight
        // descending.
        oaks.SortRows(
            new int[] { acornSizeIndex, treeHeightIndex },
            new SortingType[] { SortingType.Ascending, SortingType.Descending });
        ShowFrame(oaks);

        // Drop one column, one row by key, and one row by position.
        oaks.RemoveColumn(ThreatenedColumn);
        oaks.RemoveRow("Quercus nigra L.");
        oaks.RemoveRow(2);
        ShowFrame(oaks);

        // Overwrite a single cell, addressed by row and column index.
        int targetRow = oaks.IndexOfKey("Quercus chrysolepis Liebm.");
        int targetColumn = oaks.IndexOfColumn(AcornSizeColumn);
        oaks[targetRow, targetColumn] = 17.2;

        // Row dictionary for one species: keys are the column names, values are
        // that row's data.
        ShowDictionary(PalustrisKey, oaks.GetRowDictionary(PalustrisKey));

        // Column dictionary for TreeHeight: keys are the row keys, values are the
        // column's data.
        ShowDictionary(TreeHeightColumn, oaks.GetColumnDictionary(TreeHeightColumn));

        // A few descriptive statistics on the AcornSize column.
        Console.WriteLine("Acorn Size:");
        Console.WriteLine("Mean = " + StatsFunctions.Mean(oaks[AcornSizeColumn]));
        Console.WriteLine("Var = " + StatsFunctions.Variance(oaks[AcornSizeColumn]));
        Console.WriteLine();

        // Export the frame to a DoubleMatrix; non-numeric columns are left out.
        DoubleMatrix numericData = oaks.ToDoubleMatrix();
        Console.WriteLine(numericData);
        Console.WriteLine();

        // Extract the AcornSize values as a DoubleVector.
        DoubleVector acornSizes = oaks[AcornSizeColumn].ToDoubleVector();
        Console.WriteLine(acornSizes);
        Console.WriteLine();

        // Keep the console window open until the user responds.
        Console.WriteLine();
        Console.WriteLine("Press Enter Key");
        Console.Read();
    } // Main

    /// <summary>
    /// Appends one oak species to the data frame, using the species name as the row key.
    /// </summary>
    /// <param name="frame">Data frame that receives the new row.</param>
    /// <param name="species">Species name; used as the row key.</param>
    /// <param name="region">Value for the Region column.</param>
    /// <param name="acornSize">Value for the AcornSize column.</param>
    /// <param name="treeHeight">Value for the TreeHeight column.</param>
    /// <param name="threatened">Value for the Threatened column.</param>
    private static void AddOak(DataFrame frame, string species, string region,
        double acornSize, int treeHeight, bool threatened)
    {
        frame.AddRow(species, region, acornSize, treeHeight, threatened);
    }

    /// <summary>
    /// Writes the data frame to the console, followed by an empty line.
    /// </summary>
    /// <param name="frame">Data frame to print.</param>
    private static void ShowFrame(DataFrame frame)
    {
        Console.WriteLine(frame);
        Console.WriteLine();
    }

    /// <summary>
    /// Writes a heading, then one "key: value" line per dictionary entry, then an empty line.
    /// </summary>
    /// <param name="heading">Text printed before the entries.</param>
    /// <param name="entries">Dictionary whose keys are enumerated as strings.</param>
    private static void ShowDictionary(string heading, IDictionary entries)
    {
        Console.WriteLine(heading);
        foreach (string name in entries.Keys)
        {
            object value = entries[name];
            Console.WriteLine(name + ": " + value);
        }
        Console.WriteLine();
    }
} // class
} // namespace
// [TOC]