[TOC]
using System;
using System.Collections;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace CenterSpace.NMath.Stats.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing how to use the descriptive statistics functions of the
/// StatsFunctions class.
/// </summary>
/// <remarks>
/// The example loads a comma-delimited file of student records (age, gender, grade),
/// prints statistics for the whole group, and then repeats a short grade summary for
/// each gender and for the students classed as young.
/// </remarks>
class StatsFunctionsExample
{
    /// <summary>Name of the data frame column holding each student's age.</summary>
    private const string AgeColumn = "Age";

    /// <summary>Name of the data frame column holding each student's gender.</summary>
    private const string GenderColumn = "Gender";

    /// <summary>Name of the data frame column holding each student's grade.</summary>
    private const string GradeColumn = "Grade";

    /// <summary>Ages strictly below this value are treated as "young".</summary>
    private const double YoungAgeLimit = 20.0;

    /// <summary>
    /// Entry point: loads the student data and writes the statistics to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Read in data from a comma-delimited file. Data has age, gender, grade
        // columns.
        // Specify the data file, whether it includes headers, whether it has row keys,
        // the delimiter, and whether to try to parse data into non-generic types.
        // NOTE(review): the path is relative and uses Windows separators - presumably
        // it is resolved against the working directory; confirm before running elsewhere.
        DataFrame data = DataFrame.Load("..\\..\\StatsFunctionsExample.dat", true, false, ",", true);

        // Print out all data.
        Console.WriteLine();
        Console.WriteLine(data);
        Console.WriteLine();

        // Print out top grade
        Console.WriteLine("highest grade.. " + StatsFunctions.MaxValue(data[GradeColumn]));
        Console.WriteLine();

        // How many male and female students are there?
        IDictionary counts = StatsFunctions.Counts(data[GenderColumn]);
        Console.Write("There are " + counts["Male"] + " male students and ");
        Console.WriteLine(counts["Female"] + " female students.");
        Console.WriteLine();

        // Mean, median, mode grades for the whole group (the mean is printed unrounded).
        Console.WriteLine("mean grade: " + StatsFunctions.Mean(data[GradeColumn]));
        Console.WriteLine("median grade: " + StatsFunctions.Median(data[GradeColumn]));
        Console.WriteLine("most common grade: " + StatsFunctions.Mode(data[GradeColumn]));
        Console.WriteLine();

        // Is there a correlation between age and grade? Always report the answer,
        // not only when the coefficient happens to be positive.
        double correlation = StatsFunctions.Correlation(data[AgeColumn], data[GradeColumn]);
        if (correlation > 0.0)
        {
            Console.WriteLine("There is a positive correlation of " + correlation + " between age and grade.");
        }
        else if (correlation < 0.0)
        {
            Console.WriteLine("There is a negative correlation of " + correlation + " between age and grade.");
        }
        else
        {
            // Reached for a coefficient of exactly zero and also for NaN, since NaN
            // fails both comparisons above.
            Console.WriteLine("No positive or negative correlation between age and grade was found (coefficient: " + correlation + ").");
        }
        Console.WriteLine();

        // What percentage of students pass with a 50% passing rate? 60%?
        Console.WriteLine("Percentage passing with 50% cutoff: " + PercentScoringAbove(data, 50) + "%");
        Console.WriteLine("Percentage passing with 60% cutoff: " + PercentScoringAbove(data, 60) + "%");
        Console.WriteLine();

        // Percentiles
        Console.WriteLine("Interquartile range is " + StatsFunctions.InterquartileRange(data[GradeColumn]));
        Console.WriteLine("What's the 80th percentile? " + StatsFunctions.Percentile(data[GradeColumn], 0.8));
        Console.WriteLine();

        // Split by gender and do descriptive statistics
        Factor gender = data.GetFactor(GenderColumn);
        Subset[] genderGroups = Subset.GetGroupings(gender);
        for (int i = 0; i < genderGroups.Length; i++)
        {
            DataFrame genderSpecific = data.GetRows(genderGroups[i]);
            PrintGroupGradeSummary(gender.Levels[i] + " students...", genderSpecific);
        }

        // Young students: keep the rows whose age satisfies Young, with all columns.
        DataFrame young = data[new Subset(StatsFunctions.If(data[AgeColumn], new StatsFunctions.LogicalDoubleFunction(Young))), Slice.All];
        PrintGroupGradeSummary("Young students...", young);

        // Top grade by a young student?
        double top = StatsFunctions.MaxValue(young[GradeColumn]);
        Console.WriteLine("Top grade by a young student was... " + top);

        // What percentage of the whole group beat the top-ranked young student?
        Console.WriteLine("Percentage beating top-ranked young student... " + PercentScoringAbove(data, top) + "%");
        Console.WriteLine();

        Console.WriteLine("Press Enter Key");
        Console.Read();
    } // Main

    /// <summary>
    /// Writes a heading followed by the mean (rounded to two decimals), median and
    /// mode of the grade column of <paramref name="group"/>, then a blank line.
    /// </summary>
    /// <param name="heading">Line printed before the statistics.</param>
    /// <param name="group">Data frame containing a grade column.</param>
    private static void PrintGroupGradeSummary(string heading, DataFrame group)
    {
        Console.WriteLine(heading);
        Console.WriteLine("mean grade: " + System.Math.Round(StatsFunctions.Mean(group[GradeColumn]), 2));
        Console.WriteLine("median grade: " + StatsFunctions.Median(group[GradeColumn]));
        Console.WriteLine("most common grade: " + StatsFunctions.Mode(group[GradeColumn]));
        Console.WriteLine();
    }

    /// <summary>
    /// Computes <c>(1 - PercentileRank(grades, grade)) * 100</c> for the grade column
    /// of <paramref name="students"/>, rounded to two decimals.
    /// </summary>
    /// <param name="students">Data frame containing a grade column.</param>
    /// <param name="grade">Grade whose percentile rank is looked up.</param>
    /// <returns>The rounded percentage, printed by the caller with a trailing "%".</returns>
    private static double PercentScoringAbove(DataFrame students, double grade)
    {
        double rank = StatsFunctions.PercentileRank(students[GradeColumn], grade);
        return System.Math.Round((1.0 - rank) * 100.0, 2);
    }

    /// <summary>
    /// Predicate used to select young students.
    /// </summary>
    /// <param name="age">Age to test.</param>
    /// <returns>True if <paramref name="age"/> is strictly less than 20.</returns>
    private static bool Young(double age)
    {
        return age < YoungAgeLimit;
    }
} // class
} // namespace
[TOC]