[TOC]
using System;
using System.IO;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace CenterSpace.NMath.Stats.Examples.CSharp
{
/// <summary>
/// This .NET example in C# shows how to use the CrossValidation classes to
/// find the optimal number of components for a Partial Least Squares (PLS)
/// calculation.
/// For each number of components we will perform a K-fold cross validation.
/// In K-fold cross validation the data set is divided into k subsets, and
/// the holdout method is repeated k times. Each time, one of the k subsets
/// is used as the test set and the other k-1 subsets are put together to
/// form a training set. Then the average error across all k trials is computed.
/// The optimal number of components will then be the number of components for
/// which this average error is a minimum.
/// </summary>
class PLS2CrossValidationExample
{
static void Main(string[] args)
{
string yDatafilename = "..\\..\\chemometricY.dat";
string xDatafilename = "..\\..\\chemometricX.dat";
StreamReader xDataStream;
StreamReader yDataStream;
try
{
xDataStream = new StreamReader(xDatafilename);
yDataStream = new StreamReader(yDatafilename);
}
catch (FileNotFoundException e)
{
string msg = string.Format("Could not find data file {0}, {1}.", xDatafilename, yDatafilename);
msg += Environment.NewLine;
msg += e.Message;
msg += Environment.NewLine;
Console.WriteLine(msg);
return;
}
DoubleMatrix spectralData = new DoubleMatrix(xDataStream);
DoubleMatrix concentrationData = new DoubleMatrix(yDataStream);
int numDependentVars = concentrationData.Cols;
int numIndependentVars = spectralData.Cols;
int numSamples = spectralData.Rows;
int k = 6;
// The CrossValidation class needs the full set of data, a way to generate
// subsets of the data and a PLS calculator object. The subset generator
// is specified by an instance of the ICrossValidationSubets interface.
KFoldsSubsets subsetGenerator = new KFoldsSubsets(k);
// Construct a PLS2 cross validation object that uses SIMPLS algorithm to
// calculate the partial least squares models.
PLS2SimplsAlgorithm calculator = new PLS2SimplsAlgorithm();
PLS2CrossValidation cv = new PLS2CrossValidation(calculator, subsetGenerator);
// Now for each number of components perform cross validation and record the
// minimum average Mean Square Error and the number of components at which
// it is achieved.
int optimalNumComponents = -1;
double minMse = Double.MaxValue;
Console.WriteLine();
Console.WriteLine("Components\tMean Square Error");
Console.WriteLine("=================================\n\n");
for (int numComponents = 1; numComponents < numIndependentVars - 1; ++numComponents)
{
cv.DoCrossValidation(spectralData, concentrationData, numComponents);
if (!calculator.IsGood)
{
Console.WriteLine("Calcuation with {0} components is not good. Message:", numComponents);
Console.WriteLine(calculator.Message);
}
double mse = cv.AverageMeanSqrError.TwoNorm();
Console.WriteLine(numComponents + "\t\t" + mse);
if (mse < minMse)
{
minMse = mse;
optimalNumComponents = numComponents;
}
}
Console.WriteLine("\n\nOptimal number of components = " + optimalNumComponents);
Console.WriteLine("Minimum MSE = " + minMse);
Console.WriteLine();
Console.WriteLine("Press Enter Key");
Console.Read();
}
}
}
[TOC]