C# PLS2 Cross Validation Example

← All NMath Stats Code Examples

 

using System;
using System.IO;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;


namespace CenterSpace.NMath.Stats.Examples.CSharp
{
  /// <summary>
  /// This .NET example in C# shows how to use the CrossValidation classes to 
  /// find the optimal number of components for a Partial Least Squares (PLS) 
  /// calculation.
  /// For each candidate number of components a K-fold cross validation is 
  /// performed. In K-fold cross validation the data set is divided into k 
  /// subsets, and the holdout method is repeated k times. Each time, one of 
  /// the k subsets is used as the test set and the other k-1 subsets are put 
  /// together to form a training set. Then the average error across all k 
  /// trials is computed. The optimal number of components is the one for 
  /// which this average error is a minimum.
  /// </summary>
  class PLS2CrossValidationExample
  {

    /// <summary>
    /// Runs the cross validation over a range of component counts, prints the
    /// error measure obtained for each count, and reports the count with the
    /// smallest error.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {
      // Load the predictor (spectral) and response (concentration) matrices.
      // The readers are closed as soon as each matrix has been constructed.
      DoubleMatrix spectralData = ReadMatrix( "chemometricX.dat" );
      DoubleMatrix concentrationData = ReadMatrix( "chemometricY.dat" );

      int numIndependentVars = spectralData.Cols;
      const int numFolds = 6;

      // The CrossValidation class needs the full set of data, a way to generate
      // subsets of the data and a PLS calculator object. The subset generator 
      // is specified by an instance of the ICrossValidationSubsets interface.
      var subsetGenerator = new KFoldsSubsets( numFolds );

      // Construct a PLS2 cross validation object that uses the SIMPLS algorithm
      // to calculate the partial least squares models.
      var calculator = new PLS2SimplsAlgorithm();
      var cv = new PLS2CrossValidation( calculator, subsetGenerator );

      // For each number of components perform cross validation and record the 
      // minimum average Mean Square Error and the number of components at which
      // it is achieved. A value of -1 means "nothing recorded yet".
      int optimalNumComponents = -1;
      double minMse = Double.MaxValue;

      Console.WriteLine();
      Console.WriteLine( "Components\tMean Square Error" );
      Console.WriteLine( "=================================\n" );

      // Candidate counts run from 1 through numIndependentVars - 2.
      for ( int numComponents = 1; numComponents < numIndependentVars - 1; ++numComponents )
      {
        cv.DoCrossValidation( spectralData, concentrationData, numComponents );
        if ( !calculator.IsGood )
        {
          Console.WriteLine( "Calculation with {0} components is not good. Message:", numComponents );
          Console.WriteLine( calculator.Message );
        }

        // Reduce the average mean square error to a single scalar via its 
        // two-norm so that different component counts can be compared.
        double mse = cv.AverageMeanSqrError.TwoNorm();
        Console.WriteLine( numComponents + "\t\t" + mse.ToString( "G5" ) );
        if ( mse < minMse )
        {
          minMse = mse;
          optimalNumComponents = numComponents;
        }
      }

      if ( optimalNumComponents < 0 )
      {
        // Either the loop body never ran (too few independent variables) or no
        // iteration produced an error value smaller than the initial sentinel.
        Console.WriteLine( "\n\nNo number of components produced a usable mean square error." );
      }
      else
      {
        Console.WriteLine( "\n\nOptimal number of components = " + optimalNumComponents );
        Console.WriteLine( "Minimum MSE = " + minMse.ToString( "G5" ) );
      }

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();
    }

    /// <summary>
    /// Builds a matrix from the text file at the given path, disposing the
    /// underlying reader once the matrix has been constructed.
    /// </summary>
    /// <param name="path">Path of the data file to read.</param>
    /// <returns>The matrix constructed from the file contents.</returns>
    private static DoubleMatrix ReadMatrix( string path )
    {
      using ( var reader = new StreamReader( path ) )
      {
        return new DoubleMatrix( reader );
      }
    }
  }
}

← All NMath Stats Code Examples
Top