using System;
using System.IO;

using CenterSpace.NMath.Core;

namespace CenterSpace.NMath.Examples.CSharp
{
    /// <summary>
    /// This .NET example in C# shows how to use the CrossValidation classes to
    /// find the optimal number of components for a Partial Least Squares (PLS)
    /// calculation.
    /// For each number of components we will perform a K-fold cross validation.
    /// In K-fold cross validation the data set is divided into k subsets, and
    /// the holdout method is repeated k times. Each time, one of the k subsets
    /// is used as the test set and the other k-1 subsets are put together to
    /// form a training set. Then the average error across all k trials is computed.
    /// The optimal number of components will then be the number of components for
    /// which this average error is a minimum.
    /// </summary>
    class PLS2CrossValidationExample
    {
        /// <summary>
        /// Reads a matrix from the text file at <paramref name="path"/> and
        /// closes the file as soon as the matrix has been constructed.
        /// </summary>
        /// <param name="path">Path of the data file to read.</param>
        /// <returns>The matrix built from the file contents.</returns>
        private static DoubleMatrix LoadMatrix( string path )
        {
            // The reader is only needed while the matrix is being constructed,
            // so dispose it immediately instead of leaking the file handle.
            using ( var reader = new StreamReader( path ) )
            {
                return new DoubleMatrix( reader );
            }
        }

        /// <summary>
        /// Runs a K-fold cross validation for each candidate number of PLS
        /// components, prints the resulting error for each, and reports the
        /// number of components with the smallest error.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main( string[] args )
        {
            DoubleMatrix spectralData = LoadMatrix( "chemometricX.dat" );
            DoubleMatrix concentrationData = LoadMatrix( "chemometricY.dat" );

            int numIndependentVars = spectralData.Cols;
            int k = 6;

            // The CrossValidation class needs the full set of data, a way to generate
            // subsets of the data and a PLS calculator object. The subset generator
            // is specified by an instance of the ICrossValidationSubsets interface.
            var subsetGenerator = new KFoldsSubsets( k );

            // Construct a PLS2 cross validation object that uses SIMPLS algorithm to
            // calculate the partial least squares models.
            var calculator = new PLS2SimplsAlgorithm();
            var cv = new PLS2CrossValidation( calculator, subsetGenerator );

            // Now for each number of components perform cross validation and record the
            // minimum average Mean Square Error and the number of components at which
            // it is achieved.
            int optimalNumComponents = -1;
            double minMse = Double.MaxValue;

            Console.WriteLine();
            Console.WriteLine( "Components\tMean Square Error" );
            Console.WriteLine( "=================================\n" );

            // Candidate component counts run from 1 up to numIndependentVars - 2.
            for ( int numComponents = 1; numComponents < numIndependentVars - 1; ++numComponents )
            {
                cv.DoCrossValidation( spectralData, concentrationData, numComponents );

                // A calculation flagged as not good is reported, but its error is
                // still printed and considered below.
                if ( !calculator.IsGood )
                {
                    Console.WriteLine( "Calculation with {0} components is not good. Message:", numComponents );
                    Console.WriteLine( calculator.Message );
                }

                // Reduce the average mean square error to a single scalar so that
                // different component counts can be compared.
                double mse = cv.AverageMeanSqrError.TwoNorm();
                Console.WriteLine( numComponents + "\t\t" + mse.ToString( "G5" ) );

                if ( mse < minMse )
                {
                    minMse = mse;
                    optimalNumComponents = numComponents;
                }
            }

            if ( optimalNumComponents < 0 )
            {
                // Either there were too few independent variables for the loop to
                // run, or no error value compared below the initial sentinel.
                Console.WriteLine( "\n\nNo valid cross validation result was obtained." );
            }
            else
            {
                Console.WriteLine( "\n\nOptimal number of components = " + optimalNumComponents );
                Console.WriteLine( "Minimum MSE = " + minMse.ToString( "G5" ) );
            }

            Console.WriteLine();
            Console.WriteLine( "Press Enter Key" );
            Console.Read();
        }
    }
}