' VB PLS2 Cross Validation Example

' ← All NMath Stats Code Examples

 

Imports System
Imports System.IO

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats

Namespace CenterSpace.NMath.Stats.Examples.VisualBasic

  ' A .NET example in Visual Basic showing Partial Least Squares (PLS) and
  ' cross validation.

  ' This example shows how to use the CrossValidation classes to find the 
  ' optimal number of components for a Partial Least Squares calculation.
  ' For each number of components we will perform a K-fold cross validation.
  ' In K-fold cross validation the data set is divided into k subsets, and 
  ' the holdout method is repeated k times. Each time, one of the k subsets 
  ' is used as the test set and the other k-1 subsets are put together to 
  ' form a training set. Then the average error across all k trials is computed.
  ' The optimal number of components will then be the number of components for
  ' which this average error is a minimum.

  Module PLS2CrossValidationExample

    ''' <summary>
    ''' Runs a K-fold cross validation of a PLS2 (SIMPLS) model for each
    ''' candidate number of components, prints the resulting error for each
    ''' one, and reports the number of components with the smallest error.
    ''' </summary>
    ''' <remarks>
    ''' Reads the predictor matrix from "chemometricX.dat" and the response
    ''' matrix from "chemometricY.dat" in the current working directory.
    ''' </remarks>
    Sub Main()

      ' Load the data matrices. The readers are wrapped in Using blocks so the
      ' underlying file handles are released as soon as the matrices are built.
      Dim spectralData As DoubleMatrix
      Using spectralReader As New StreamReader("chemometricX.dat")
        spectralData = New DoubleMatrix(spectralReader)
      End Using

      Dim ConcentrationData As DoubleMatrix
      Using concentrationReader As New StreamReader("chemometricY.dat")
        ConcentrationData = New DoubleMatrix(concentrationReader)
      End Using

      Dim numIndependentVars As Integer = spectralData.Cols

      ' Number of folds used for the K-fold cross validation.
      Dim K As Integer = 6

      ' The CrossValidation class needs the full set of data, a way to generate
      ' subsets of the data and a PLS calculator object. The subset generator is 
      ' specified by an instance of the ICrossValidationSubsets interface.
      Dim subsetGenerator As KFoldsSubsets = New KFoldsSubsets(K)

      ' Construct a PLS2 cross validation object that uses SIMPLS algorithm to
      ' calculate the partial least squares models.
      Dim Calculator As PLS2SimplsAlgorithm = New PLS2SimplsAlgorithm()
      Dim CV As PLS2CrossValidation = New PLS2CrossValidation(Calculator, subsetGenerator)

      ' Now for each number of components perform cross validation and record the 
      ' minimum average Mean Square Error and the number of components at which
      ' it is achieved. A value of -1 means "no valid result recorded yet".
      Dim optimalNumComponents As Integer = -1
      Dim minMse As Double = Double.MaxValue

      Console.WriteLine()
      Console.WriteLine("Components" & ControlChars.Tab & "MeanSquareError")
      Console.WriteLine("=================================")
      Console.WriteLine()
      Console.WriteLine()

      For numComponents As Integer = 1 To (numIndependentVars - 2)
        CV.DoCrossValidation(spectralData, ConcentrationData, numComponents)

        ' If the calculator reports a problem, show its message and skip this
        ' component count so an unreliable error value can never be selected
        ' as the optimum.
        ' NOTE(review): assumes IsGood reflects the calculation(s) just run by
        ' DoCrossValidation - confirm against the NMath documentation.
        If (Not Calculator.IsGood) Then
          Console.WriteLine("Calculation with {0} components is not good. Message:", numComponents)
          Console.WriteLine(Calculator.Message)
          Continue For
        End If

        ' Collapse the average mean square error vector to a single number
        ' (its two-norm) so that different component counts can be compared.
        Dim MSE As Double = CV.AverageMeanSqrError.TwoNorm()
        Console.WriteLine(numComponents & ControlChars.Tab & ControlChars.Tab & MSE.ToString("G5"))
        If (MSE < minMse) Then
          minMse = MSE
          optimalNumComponents = numComponents
        End If
      Next

      Console.WriteLine()
      Console.WriteLine()
      If (optimalNumComponents < 0) Then
        ' The loop never ran or every calculation failed; do not print the
        ' sentinel values as if they were results.
        Console.WriteLine("No valid cross validation result was obtained.")
      Else
        Console.WriteLine("Optimal number of components = " & optimalNumComponents)
        Console.WriteLine("Minimum MSE = " & minMse.ToString("G5"))
      End If

      Console.WriteLine()
      Console.WriteLine("Press Enter Key")
      Console.Read()

    End Sub
  End Module
End Namespace
' ← All NMath Stats Code Examples
' Top