[TOC]
Imports System
Imports System.IO
Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Namespace CenterSpace.NMath.Stats.Examples.VisualBasic
' A .NET example in VB.NET showing Partial Least Squares (PLS) and
' cross validation.
' This example shows how to use the CrossValidation classes to find the
' optimal number of components for a Partial Least Squares calculation.
' For each number of components we will perform a K-fold cross validation.
' In K-fold cross validation the data set is divided into k subsets, and
' the holdout method is repeated k times. Each time, one of the k subsets
' is used as the test set and the other k-1 subsets are put together to
' form a training set. Then the average error across all k trials is computed.
' The optimal number of components will then be the number of components for
' which this average error is a minimum.
Module PLS2CrossValidationExample
Sub Main()
Dim yDatafilename As String = "..\\..\\chemometricY.dat"
Dim xDatafilename As String = "..\\..\\chemometricX.dat"
Dim xDataStream As StreamReader
Dim yDataStream As StreamReader
Try
xDataStream = New StreamReader(xDatafilename)
yDataStream = New StreamReader(yDatafilename)
Catch E As FileNotFoundException
Dim Msg As String = String.Format("Could not find data file {0}", xDatafilename)
Msg += yDatafilename
Msg += "."
Msg += Environment.NewLine
Msg += E.Message
Msg += Environment.NewLine
Console.WriteLine(Msg)
Return
End Try
Dim spectralData As New DoubleMatrix(xDataStream)
Dim ConcentrationData As New DoubleMatrix(yDataStream)
Dim numDependentVars As Integer = ConcentrationData.Cols
Dim numIndependentVars As Integer = spectralData.Cols
Dim numSamples As Integer = spectralData.Rows
Dim K As Integer = 6
' The CrossValidation class needs the full set of data, a way to generate
' subsets of the data and a PLS calculator object. The subset generator is
' specified by an instance of the ICrossValidationSubets interface.
Dim subsetGenerator As KFoldsSubsets = New KFoldsSubsets(K)
' Construct a PLS2 cross validation object that uses SIMPLS algorithm to
' calculate the partial least squares models.
Dim Calculator As PLS2SimplsAlgorithm = New PLS2SimplsAlgorithm()
Dim CV As PLS2CrossValidation = New PLS2CrossValidation(Calculator, subsetGenerator)
' Now for each number of components perform cross validation and record the
' minimum average Mean Square Error and the number of components at which
' it is achieved.
Dim optimalNumComponents As Integer = -1
Dim minMse As Double = Double.MaxValue
Console.WriteLine()
Console.WriteLine("Components" & ControlChars.Tab & "MeanSquareError")
Console.WriteLine("=================================")
Console.WriteLine()
Console.WriteLine()
For numComponents As Integer = 1 To (numIndependentVars - 2)
CV.DoCrossValidation(spectralData, ConcentrationData, numComponents)
If (Not Calculator.IsGood) Then
Console.WriteLine("Calcuation with {0} components is not good. Message:", numComponents)
Console.WriteLine(Calculator.Message)
End If
Dim MSE As Double = CV.AverageMeanSqrError.TwoNorm()
Console.WriteLine(numComponents & ControlChars.Tab & ControlChars.Tab & MSE)
If (MSE < minMse) Then
minMse = MSE
optimalNumComponents = numComponents
End If
Next
Console.WriteLine()
Console.WriteLine()
Console.WriteLine("Optimal number of components = " & optimalNumComponents)
Console.WriteLine("Minimum MSE = " & minMse)
Console.WriteLine()
Console.WriteLine("Press Enter Key")
Console.Read()
End Sub
End Module
End Namespace
[TOC]