VB.NET NMF Clustering Example

[TOC]

Imports System
Imports System.IO
Imports System.Text
Imports System.Collections.Generic

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats

Namespace CenterSpace.NMath.Stats.Examples.VisualBasic

  ' A .NET example in VB.NET demonstrating how to cluster the "samples" represented as the
  ' columns of a matrix using a Nonnegative Matrix Factorization (NMF). Note that the output
  ' of this example may vary from run to run because the NMF algorithm is an iterative
  ' process that starts from a random initial solution. The factorization may therefore
  ' converge to different results on different runs (an NMF is not unique).
  Module NMFClusteringExample

    ' Entry point of the example. Loads the data set, factors it with an NMF that uses
    ' the divergence update, and prints the convergence status, the connectivity matrix
    ' and the cluster membership of every column.
    Sub Main()

      ' VB.NET string literals have no backslash escapes, so a C#-style "..\\..\\" literal
      ' would contain doubled separators. Path.Combine builds the relative path with the
      ' platform's directory separator instead.
      Dim FileName As String = Path.Combine("..", "..", "ratio.dat")
      Dim Data As DataFrame = ReadDataFromFile(FileName)
      If Data Is Nothing Then ' Problem reading data; the helper already reported it.
        Return
      End If

      Dim MatData As DoubleMatrix = Data.ToDoubleMatrix()

      ' Factor the matrix using defaults for all factorization settings and the divergence
      ' iterative update for computing the factorization.
      ' K is handed to Factor as its second argument (presumably the factorization rank,
      ' i.e. the number of clusters - confirm against the NMath documentation).
      Dim Cluster As New NMFClustering(Of NMFDivergenceUpdate)()
      Dim K As Integer = 3
      Cluster.Factor(MatData, K)

      ' Check if the iterative factorization converged before hitting the default maximum
      ' number of iterations.
      If Cluster.Converged Then
        Console.WriteLine("Factorization converged in {0} iterations.", Cluster.Iterations)
      Else
        Console.WriteLine("Factorization failed to converge in {0} iterations.", Cluster.MaxFactorizationIterations)
      End If

      ' Get the connectivity matrix. The connectivity matrix is an adjacency matrix, A, such
      ' that columns i and j of the factored matrix are in the same cluster if A(i, j) = 1
      ' and are not in the same cluster if A(i, j) = 0.
      Dim Connectivity As ConnectivityMatrix = Cluster.Connectivity
      Console.WriteLine("Connectivity Matrix: ")
      Console.WriteLine(Connectivity.ToTabDelimited())
      Console.WriteLine()

      ' Print out the cluster each column belongs to using the cluster set.
      Dim Cs As ClusterSet = Cluster.ClusterSet
      For I As Integer = 0 To Cs.N - 1
        Console.WriteLine("Column {0} belongs to cluster {1}", Data.ColumnHeaders(I), Cs(I))
      Next
      Console.WriteLine()

      ' Print out the members of each cluster using the cluster set.
      For ClusterNumber As Integer = 0 To Cs.NumberOfClusters - 1
        Console.Write("Cluster number {0} contains: ", ClusterNumber)
        ' Cs.Cluster returns the indices of the members; they are used here to look up
        ' the matching column headers.
        For Each Member As Integer In Cs.Cluster(ClusterNumber)
          Console.Write("{0} ", Data.ColumnHeaders(Member))
        Next
        Console.WriteLine()
      Next

      Console.WriteLine()
      Console.WriteLine("Press Enter Key")
      ' Note: ReadKey returns on any key press, not only Enter.
      Console.ReadKey()

    End Sub

    ' Loads the example data from the given file into a DataFrame.
    ' FileName: path of the data file handed to DataFrame.Load.
    ' Returns the loaded DataFrame, or Nothing when DataFrame.Load raises an
    ' NMathException; in that case an explanatory message is written to the console.
    Function ReadDataFromFile(ByVal FileName As String) As DataFrame
      Try
        ' Load the example data into a DataFrame and hand it straight back.
        Return DataFrame.Load(FileName, True, True, ControlChars.Tab, True)
      Catch LoadError As NMathException
        ' Assemble the multi-line report; AppendLine terminates each line with
        ' Environment.NewLine.
        Dim Report As New StringBuilder()
        Report.AppendFormat("Could not find data file {0}.", FileName)
        Report.AppendLine()
        Report.AppendLine(LoadError.Message)
        Report.AppendLine("Data file must have the same name as the example source ")
        Report.AppendLine("file and be located three directories up from where the ")
        Report.Append("executable is running.")

        Console.WriteLine(Report.ToString())
        Console.WriteLine()
        Return Nothing
      End Try
    End Function

  End Module
End Namespace

[TOC]