// ← All NMath Code Examples
using System;
using CenterSpace.NMath.Core;
using System.IO;
namespace CenterSpace.NMath.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing how to compute a consensus matrix averaging different NMF clusterings.
/// </summary>
/// <remarks>
/// A Nonnegative Matrix Factorization (NMF) is an approximate factorization
/// of a positive matrix v into a product of two matrices w and h:
/// v ~ wh
/// This factorization can be used to group, or cluster, the columns of v
/// (the columns of v are usually referred to as "samples"). NMF uses an
/// iterative algorithm with random starting values for w and h. This, coupled
/// with the fact that the factorization is not unique, means that if you cluster
/// the columns of v using an NMF cluster several different times, you may get several
/// different clusterings. The NMF consensus matrix is a way to average
/// the possibly different clusterings, and is computed using the following process:
///
/// Cluster the columns of v using NMF n times. Each NMF clustering will yield
/// a "connectivity matrix". The connectivity matrix is a symmetric matrix
/// whose i, jth entry is 1 if columns i and j of v were clustered together,
/// and 0 if they were not. The "consensus matrix" is also a symmetric matrix
/// whose i, jth entry is formed by taking the average of the i, jth entries of
/// the n connectivity matrices.
///
/// It is clear that each i, jth entry of the consensus matrix has a value between 0
/// (columns i and j were not clustered together on any of the n runs) and 1 (columns
/// i and j were clustered together on all n runs). Thus the i, jth entry of a
/// consensus matrix may be considered, in some sense, a "probability" that columns
/// i and j belong to the same cluster.
/// A consensus matrix C may also be used to perform a hierarchical clustering of the
/// columns of v by using as the distance function:
///
/// distance between columns i and j = 1.0 - C[i,j]
///
/// This is demonstrated in the example below.
/// </remarks>
class NMFConsensusMatrixExample
{
    /// <summary>
    /// Builds an NMF consensus matrix for the columns of the sample data set,
    /// prints it, and then uses it as the basis of a hierarchical clustering
    /// whose clusters are printed by column label.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
        // Load the sample data set and view its contents as a DoubleMatrix.
        DataFrame frame = DataFrame.Load( "nmf_data.dat", true, true, "\t", true );
        DoubleMatrix v = frame.ToDoubleMatrix();

        // k is the order of the factorization, i.e. the number of columns
        // of w in v ~ wh.
        int k = 3;

        // How many NMF clusterings (connectivity matrices) get averaged
        // into the consensus matrix.
        int numberOfRuns = 70;

        // Build the consensus matrix with the "divergence" update algorithm,
        // labelling its rows/columns with the data frame's column headers.
        NMFConsensusMatrix<NMFDivergenceUpdate> consensus =
            new NMFConsensusMatrix<NMFDivergenceUpdate>( v, frame.ColumnHeaders, k, numberOfRuns );

        // Report how many of the requested runs converged, followed by the
        // consensus matrix itself.
        Console.WriteLine();
        Console.WriteLine( "{0} runs out of {1} converged.", consensus.NumberOfConvergedRuns, numberOfRuns );
        Console.WriteLine();
        Console.WriteLine( "Consensus Matrix:" );
        Console.WriteLine( consensus.ToTabDelimited( "G3" ) );

        // For the first column, print the "probability" that each later
        // column was clustered with it. Columns are identified by their
        // labels rather than by their numbers.
        string firstLabel = consensus.Labels[0];
        Console.WriteLine();
        for ( int column = 1; column < consensus.Order; column++ )
        {
            Console.WriteLine( "The \"probability\" that {0} is clustered with {1} is {2}",
                firstLabel, consensus.Labels[column], consensus[0, column] );
        }

        // Hierarchical cluster analysis driven by the consensus matrix.
        // ClusterAnalysis clusters the rows of a matrix, so we hand it a
        // single-column matrix whose rows hold the column numbers
        // 0, 1, ..., n - 1 (n being the order of the consensus matrix).
        // In effect we are clustering the column numbers themselves.
        DoubleMatrix columnIndices = new DoubleMatrix( consensus.Order, 1, 0, 1 );

        // The distance between items i and j is 1.0 - C[i,j], where C is
        // the consensus matrix held by the distance object.
        ConsensusMatrixDistance consensusDistance = new ConsensusMatrixDistance( consensus );
        Distance.Function distanceFunction = consensusDistance.CaDistance;
        ClusterAnalysis analysis = new ClusterAnalysis( columnIndices, distanceFunction );

        // Cut the cluster tree into three clusters and list the labels of
        // the members of each one.
        ClusterSet clusterSet = analysis.CutTree( 3 );
        Console.WriteLine();
        for ( int c = 0; c < clusterSet.NumberOfClusters; c++ )
        {
            int[] memberIndices = clusterSet.Cluster( c );
            Console.Write( "Cluster number {0} contains: ", c );
            foreach ( int member in memberIndices )
            {
                Console.Write( "{0} ", consensus.Labels[member] );
            }
            Console.WriteLine();
        }

        // Keep the console window open until the user responds.
        Console.WriteLine();
        Console.WriteLine( "Press Enter Key" );
        Console.Read();
    }
}
/// <summary>
/// Wraps a consensus (connectivity) matrix C and exposes a distance function
/// of the form 1.0 - C[i, j], suitable for use as a cluster analysis
/// distance delegate.
/// </summary>
public class ConsensusMatrixDistance
{
    // Set once in the constructor and never reassigned.
    private readonly ConnectivityMatrix consensusMatrix;

    /// <summary>
    /// Creates a distance object backed by the given matrix.
    /// </summary>
    /// <param name="conn">The matrix whose entries define the distance.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="conn"/> is null.
    /// </exception>
    public ConsensusMatrixDistance( ConnectivityMatrix conn )
    {
        // Fail here rather than with a NullReferenceException on the first
        // CaDistance call. ReferenceEquals is used so the check does not
        // depend on any equality operator the matrix type may define.
        if ( object.ReferenceEquals( conn, null ) )
        {
            throw new ArgumentNullException( "conn" );
        }
        consensusMatrix = conn;
    }

    /// <summary>
    /// Computes the distance between two items as 1.0 - C[i, j], where i and j
    /// are taken from the first element of each vector.
    /// </summary>
    /// <param name="data1">Vector whose first element, cast to int, is the index i.</param>
    /// <param name="data2">Vector whose first element, cast to int, is the index j.</param>
    /// <returns>1.0 minus the matrix entry at (i, j).</returns>
    public double CaDistance( DoubleVector data1, DoubleVector data2 )
    {
        // The cast truncates toward zero; callers are expected to supply
        // whole-number index values in element 0.
        int i = (int) data1[0];
        int j = (int) data2[0];
        return 1.0 - consensusMatrix[i, j];
    }
}
}
// ← All NMath Code Examples