# C# NMF Consensus Matrix Example

← All NMath Core Code Examples

```csharp
using System;

using CenterSpace.NMath.Core;
using System.IO;

namespace CenterSpace.NMath.Core.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing how to compute a consensus matrix averaging different NMF clusterings.
/// </summary>
/// <remarks>
/// A Nonnegative Matrix Factorization (NMF) is an approximate factorization
/// of a positive matrix v into a product of two matrices w and h:
/// v ~ wh
/// This factorization can be used to group, or cluster, the columns of v
/// (the columns of v are usually referred to as "samples"). NMF uses an
/// iterative algorithm with random starting values for w and h. This, coupled
/// with the fact that the factorization is not unique, means that if you cluster
/// the columns of v using an NMF cluster several different times, you may get several
/// different clusterings. The NMF consensus matrix is a way to average
/// the possibly different clusterings, and is computed using the following process:
///
/// Cluster the columns of v using NMF n times. Each NMF clustering will yield
/// a "connectivity matrix". The connectivity matrix is a symmetric matrix
/// whose i, jth entry is 1 if columns i and j of v were clustered together,
/// and 0 if they were not. The "consensus matrix" is also a symmetric matrix
/// whose i, jth entry is formed by taking the average of the i, jth entries of
/// the n connectivity matrices.
///
/// It is clear that each i, jth entry of the consensus matrix has a value between 0
/// (columns i and j were not clustered together on any of the n runs) and 1 (columns
/// i and j were clustered together on all n runs). Thus the i, jth entry of a
/// consensus matrix may be considered, in some sense, a "probability" that columns
/// i and j belong to the same cluster.
/// A consensus matrix C may also be used to perform a hierarchical clustering of the
/// columns of v by using as the distance function:
///
/// distance between columns i and j = 1.0 - C[i,j]
///
/// This is demonstrated in the example below.
/// </remarks>
class NMFConsensusMatrixExample
{
    /// <summary>
    /// Loads the sample data, builds an NMF consensus matrix from it, prints the
    /// matrix and the pairwise "probabilities" for the first column, and then
    /// hierarchically clusters the columns using 1.0 - C[i,j] as the distance.
    /// </summary>
    /// <param name="args">Command-line arguments; not used by this example.</param>
    static void Main( string[] args )
    {
        // Load the sample data set. The flag arguments and the "\t" delimiter are
        // passed straight to DataFrame.Load; see its documentation for their meaning.
        DataFrame data = DataFrame.Load( "nmf_data.dat", true, true, "\t", true );

        // Extract the data as a DoubleMatrix.
        DoubleMatrix v = data.ToDoubleMatrix();

        // Set the order of the NMF (this is the number of columns in w, where
        // v ~ wh).
        int k = 3;

        // Set the number of runs or connectivity matrices to use to form the
        // consensus matrix.
        int numberOfRuns = 70;

        // Construct a consensus matrix using the "divergence" update
        // algorithm.
        var consensusMatrix =
            new NMFConsensusMatrix<NMFDivergenceUpdate>( v, data.ColumnHeaders, k, numberOfRuns );

        Console.WriteLine();

        // Print out the number of runs in which the NMF algorithm actually converged to an answer, and the
        // resulting consensus matrix.
        Console.WriteLine( "{0} runs out of {1} converged.", consensusMatrix.NumberOfConvergedRuns, numberOfRuns );
        Console.WriteLine();
        Console.WriteLine( "Consensus Matrix:" );
        Console.WriteLine( consensusMatrix.ToTabDelimited( "G3" ) );

        // Let's look at the first column and for each successive column print out the
        // "probability" that they are clustered together (we'll use the column
        // names from the data frame instead of column numbers).
        // Labels is an indexed collection, so take element 0: the label of the
        // first column, which is the column compared against below via [0, j].
        string label = consensusMatrix.Labels[0];
        Console.WriteLine();
        for ( int j = 1; j < consensusMatrix.Order; j++ )
        {
            Console.WriteLine( "The \"probability\" that {0} is clustered with {1} is {2}",
                label, consensusMatrix.Labels[j], consensusMatrix[0, j] );
        }

        // Perform a hierarchical cluster analysis using the consensus matrix
        // to define the distance function as described in the class description
        // above.

        // The cluster analysis class wants to cluster the rows of a matrix. Since we
        // are essentially clustering a bunch of column numbers, we'll provide a matrix
        // with one column and n rows where n is the number of columns of v (and the
        // order of the consensus matrix). The column will contain the numbers 0
        // to n - 1 (basically, we're just clustering the numbers 0,...,n - 1).
        var itemNumbers = new DoubleMatrix( consensusMatrix.Order, 1, 0, 1 );

        // The distance function object holds the consensus matrix C and returns the distance
        // between i and j as 1.0 - C[i,j].
        var distanceFunctionObject = new ConsensusMatrixDistance( consensusMatrix );
        var clusterAnalysisDist = new Distance.Function( distanceFunctionObject.CaDistance );
        var ca = new ClusterAnalysis( itemNumbers, clusterAnalysisDist );

        // Form three clusters using the cluster analysis cut tree function and print them out.
        ClusterSet clusters = ca.CutTree( 3 );
        Console.WriteLine();
        for ( int clusterNumber = 0; clusterNumber < clusters.NumberOfClusters; clusterNumber++ )
        {
            // Each member is an item number, i.e. an index into the consensus matrix labels.
            int[] members = clusters.Cluster( clusterNumber );
            Console.Write( "Cluster number {0} contains: ", clusterNumber );
            for ( int i = 0; i < members.Length; i++ )
            {
                Console.Write( "{0} ", consensusMatrix.Labels[members[i]] );
            }
            Console.WriteLine();
        }

        Console.WriteLine();
        Console.WriteLine( "Press Enter Key" );

        // Actually wait for input so the prompt above is honored and the output
        // stays visible when the example is launched in its own console window.
        Console.Read();
    }
}

/// <summary>
/// Adapts a connectivity (consensus) matrix C to a distance function over item
/// numbers: the distance between items i and j is 1.0 - C[i,j].
/// </summary>
public class ConsensusMatrixDistance
{
    // The matrix whose [i, j] entries define the distance; set once in the constructor.
    private readonly ConnectivityMatrix consensusMatrix;

    /// <summary>
    /// Creates a distance object backed by the given matrix.
    /// </summary>
    /// <param name="conn">Matrix whose [i, j] entry is subtracted from 1.0 to form the distance.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="conn"/> is null.</exception>
    public ConsensusMatrixDistance( ConnectivityMatrix conn )
    {
        // Fail at construction rather than with a null dereference on the first distance call.
        if ( conn == null )
        {
            throw new ArgumentNullException( "conn" );
        }

        consensusMatrix = conn;
    }

    /// <summary>
    /// Computes the distance between two items as 1.0 - C[i,j].
    /// </summary>
    /// <param name="data1">Vector whose first element holds the item number i.</param>
    /// <param name="data2">Vector whose first element holds the item number j.</param>
    /// <returns>1.0 minus the matrix entry at [i, j].</returns>
    public double CaDistance( DoubleVector data1, DoubleVector data2 )
    {
        // Each vector carries a single item number in element 0; read that element
        // and convert it to an integer index into the matrix.
        int i = (int) data1[0];
        int j = (int) data2[0];
        return 1.0 - consensusMatrix[i, j];
    }
}
}

```
← All NMath Core Code Examples
Top