[TOC]
using System;
using System.Text;
using System.Collections.Generic;
using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;
namespace CenterSpace.NMath.Stats.Examples.CSharp
{
/// <summary>
/// A .NET example in C# showing use of Nonnegative Matrix Factorization (NMF).
/// </summary>
class NMFExample
{
static void Main(string[] args)
{
DataFrame data;
string filename = "..\\..\\NMFExample.dat";
try
{
// Load the example data into a DataFrame
data = DataFrame.Load(filename, false, false, ",", true);
}
catch (NMathException e)
{
string msg = string.Format("Could not find data file {0}.", filename);
msg += Environment.NewLine;
msg += e.Message;
msg += Environment.NewLine;
msg += "Data file must have the same name as the example source ";
msg += Environment.NewLine;
msg += "file and be located two directories up from where the ";
msg += Environment.NewLine;
msg += "executable is running.";
Console.WriteLine(msg);
Console.WriteLine();
return;
}
// Construct a Nonnegative Matrix Factorization object that will perform 200
// iterations when computing the factors.
NMFact fact = new NMFact(200);
// Now, perform the factorization data = WH using the the default initial
// initial values for WH (which are uniform random in (0, 1) ), the default
// update algorithm (NMFMultiplicativeUpdate) and k = 2.
fact.Factor(data, 2);
// Look at the results. Note that since the inital values for W and H are
// random, the results will not be exactly the same on each run of the
// program.
Console.WriteLine("\nOutput 0, Default initial W, H, and algorithm -----------------");
PrintResults(fact);
// Note that the values of W and H are not close to the expected values
// Set up some initial values for W and H so we can play with some of the
// factorizations parameters and observe the effects without the random
// variations induced by random initial values.
RandGenUniform rng = new RandGenUniform(0x124);
DoubleMatrix initialW = new DoubleMatrix(3, 2, rng);
DoubleMatrix initialH = new DoubleMatrix(2, 4, rng);
// Factor and print the results
fact.Factor(data, 2, initialW, initialH);
Console.WriteLine("\n\nOutput 1 -------------------------------------------------------");
PrintResults(fact);
// Note that the factorization is pretty darn good - the cost function
// is on the order of 10e-18 and the difference of the elements of WH
// and V are all on the order of 10e-9 or 10e-10. But we are not close
// to the expected results. This shows the non-uniqueness of NMF
// solutions. Let's up the number of iterations to, say, 1000 and see
// if our factorization improves:
fact.NumIterations = 1000;
initialW = new DoubleMatrix(3, 2, rng);
initialH = new DoubleMatrix(2, 4, rng);
fact.Factor(data, 2, initialW, initialH);
Console.WriteLine("\n\nOutput 2, 1000 iterations ----------------------------------");
PrintResults(fact);
// Cost goes way down, all the elements of V - WH are 0 or on the order
// of 10e-15 - that's basically 0 within the limits of a double precision
// number (15 significant digits). Within machine precision, the
// factorization is exact and the solution is definitely a local minimum
// of the function ||V - WH||. Let's try and make it converge to the
// expected solution by setting the initial values very close to the
// expected values.
initialW = new DoubleMatrix("3x2[1 5 2 1 3 4]");
initialW = initialW + .01;
initialH = new DoubleMatrix("2x4[1 4 3 2 8 1 2 7]");
initialH = initialH - .01;
fact.Factor(data, 2, initialW, initialH);
Console.WriteLine("\n\nOutput 3, initial W, H close to expected values --------------------");
PrintResults(fact);
// Wow. We still converge to the same solution as before. That solution
// must be a very strong attractor for this algorithm. Let's try a
// different algorithm. The GD-CLS algorithm uses the multiplicative
// method, which is basically a version of the gradient descent (GD)
// optimization scheme, to approximate the basis vector matrix W. H
// is calculated using a constrained least squares (CLS).
fact.UpdateAlgorithm = new NMFGdClsUpdate();
double delta = 0.01;
initialW = new DoubleMatrix("3x2[1 5 2 1 3 4]");
initialW = initialW + delta;
initialH = new DoubleMatrix("2x4[1 4 3 2 8 1 2 7]");
initialH = initialH + delta;
fact.Factor(data, 2, initialW, initialH);
Console.WriteLine("\n\nOutput 4, GD-CLS algorithm initial W, H close to expected values -----");
PrintResults(fact);
// Now, that's more like it. We can find the expected soltion if we
// out right on top of it.
Console.WriteLine();
Console.WriteLine("Press Enter Key");
Console.Read();
}
static void PrintResults(NMFact nmf)
{
// Matrices rounded for better legibility
// Look at the results:
Console.WriteLine("\nFactored Matrix: ");
Console.WriteLine(nmf.V.ToTabDelimited());
Console.WriteLine("\nW: ");
Console.WriteLine(NMathFunctions.Round(nmf.W, 4).ToTabDelimited());
Console.WriteLine("\nH: ");
Console.WriteLine(NMathFunctions.Round(nmf.H, 4).ToTabDelimited());
Console.WriteLine("\nCost (error) = " + nmf.Cost);
// Look at the product of the factors and the difference from
// the factored matrix V
DoubleMatrix WH = NMathFunctions.Product(nmf.W, nmf.H);
DoubleMatrix D = nmf.V - WH;
Console.WriteLine("\nWH = ");
Console.WriteLine(NMathFunctions.Round(WH, 4).ToTabDelimited());
Console.WriteLine("\nV - WH = ");
Console.WriteLine(NMathFunctions.Round(D, 10).ToTabDelimited());
Console.WriteLine();
}
}
}
[TOC]