C# NMF Example

[TOC]

using System;
using System.Text;
using System.Collections.Generic;

using CenterSpace.NMath.Core;
using CenterSpace.NMath.Stats;

namespace CenterSpace.NMath.Stats.Examples.CSharp
{
  /// <summary>
  /// A .NET example in C# showing use of Nonnegative Matrix Factorization (NMF).
  /// </summary>
  class NMFExample
  {

    static void Main(string[] args)
    {
      DataFrame data;
      string filename = "..\\..\\NMFExample.dat";
      try
      {
        // Load the example data into a DataFrame
        data = DataFrame.Load(filename, false, false, ",", true);
      }
      catch (NMathException e)
      {
        string msg = string.Format("Could not find data file {0}.", filename);
        msg += Environment.NewLine;
        msg += e.Message;
        msg += Environment.NewLine;
        msg += "Data file must have the same name as the example source ";
        msg += Environment.NewLine;
        msg += "file and be located two directories up from where the ";
        msg += Environment.NewLine;
        msg += "executable is running.";
        Console.WriteLine(msg);
        Console.WriteLine();
        return;
      }

      // Construct a Nonnegative Matrix Factorization object that will perform 200
      // iterations when computing the factors.
      NMFact fact = new NMFact(200);

      // Now, perform the factorization data = WH using the the default initial
      // initial values for WH (which are uniform random in (0, 1) ), the default
      // update algorithm (NMFMultiplicativeUpdate) and k = 2.
      fact.Factor(data, 2);

      // Look at the results. Note that since the inital values for W and H are
      // random, the results will not be exactly the same on each run of the
      // program.
      Console.WriteLine("\nOutput 0, Default initial W, H, and algorithm -----------------");
      PrintResults(fact);
      // Note that the values of W and H are not close to the expected values

      // Set up some initial values for W and H so we can play with some of the 
      // factorizations parameters and observe the effects without the random
      // variations induced by random initial values.
      RandGenUniform rng = new RandGenUniform(0x124);
      DoubleMatrix initialW = new DoubleMatrix(3, 2, rng);
      DoubleMatrix initialH = new DoubleMatrix(2, 4, rng);

      // Factor and print the results
      fact.Factor(data, 2, initialW, initialH);
      Console.WriteLine("\n\nOutput 1 -------------------------------------------------------");
      PrintResults(fact);

      // Note that the factorization is pretty darn good - the cost function 
      // is on the order of 10e-18 and the difference of the elements of WH
      // and V are all on the order of 10e-9 or 10e-10. But we are not close
      // to the expected results. This shows the non-uniqueness of NMF 
      // solutions. Let's up the number of iterations to, say, 1000 and see
      // if our factorization improves:
      fact.NumIterations = 1000;
      initialW = new DoubleMatrix(3, 2, rng);
      initialH = new DoubleMatrix(2, 4, rng);
      fact.Factor(data, 2, initialW, initialH);
      Console.WriteLine("\n\nOutput 2, 1000 iterations ----------------------------------");
      PrintResults(fact);

      // Cost goes way down, all the elements of V - WH are 0 or on the order
      // of 10e-15 - that's basically 0 within the limits of a double precision
      // number (15 significant digits). Within machine precision, the 
      // factorization is exact and the solution is definitely a local minimum
      // of the function ||V - WH||. Let's try and make it converge to the 
      // expected solution by setting the initial values very close to the 
      // expected values.
      initialW = new DoubleMatrix("3x2[1 5  2 1  3 4]");
      initialW = initialW + .01;
      initialH = new DoubleMatrix("2x4[1 4 3 2  8 1 2 7]");
      initialH = initialH - .01;
      fact.Factor(data, 2, initialW, initialH);
      Console.WriteLine("\n\nOutput 3, initial W, H close to expected values --------------------");
      PrintResults(fact);

      // Wow. We still converge to the same solution as before. That solution
      // must be a very strong attractor for this algorithm. Let's try a 
      // different algorithm. The GD-CLS algorithm uses the multiplicative
      // method, which is basically a version of the gradient descent (GD) 
      // optimization scheme, to approximate the basis vector matrix W. H
      // is calculated using a constrained least squares (CLS).
      fact.UpdateAlgorithm = new NMFGdClsUpdate();
      double delta = 0.01;
      initialW = new DoubleMatrix("3x2[1 5  2 1  3 4]");
      initialW = initialW + delta;
      initialH = new DoubleMatrix("2x4[1 4 3 2  8 1 2 7]");
      initialH = initialH + delta;
      fact.Factor(data, 2, initialW, initialH);
      Console.WriteLine("\n\nOutput 4, GD-CLS algorithm initial W, H close to expected values -----");
      PrintResults(fact);

      // Now, that's more like it. We can find the expected soltion if we 
      // out right on top of it.

      Console.WriteLine();
      Console.WriteLine("Press Enter Key");
      Console.Read();
    }

    static void PrintResults(NMFact nmf)
    {
      // Matrices rounded for better legibility

      // Look at the results:
      Console.WriteLine("\nFactored Matrix: ");
      Console.WriteLine(nmf.V.ToTabDelimited());
      Console.WriteLine("\nW: ");
      Console.WriteLine(NMathFunctions.Round(nmf.W, 4).ToTabDelimited());
      Console.WriteLine("\nH: ");
      Console.WriteLine(NMathFunctions.Round(nmf.H, 4).ToTabDelimited());
      Console.WriteLine("\nCost (error) = " + nmf.Cost);

      // Look at the product of the factors and the difference from
      // the factored matrix V
      DoubleMatrix WH = NMathFunctions.Product(nmf.W, nmf.H);
      DoubleMatrix D = nmf.V - WH;
      Console.WriteLine("\nWH = ");
      Console.WriteLine(NMathFunctions.Round(WH, 4).ToTabDelimited());
      Console.WriteLine("\nV - WH = ");
      Console.WriteLine(NMathFunctions.Round(D, 10).ToTabDelimited());
      Console.WriteLine();
    }
  }
}
[TOC]