C# Missing Values Example

← All NMath Stats Code Examples

 

using System;

using CenterSpace.NMath.Stats;
using System.IO;

namespace MissingValuesExample
{
  /// <summary>
  /// C# console sample that loads a data set containing missing values and
  /// walks through several ways of handling them: counting the gaps per column,
  /// computing NaN-aware statistics, stripping missing entries from a column,
  /// and dropping incomplete rows.
  /// </summary>
  class MissingValuesExample
  {
    /// <summary>
    /// Entry point. Loads "MissingValuesExample.dat", prints it, reports on its
    /// missing values, and then waits for console input before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main( string[] args )
    {
      // The data file marks a missing integer with -1, so register that
      // sentinel before the file is loaded.
      StatsSettings.IntegerMissingValue = -1;

      // Load the tab-delimited data file. Its fields are name, manufacturer,
      // type, calories, protein, fat, sodium, fiber, carbohydrates, sugars,
      // potassium, vitamins, shelf weight, cups, rating, age, gender and grade.
      DataFrame data = DataFrame.Load( "MissingValuesExample.dat" );

      // Show the data as loaded, framed by blank lines.
      Console.WriteLine();
      Console.WriteLine( data );
      Console.WriteLine();

      ReportMissingCounts( data );
      Console.WriteLine();

      ReportSugarStatistics( data );
      Console.WriteLine();

      // Build a sub-frame holding only the rows that have no missing values,
      // and report how many rows were dropped to get there.
      DataFrame completeRows = data.CleanRows();
      var droppedRows = data.Rows - completeRows.Rows;
      Console.WriteLine( "Stripped {0} rows containing missing values.", droppedRows );

      Console.WriteLine();
      Console.WriteLine( "Press Enter Key" );
      Console.Read();
    }

    /// <summary>
    /// Prints one line per numeric column: the column label followed by the
    /// number of missing values, computed as Count minus NaNCount.
    /// </summary>
    /// <param name="data">The data frame whose columns are inspected.</param>
    private static void ReportMissingCounts( DataFrame data )
    {
      for ( int columnIndex = 0; columnIndex < data.Cols; columnIndex++ )
      {
        var column = data[columnIndex];

        // Non-numeric columns are skipped.
        if ( !column.IsNumeric )
        {
          continue;
        }

        var missing = StatsFunctions.Count( column ) - StatsFunctions.NaNCount( column );
        Console.WriteLine( "{0}: {1}", column.Label, missing );
      }
    }

    /// <summary>
    /// Prints the mean, median and 90th percentile of the "sugars" column.
    /// </summary>
    /// <param name="data">The data frame containing the "sugars" column.</param>
    private static void ReportSugarStatistics( DataFrame data )
    {
      // "carbo", "sugars" and "potass" contain missing values. The NaN-aware
      // mean can be taken directly on such a column.
      var mean = StatsFunctions.NaNMean( data["sugars"] );
      Console.WriteLine( "Average sugar content: {0}", mean.ToString( "G5" ) );

      // Sorting-based routines give ambiguous results when NaN values are
      // present, so remove the missing entries before asking for the median
      // and the percentile.
      DFIntColumn sugarsOnly = (DFIntColumn) StatsFunctions.NaNRemove( data["sugars"] );

      var median = StatsFunctions.Median( sugarsOnly );
      Console.WriteLine( "Median sugar content: {0}", median.ToString( "G5" ) );

      var ninetieth = StatsFunctions.Percentile( sugarsOnly, 0.9 );
      Console.WriteLine( "90th percentile sugar content: {0}", ninetieth.ToString( "G5" ) );
    }
  }
}

← All NMath Stats Code Examples
Top