VB Factor Example

← All NMath Stats Code Examples

 

Imports System
Imports System.Collections
Imports Microsoft.VisualBasic

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats
Imports System.IO

Namespace CenterSpace.NMath.Stats.Examples.VisualBasic

  ' A .NET example in Visual Basic showing how to create and manipulate factors.

  ' The Factor class represents a categorical vector in which all
  ' elements are drawn from a finite number of factor levels. Thus, a Factor contains
  ' two parts: a string array of factor levels, and an integer array of
  ' categorical data, of which each element is an index into the array of levels.
  Module FactorExample

    ' Entry point for the example. Loads the reading-test data set, builds
    ' factors for the Gender and Grade columns, and prints the grand mean, the
    ' marginal means, and the cell means of the Score column. The same means
    ' are then shown via DataFrame.Tabulate(), followed by a two-way ANOVA.
    ' Finally the program waits for console input before exiting.
    Sub Main()

      ' Read in data from the file. The data show test scores for 17 children on a
      ' simple reading test. The child's gender ("male" or "female") and grade
      ' (4, 5, or 6) is also recorded.
      Dim DF As DataFrame = DataFrame.Load("FactorExample.dat", True, False, ControlChars.Tab, True)

      Console.WriteLine()
      Console.WriteLine(DF)
      Console.WriteLine()

      ' The score column index is needed for every subset lookup below, so
      ' look it up once rather than on each loop iteration.
      Dim ScoreColumn As Integer = DF.IndexOfColumn("Score")

      ' Factors are usually constructed from a data frame column using the
      ' GetFactor() method, which creates a Factor with levels for the sorted, unique
      ' values in the column.
      Dim Gender As Factor = DF.GetFactor("Gender")

      ' Display the levels and categorical data for the gender factor.
      Console.WriteLine("Gender factor: " & Gender.ToString())
      Console.WriteLine("Gender levels: " & Gender.LevelsToString())
      Console.WriteLine("Gender data: " & Gender.DataToString())
      Console.WriteLine()

      ' Construct a factor for grade level.
      Dim Grade As Factor = DF.GetFactor("Grade")

      ' Display the levels and categorical data for the grade factor.
      Console.WriteLine("Grade factor: " & Grade.ToString())
      Console.WriteLine("Grade levels: " & Grade.LevelsToString())
      Console.WriteLine("Grade data: " & Grade.DataToString())
      Console.WriteLine()

      ' The principal use of factors is in conjunction with the
      ' GetGroupings() methods on Subset. One overload of this method accepts
      ' a single Factor and returns an array of subsets containing the indices
      ' for each level of the given factor.
      Dim Genders As Subset() = Subset.GetGroupings(Gender)
      Dim Grades As Subset() = Subset.GetGroupings(Grade)

      ' Display overall mean, rounded to two decimals like every other mean
      ' printed by this example.
      Console.WriteLine("Grand mean = " & Math.Round(StatsFunctions.Mean(DF("Score")), 2))
      Console.WriteLine()

      ' Display mean for each level of the Gender and Grade factors.
      Console.WriteLine("Marginal Means")
      Dim Mean As Double

      For I As Integer = 0 To Gender.NumberOfLevels - 1
        Mean = Math.Round(StatsFunctions.Mean(DF(ScoreColumn, Genders(I))), 2)
        Console.WriteLine("Mean for gender " & Gender.Levels(I) & " = " & Mean)
      Next

      For I As Integer = 0 To Grade.NumberOfLevels - 1
        Mean = Math.Round(StatsFunctions.Mean(DF(ScoreColumn, Grades(I))), 2)
        Console.WriteLine("Mean for grade " & Grade.Levels(I) & " = " & Mean)
      Next
      Console.WriteLine()

      ' Another overload of GetGroupings() accepts two Factor objects and returns
      ' a two-dimensional array of subsets containing the indices for
      ' each combination of levels in the two factors.
      Console.WriteLine("Cell Means")
      Dim Cells As Subset(,) = Subset.GetGroupings(Gender, Grade)

      ' The first index of Cells follows the Gender levels (I) and the second
      ' follows the Grade levels (J), so each label must use its own index.
      For I As Integer = 0 To Gender.NumberOfLevels - 1
        For J As Integer = 0 To Grade.NumberOfLevels - 1
          Mean = Math.Round(StatsFunctions.Mean(DF(ScoreColumn, Cells(I, J))), 2)
          Console.WriteLine("Mean for gender " & Gender.Levels(I) & " in grade " & Grade.Levels(J) & " = " & Mean)
        Next
      Next
      Console.WriteLine()

      ' Combining DataFrame.GetFactor() with Subset.GetGroupings() to access "cells"
      ' is such a common operation that class DataFrame also provides the Tabulate()
      ' method as a convenience. This method accepts one or two grouping columns, a
      ' data column, and a delegate to apply to each data column subset. This code
      ' displays the same marginal and cell means shown above, but with far fewer
      ' lines of code:
      Dim MeanFunction As New Func(Of IDFColumn, Double)(AddressOf StatsFunctions.Mean)
      Console.WriteLine("Same results using cross-tabulation:" & Environment.NewLine)
      Console.WriteLine(DF.Tabulate("Grade", "Score", MeanFunction).ToString() & Environment.NewLine)
      Console.WriteLine(DF.Tabulate("Gender", "Score", MeanFunction).ToString() & Environment.NewLine)
      Console.WriteLine(DF.Tabulate("Grade", "Gender", "Score", MeanFunction).ToString() & Environment.NewLine)

      ' Factors are used internally by ANOVA classes for grouping data.
      Dim Anova As TwoWayAnova = New TwoWayAnova(DF, DF.IndexOfColumn("Gender"), DF.IndexOfColumn("Grade"), ScoreColumn)
      Console.WriteLine(Anova)

      ' Keep the console window open until the user responds.
      Console.WriteLine()
      Console.WriteLine("Press Enter Key")
      Console.Read()

    End Sub

  End Module

End Namespace


← All NMath Stats Code Examples
Top