' VB Logistic Regression Example
'
' (Part of the NMath Stats code examples.)


Imports System
Imports System.Collections.Generic
Imports System.IO

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats

Namespace LogisticRegressionExample

  ' A .NET example in Visual Basic showing how to perform logistic regression.
  Module LogisticRegressionExample

    ' Entry point. Prints a banner for each example, runs the example, and then
    ' waits for the user to press Enter before the program exits.
    Sub Main()

      Console.WriteLine("Coronary Heart Disease Example -----------------")
      CoronaryHeartDiseaseAge()

      Console.WriteLine()
      Console.WriteLine("Low Birth Weight Example -----------------------")
      LowBirthWeight()

      Console.WriteLine()
      Console.WriteLine("Crime Example -----------------------------------")
      Crime()

      Console.WriteLine()
      Console.WriteLine("Press Enter Key")
      ' Block until Enter is pressed so the prompt above actually does something
      ' and the console window stays open long enough to read the output.
      Console.ReadLine()

    End Sub

    ' Example relating the presence of coronary heart disease to age. The data consist of each
    ' subject's age and whether or not the subject displays evidence of coronary heart disease
    ' (1 for present, 0 for not present).
    '
    ' Reads "chdage.mat" from the current directory and writes all results to the console.
    ' Returns early (after printing a message) if the file cannot be read or if the
    ' regression parameters cannot be calculated.
    Private Sub CoronaryHeartDiseaseAge()

      ' The data for this example are stored in a matrix. After slicing, the first column contains
      ' the independent, or predictor, variable values. The second column contains the observed
      ' outcome values (0 or 1), where 1 indicates the presence of coronary heart disease, and 0
      ' denotes its absence. The slice skips column 0 of the file (presumably a subject ID --
      ' TODO confirm against chdage.mat).
      Dim ChdData As DoubleMatrix
      Try
        ' Using disposes the reader (and with it the underlying file stream) even if
        ' parsing the matrix throws.
        Using Reader As New StreamReader(New FileStream("chdage.mat", FileMode.Open))
          Dim ChdDataAll As New DoubleMatrix(Reader)
          ChdData = ChdDataAll(Slice.All, New Slice(1, 2))
        End Using
      Catch Ex As IOException
        ' A missing or unreadable file surfaces as an exception, never as Nothing,
        ' so this is where the "could not load" case is actually detected.
        Console.WriteLine("Could not load data for coronary heart disease example. Exiting.")
        Return
      End Try

      ' A logistic regression can be constructed from data in the following format: a matrix whose
      ' rows contain the predictor variable values, and a vector of booleans for the observed values.
      Dim Obs(ChdData.Rows - 1) As Boolean
      For I As Integer = 0 To ChdData.Rows - 1
        Obs(I) = ChdData(I, 1) <> 0
      Next

      ' Column 0 of the sliced data holds the single predictor (age).
      Dim RegMat As DoubleMatrix = ChdData(Slice.All, New Slice(0, 1))

      ' The logistic regression class takes a type parameter indicating the parameter calculation
      ' algorithm to use. Here we use a Newton-Raphson calculator class, essentially an iteratively
      ' reweighted least squares. Since we want our model to have an intercept parameter, we set
      ' the last argument to true.
      Dim LogReg As New LogisticRegression(Of NewtonRaphsonParameterCalc)(RegMat, Obs, True)

      ' First we check that parameter calculation is successful. If not, we
      ' print out some diagnostic information and exit.
      If (Not LogReg.IsGood) Then
        Console.WriteLine("Logistic regression parameter calculation failed:")
        Dim ParameterCalc = LogReg.ParameterCalculator
        Console.WriteLine("Maximum iterations: " & ParameterCalc.MaxIterations)
        Console.WriteLine("Number of iterations: " & ParameterCalc.Iterations)
        Console.WriteLine("Newton Raphson converged: " & ParameterCalc.Converged)
        Return
      End If

      ' Parameter calculation is successful. The fit analysis class is still
      ' under construction and will contain more statistics. For now we look
      ' at the G-statistic.
      Dim FitAnalysis As New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LogReg)
      Console.WriteLine("Log likelihood: " & FitAnalysis.LogLikelihood.ToString("G3"))
      Console.WriteLine("G-statistic: " & FitAnalysis.GStatistic.ToString("G3"))
      Console.WriteLine("G-statistic P-value: " & FitAnalysis.GStatisticPValue.ToString("G3"))

      ' Print out the parameter values and related statistics. With an intercept in the
      ' model, index 0 is the intercept and index 1 is the coefficient of the only predictor.
      Dim ParameterEstimates() As LogisticRegressionParameter(Of NewtonRaphsonParameterCalc) = LogReg.ParameterEstimates
      Console.WriteLine("Intercept Parameter:")
      Console.WriteLine(ParameterEstimates(0).ToString())
      Console.WriteLine("Age Coefficient:")
      Console.WriteLine(ParameterEstimates(1).ToString())

      ' Predict the probability of the presence of coronary heart disease for some ages.
      Dim Ages As New DoubleMatrix("5x1 [29.0 37.0 48.0 64.0 78.0]")
      Dim Probabilities As DoubleVector = LogReg.PredictedProbabilities(Ages)
      For I As Integer = 0 To Ages.Rows - 1
        Console.WriteLine("The probability of the presence of coronary heart disease at age {0} is {1}",
          Ages(I, 0), Probabilities(I).ToString("G3"))
      Next

    End Sub

    ' Example applying logistic regression to a study of low birth weights. The goal of this study was
    ' to identify risk factors associated with giving birth to a low birth weight baby. There are four
    ' variables under consideration: Age, Weight of subject, Race, and Number of physician visits during
    ' pregnancy.
    '
    ' Reads "lowbwt.dat" from the current directory and writes all results to the console.
    ' Returns early (after printing diagnostics) if the regression parameters cannot be calculated.
    Private Sub LowBirthWeight()

      Dim Data As DataFrame = DataFrame.Load("lowbwt.dat", True, False, " ", True)

      ' Logistic regression provides a convenience method for producing design, or dummy, variables
      ' using "reference cell coding". If the categorical variable has k levels, there will be k - 1
      ' design variables created. Reference cell coding involves setting all the design variable
      ' values to 0 for the reference group, and then setting a single design variable equal to 1 for each of
      ' the other groups.

      ' We first create a data frame containing the design variables and their values
      ' constructed from the Race column of the data. Since the race variable has
      ' 3 levels there will be two design variables. By default they will be named
      ' Race_0 and Race_1.
      Dim RaceColIndex As Integer = Data.IndexOfColumn("Race")
      Dim RaceDesignVars As DataFrame = LogisticRegression(Of NewtonRaphsonParameterCalc).DesignVariables(Data(RaceColIndex))

      ' Next we remove the Race column from our input data and replace it with
      ' the two design variable columns, inserted at the position Race occupied.
      Data.RemoveColumn(RaceColIndex)
      For C As Integer = 0 To RaceDesignVars.Cols - 1
        Data.InsertColumn(RaceColIndex + C, RaceDesignVars(C))
      Next

      ' Now convert the data frame's data to a matrix of floating point values.
      Dim MatrixDat As DoubleMatrix = Data.ToDoubleMatrix()

      ' The first column of the data is patient ID and the second column of the data contains the
      ' observed condition of low birth weight. A 1 in the observation column indicates low birth weight
      ' and a 0 indicates normal birth weight. We want to exclude the first column of patient IDs from the
      ' regression data.
      Dim A As DoubleMatrix = MatrixDat(Range.All, New Range(1, Position.End))

      ' We now construct the logistic regression. This constructor allows
      ' you to leave the column of observed values in the data matrix.
      ' However you must supply the constructor with the index of the
      ' observation column and a predicate function object for converting
      ' the numerical values to boolean: true if the condition is present
      ' and false if it is not. So in constructing the object we pass in
      ' the matrix containing the independent, or predictor, variable
      ' values and the observed values. Next we pass in a 0 indicating the
      ' matrix column at index 0 contains the observed values. Next we pass
      ' in a lambda expression indicating the nonzero values in the observation
      ' column indicate the presence of low birth weight. Finally we
      ' include an intercept parameter as indicated by the final true
      ' argument.
      '
      ' Note that A, not MatrixDat, is passed: in MatrixDat column 0 is the patient ID.
      Dim ObservationPredicate = Function(x As Double) x <> 0
      Dim LR As New LogisticRegression(Of NewtonRaphsonParameterCalc)(A, 0, ObservationPredicate, True)

      ' Check to see if parameter calculation succeeded. If not print out diagnostics
      ' and exit.
      Console.WriteLine("LR good? " & LR.IsGood)
      If (Not LR.IsGood) Then
        Console.WriteLine("Logistic regression parameter calculation failed:")
        Dim ParameterCalc = LR.ParameterCalculator
        Console.WriteLine("Maximum iterations: " & ParameterCalc.MaxIterations)
        Console.WriteLine("Number of iterations: " & ParameterCalc.Iterations)
        Console.WriteLine("Newton Raphson converged: " & ParameterCalc.Converged)
        Return
      End If

      ' Parameter calculation succeeded. Print out the model parameter estimates
      ' and related information.
      Dim Estimates = LR.ParameterEstimates
      For I As Integer = 0 To Estimates.Length - 1
        Dim Estimate = Estimates(I)
        If (I = 0) Then
          Console.WriteLine("Constant term = {0}, SE = {1}", Math.Round(Estimate.Value, 3),
            Math.Round(Estimate.StandardError, 3))
        Else
          ' Parameter I (I >= 1) belongs to predictor I - 1. Predictors start at column 2
          ' of Data (after patient ID and the observation column), hence the I + 1 offset.
          Console.WriteLine("Coefficient for {0} = {1}, SE = {2}", Data(I + 1).Name, Math.Round(Estimate.Value, 3),
            Math.Round(Estimate.StandardError, 3))
        End If
      Next

      ' We can look at the parameter covariance matrix.
      Console.WriteLine("Parameter covariance matrix:")
      Console.WriteLine(NMathFunctions.Round(LR.ParameterCovarianceMatrix, 3).ToTabDelimited())

      ' Finally, print out some fit information.
      Dim FitAnalysis = New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LR)
      Console.WriteLine("Log likelihood = " & FitAnalysis.LogLikelihood.ToString("G3"))
      Console.WriteLine("G-statistic = " & FitAnalysis.GStatistic.ToString("G3"))
      Dim PValue = FitAnalysis.GStatisticPValue
      Console.WriteLine("Pr[X^2({0}) > {1}] = {2}", LR.NumberOfPredictors, FitAnalysis.GStatistic,
        PValue)

      ' Predict the probability of low birth weight for a 29 year old white woman weighing
      ' 159 pounds and with 5 physician visits during pregnancy. The vector holds one value
      ' per predictor: age, weight, the two race design variables (both 0 for the reference
      ' group), and number of visits.
      Dim Subject As New DoubleVector(29.0, 159.0, 0.0, 0.0, 5.0)
      Dim Prob As Double = LR.PredictedProbability(Subject)
      Console.WriteLine("Estimated probability of a white woman age {0}, weighing {1} lbs, {2} Dr. visits is {3}",
        Subject(0), Subject(1), Subject(4), Prob.ToString("G5"))
    End Sub

    ' Example fitting a logistic regression to crime data and reporting goodness-of-fit
    ' statistics (Pearson and Hosmer-Lemeshow).
    '
    ' Reads "crime.dat" from the current directory and writes all results to the console.
    ' The observation is the column at index 0 of the selected data, which is "CrimeRat"
    ' assuming GetColumns keeps the order given in ColumnNames -- TODO confirm. An
    ' observation counts as positive when its value is at least 110.0.
    Private Sub Crime()

      Dim CrimeData = DataFrame.Load("crime.dat", True, False, " ", True)

      ' Look up the index of each column of interest so they can be pulled out as a subset.
      Dim ColumnNames() As String = {"CrimeRat", "MaleTeen", "South", "Educ", "Police59"}
      Dim Columns(ColumnNames.Length - 1) As Integer
      For I As Integer = 0 To ColumnNames.Length - 1
        Columns(I) = CrimeData.IndexOfColumn(ColumnNames(I))
      Next

      Dim S As New Subset(Columns)
      Dim Data = CrimeData.GetColumns(S)
      Dim MatrixData = Data.ToDoubleMatrix()

      ' Convert the numeric observation column to a boolean outcome.
      Dim ObservationPredicate = Function(x As Double) x >= 110.0
      Dim LR As New LogisticRegression(Of NewtonRaphsonParameterCalc)(MatrixData, 0, ObservationPredicate, True)
      Console.WriteLine("lr is good: " & LR.IsGood)

      ' Print each estimated parameter.
      Dim ParamEst() As LogisticRegressionParameter(Of NewtonRaphsonParameterCalc) = LR.ParameterEstimates
      For I As Integer = 0 To ParamEst.Length - 1
        Console.WriteLine(ParamEst(I).ToString())
      Next

      Dim Fit As New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LR)
      Dim Pearson = Fit.PearsonStatistic()
      Console.WriteLine("Pearson Statistic -")
      Console.WriteLine(Environment.NewLine & "Pearson: " & Pearson.ToString())

      ' Calculate the Hosmer Lemeshow statistic using 10 groups.
      Console.WriteLine("Hosmer Lemeshow Statistic -")
      Dim HosmerLemeshowStat = Fit.HLStatistic(10)
      Console.WriteLine(HosmerLemeshowStat.ToString())
    End Sub

  End Module

End Namespace

' (Part of the NMath Stats code examples.)