# VB Logistic Regression Example

← All NMath Stats Code Examples

```vb
Imports System
Imports System.Collections.Generic
Imports System.IO

Imports CenterSpace.NMath.Core
Imports CenterSpace.NMath.Stats

Namespace LogisticRegressionExample

' A .NET example in Visual Basic showing how to perform logistic regression.
Module LogisticRegressionExample

' Entry point. Runs the three logistic regression examples in order, each under
' its own banner, then waits for the user to press Enter so the console output
' can be read before the program exits.
Sub Main()

    Console.WriteLine("Coronary Heart Disease Example -----------------")
    Console.WriteLine(Environment.NewLine)
    CoronaryHeartDiseaseAge()
    Console.WriteLine(Environment.NewLine)

    Console.WriteLine("Low Birth Weight Example -----------------------")
    Console.WriteLine(Environment.NewLine)
    LowBirthWeight()
    Console.WriteLine(Environment.NewLine)

    Console.WriteLine("Crime Example -----------------------------------")
    Console.WriteLine(Environment.NewLine)
    Crime()
    Console.WriteLine(Environment.NewLine)

    ' The prompt promises to wait for the Enter key, so actually block on input;
    ' without this the program exits immediately after printing the prompt.
    Console.WriteLine("Press Enter Key")
    Console.ReadLine()

End Sub

' Example relating the presence of coronary heart disease to age. The data consist of each
' subject's age and whether or not the subject displays evidence of coronary heart disease
' (1 for present, 0 for not present).
Private Sub CoronaryHeartDiseaseAge()

    ' The data for this example are stored in a matrix read from "chdage.mat". The stream and
    ' reader are wrapped in a Using block so the file handle is released as soon as the matrix
    ' has been constructed, even if parsing throws.
    Dim ChdDataAll As DoubleMatrix
    Using Stream As New FileStream("chdage.mat", FileMode.Open), Reader As New StreamReader(Stream)
        ChdDataAll = New DoubleMatrix(Reader)
    End Using

    ' Keep only the columns selected by Slice(1, 2). In the resulting matrix the first column
    ' contains the independent, or predictor, variable values and the second column contains the
    ' observed outcome values (0 or 1), where 1 indicates the presence of coronary heart disease
    ' and 0 denotes its absence.
    Dim ChdData As DoubleMatrix = ChdDataAll(Slice.All, New Slice(1, 2))

    ' Reference test: "Is Nothing" checks for a missing object without going through any
    ' overloaded equality operator on the matrix type.
    If ChdData Is Nothing Then
        Console.WriteLine("Could not load data for coronary heart disease example. Exiting.")
        Return
    End If

    ' A logistic regression can be constructed from data in the following format: a matrix whose
    ' rows contain the predictor variable values, and a vector of booleans for the observed values.
    ' Any nonzero entry in the outcome column counts as "condition present".
    Dim Obs(ChdData.Rows - 1) As Boolean
    For Row As Integer = 0 To ChdData.Rows - 1
        Obs(Row) = ChdData(Row, 1) <> 0
    Next

    ' Predictor matrix: the slice starting at column 0 of the trimmed data.
    Dim RegMat As DoubleMatrix = ChdData(Slice.All, New Slice(0, 1))

    ' The logistic regression class takes a type parameter indicating the parameter calculation
    ' algorithm to use. Here we use a Newton-Raphson calculator, essentially an iteratively
    ' reweighted least squares. Since we want our model to have an intercept parameter, we set
    ' the last argument to true.
    Dim LogReg As New LogisticRegression(Of NewtonRaphsonParameterCalc)(RegMat, Obs, True)

    ' First we check that parameter calculation is successful. If not, we
    ' print out some diagnostic information and exit.
    If Not LogReg.IsGood Then
        Console.WriteLine("Logistic regression parameter calculation failed:")
        Console.WriteLine(LogReg.ParameterCalculationErrorMessage)
        Dim ParameterCalc = LogReg.ParameterCalculator
        Console.WriteLine("Maximum iterations: " & ParameterCalc.MaxIterations)
        Console.WriteLine("Number of iterations: " & ParameterCalc.Iterations)
        Console.WriteLine("Newton Raphson converged: " & ParameterCalc.Converged)
        Return
    End If

    ' Parameter calculation is successful. Report the log likelihood and the
    ' G-statistic with its P-value from the fit analysis.
    Dim FitAnalysis As New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LogReg)
    Console.WriteLine("Log likelihood: " & FitAnalysis.LogLikelihood.ToString("G3"))
    Console.WriteLine("G-statistic: " & FitAnalysis.GStatistic.ToString("G3"))
    Console.WriteLine("G-statistic P-value: " & FitAnalysis.GStatisticPValue.ToString("G3"))
    Console.WriteLine()

    ' Print out the parameter values and related statistics. Index 0 is reported as the
    ' intercept and index 1 as the coefficient of the single predictor (age).
    Dim ParameterEstimates() As LogisticRegressionParameter(Of NewtonRaphsonParameterCalc) = LogReg.ParameterEstimates
    Console.WriteLine("Intercept Parameter:")
    Console.WriteLine(ParameterEstimates(0).ToString())
    Console.WriteLine()
    Console.WriteLine("Age Coefficient:")
    Console.WriteLine(ParameterEstimates(1).ToString())
    Console.WriteLine()

    ' Predict the probability of the presence of coronary heart disease for some ages.
    ' Each row of the 5x1 matrix is one subject's age.
    Dim Ages As New DoubleMatrix("5x1 [29.0 37.0 48.0 64.0 78.0]")
    Dim Probabilities As DoubleVector = LogReg.PredictedProbabilities(Ages)
    For Row As Integer = 0 To Ages.Rows - 1
        Console.WriteLine("The probability of the presence of coronary heart disease at age {0} is {1}",
                          Ages(Row, 0), Probabilities(Row).ToString("G3"))
    Next

End Sub

' Example applying logistic regression to a study of low birth weights. The goal of this study was
' to identify risk factors associated with giving birth to a low birth weight baby. There are four
' variables under consideration: Age, Weight of subject, Race, and Number of physician visits during
' pregnancy.
Private Sub LowBirthWeight()

    Dim Data As DataFrame = DataFrame.Load("lowbwt.dat", True, False, " ", True)

    ' Logistic regression provides a convenience method for producing design, or dummy, variables
    ' using "reference cell coding". If the categorical variable has k levels, there will be k - 1
    ' design variables created. Reference cell coding involves setting all the design variable
    ' values to 0 for the reference group, and then setting a single design variable equal to 1 for
    ' each of the other groups.

    ' We first create a data frame containing the design variables and their values
    ' constructed from the Race column of the data. Since the race variable has
    ' 3 levels there will be two design variables. By default they will be named
    ' Race_0 and Race_1.
    Dim RaceColIndex As Integer = Data.IndexOfColumn("Race")
    Dim RaceDesignVars As DataFrame = LogisticRegression(Of NewtonRaphsonParameterCalc).DesignVariables(Data(RaceColIndex))

    ' Next we remove the Race column from our input data and replace it, in place,
    ' with the design variable columns.
    Data.RemoveColumn(RaceColIndex)
    For C As Integer = 0 To RaceDesignVars.Cols - 1
        Data.InsertColumn(RaceColIndex + C, RaceDesignVars(C))
    Next

    ' Now convert the data frame's data to a matrix of floating point values.
    Dim MatrixDat As DoubleMatrix = Data.ToDoubleMatrix()

    ' The first column of the data is patient ID and the second column of the data contains the
    ' observed condition of low birth weight. A 1 in the observation column indicates low birth
    ' weight and a 0 indicates normal birth weight. We exclude the first column of patient IDs
    ' from the regression data, so in A the observation column sits at index 0.
    Dim A As DoubleMatrix = MatrixDat(Range.All, New Range(1, Position.End))

    ' We now construct the logistic regression. This constructor allows
    ' you to leave the column of observed values in the data matrix.
    ' However you must supply the constructor with the index of the
    ' observation column and a predicate function object for converting
    ' the numerical values to boolean: true if the condition is present
    ' and false if it is not. So in constructing the object we pass in
    ' the ID-free matrix A containing the independent, or predictor,
    ' variable values and the observed values. Next we pass in a 0
    ' indicating the matrix column at index 0 contains the observed
    ' values. Next we pass in a lambda expression indicating the nonzero
    ' values in the observation column indicate the presence of low birth
    ' weight. Finally we include an intercept parameter as indicated by
    ' the final true argument.
    ' The lambda parameter is typed as Double so the comparison is not late-bound.
    Dim ObservationPredicate = Function(x As Double) x <> 0
    Dim LR As New LogisticRegression(Of NewtonRaphsonParameterCalc)(A, 0, ObservationPredicate, True)

    ' Check to see if parameter calculation succeeded. If not print out diagnostics
    ' and exit.
    Console.WriteLine("LR good? " & LR.IsGood)
    If Not LR.IsGood Then
        Console.WriteLine("Logistic regression parameter calculation failed:")
        Console.WriteLine(LR.ParameterCalculationErrorMessage)
        Dim ParameterCalc = LR.ParameterCalculator
        Console.WriteLine("Maximum iterations: " & ParameterCalc.MaxIterations)
        Console.WriteLine("Number of iterations: " & ParameterCalc.Iterations)
        Console.WriteLine("Newton Raphson converged: " & ParameterCalc.Converged)
        Return
    End If

    ' Parameter calculation succeeded. Print out the model parameter estimates
    ' and related information. Estimate 0 is the constant term. Estimate I (I >= 1)
    ' belongs to column I of A, which is column I + 1 of Data because A drops the
    ' leading patient ID column.
    Dim Estimates = LR.ParameterEstimates
    For I As Integer = 0 To Estimates.Length - 1
        Dim Estimate = Estimates(I)
        If I = 0 Then
            Console.WriteLine("Constant term = {0}, SE = {1}", Math.Round(Estimate.Value, 3),
                              Estimate.StandardError.ToString("G3"))
        Else
            Console.WriteLine("Coefficient for {0} = {1}, SE = {2}", Data(I + 1).Name, Math.Round(Estimate.Value, 3),
                              Estimate.StandardError.ToString("G3"))
        End If
    Next

    Console.WriteLine()

    ' We can look at the parameter covariance matrix.
    Console.WriteLine("Parameter covariance matrix:")
    Console.WriteLine(NMathFunctions.Round(LR.ParameterCovarianceMatrix, 3).ToTabDelimited())
    Console.WriteLine()

    ' Finally, print out some fit information.
    Dim FitAnalysis = New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LR)
    Console.WriteLine("Log likelihood = " & FitAnalysis.LogLikelihood.ToString("G3"))
    Console.WriteLine("G-statistic = " & FitAnalysis.GStatistic.ToString("G3"))
    Dim PValue = FitAnalysis.GStatisticPValue
    Console.WriteLine("Pr[X^2({0}) > {1}] = {2}", LR.NumberOfPredictors, FitAnalysis.GStatistic,
                      PValue)

    ' Predict the probability for a 29 year old white woman weighing 159 pounds and with
    ' 5 physician visits during pregnancy. The vector holds one value per predictor:
    ' age, weight, the two race design variables (both 0 for the reference group), and visits.
    Dim Subject As New DoubleVector(29.0, 159.0, 0.0, 0.0, 5.0)
    Dim Prob As Double = LR.PredictedProbability(Subject)
    Console.WriteLine("Estimated probability of a white woman age {0}, weighing {1} lbs, {2} Dr. visits is {3}",
                      Subject(0), Subject(1), Subject(4), Prob.ToString("G5"))
End Sub

' Example fitting a logistic regression to a subset of the columns in "crime.dat".
' The CrimeRat column is the observed variable: a row counts as a positive outcome
' when its crime rate is at least 110. The remaining selected columns are the
' predictors. After fitting, the parameter estimates and two goodness-of-fit
' statistics (Pearson and Hosmer-Lemeshow) are printed.
Private Sub Crime()

    Dim CrimeData = DataFrame.Load("crime.dat", True, False, " ", True)

    ' Look up the positions of the columns of interest. CrimeRat is listed first so
    ' that it ends up at index 0 of the extracted data.
    Dim ColumnNames() As String = {"CrimeRat", "MaleTeen", "South", "Educ", "Police59"}
    Dim Columns(ColumnNames.Length - 1) As Integer
    For I As Integer = 0 To ColumnNames.Length - 1
        Columns(I) = CrimeData.IndexOfColumn(ColumnNames(I))
    Next

    Dim S As New Subset(Columns)
    Dim Data = CrimeData.GetColumns(S)
    Dim MatrixData = Data.ToDoubleMatrix()

    ' Column 0 holds the observed values; the predicate converts them to booleans.
    ' The lambda parameter is typed as Double so the comparison is not late-bound.
    Dim ObservationPredicate = Function(x As Double) x >= 110.0
    Dim LR As New LogisticRegression(Of NewtonRaphsonParameterCalc)(MatrixData, 0, ObservationPredicate, True)
    Console.WriteLine("lr is good: " & LR.IsGood)

    ' As in the other examples, stop with diagnostics if parameter calculation
    ' failed rather than reporting estimates from an unsuccessful fit.
    If Not LR.IsGood Then
        Console.WriteLine("Logistic regression parameter calculation failed:")
        Console.WriteLine(LR.ParameterCalculationErrorMessage)
        Dim ParameterCalc = LR.ParameterCalculator
        Console.WriteLine("Maximum iterations: " & ParameterCalc.MaxIterations)
        Console.WriteLine("Number of iterations: " & ParameterCalc.Iterations)
        Console.WriteLine("Newton Raphson converged: " & ParameterCalc.Converged)
        Return
    End If

    ' Print every parameter estimate.
    Dim ParamEst() As LogisticRegressionParameter(Of NewtonRaphsonParameterCalc) = LR.ParameterEstimates
    For I As Integer = 0 To ParamEst.Length - 1
        Console.WriteLine(ParamEst(I).ToString())
    Next

    ' Calculate the Pearson statistic.
    Dim Fit As New LogisticRegressionFitAnalysis(Of NewtonRaphsonParameterCalc)(LR)
    Dim Pearson = Fit.PearsonStatistic()
    Console.WriteLine("Pearson Statistic -")
    Console.WriteLine(Environment.NewLine & "Pearson: " & Pearson.ToString())
    Console.WriteLine()

    ' Calculate the Hosmer Lemeshow statistic using 10 groups.
    Console.WriteLine("Hosmer Lemeshow Statistic -")
    Dim HosmerLemeshowStat = Fit.HLStatistic(10)
    Console.WriteLine(HosmerLemeshowStat)
End Sub

End Module

End Namespace

```
← All NMath Stats Code Examples
Top