← All NMath Code Examples
using System;
using System.Collections.Generic;
using System.IO;
using CenterSpace.NMath.Core;
namespace LogisticRegressionExample
{
/// <summary>
/// A .NET example in C# showing how to perform logistic regression.
/// </summary>
class LogisticRegressionExample
{
static void Main( string[] args )
{
Console.WriteLine();
Console.WriteLine( "Coronary Heart Disease Example -----------------" );
Console.WriteLine( Environment.NewLine );
CoronaryHeartDiseaseAge();
Console.WriteLine( Environment.NewLine );
Console.WriteLine( "Low Birth Weight Example -----------------------" );
Console.WriteLine( Environment.NewLine );
LowBirthWeight();
Console.WriteLine( Environment.NewLine );
Console.WriteLine( "Crime Example -----------------------------------" );
Console.WriteLine( Environment.NewLine );
Crime();
Console.WriteLine( Environment.NewLine );
Console.WriteLine( "Press Enter Key" );
Console.Read();
}
/// <summary>
/// Example relating the presence of coronary heart disease and age. The data consist of subjects
/// age and the whether or not the subject displays evidence of coronary heart disease
/// (1 for present, 0 for not present).
/// </summary>
static void CoronaryHeartDiseaseAge()
{
// The data for this example are stored in a matrix. The first column contains the independent,
// or predictor, variable values. The second column contains the observed outcome values (0 or 1),
// where 1 indicates the presence of coronary heart disease, and 0 denotes its absence.
var chdDataAll = new DoubleMatrix( new StreamReader( new FileStream( "chdage.mat", FileMode.Open ) ) );
// The first column is patient ID. We need last two columns.
var chdData = chdDataAll[Slice.All, new Slice( 1, 2 )];
// A logistic regression can be constructed from data in the following format: a matrix whose
// rows contain the predictor variable values, and a vector of booleans for the observed values.
var obs = new bool[chdData.Rows];
for ( int i = 0; i < chdData.Rows; i++ )
{
obs[i] = chdData[i, 1] != 0.0;
}
DoubleMatrix regMat = chdData[Slice.All, new Slice( 0, 1 )];
// The logistic regression class takes a class parameter indicating the parameter calculation
// algorithm to use. Here we use a Newton-Raphson calculator class, essentially an iteratively
// reweighted least squares. Since we want our model to have an intercept parameter, we set
// the last argument to true.
var logReg = new LogisticRegression<NewtonRaphsonParameterCalc>( regMat, obs, true );
// First we check that parameter calculation is successful. If not, we
// print out some diagnostic information and exit.
if ( !logReg.IsGood )
{
Console.WriteLine( "Logistic regression parameter calculation failed:" );
Console.WriteLine( logReg.ParameterCalculationErrorMessage );
var parameterCalc = logReg.ParameterCalculator;
Console.WriteLine( "Maximum iterations: " + parameterCalc.MaxIterations );
Console.WriteLine( "Number of iterations: " + parameterCalc.Iterations );
Console.WriteLine( "Newton Raphson converged: " + parameterCalc.Converged );
return;
}
// Parameter calculation is successful. The fit analysis class is still
// under construction and will contain more statistics. For now we look
// at the G-statistic.
var fitAnalysis = new LogisticRegressionFitAnalysis<NewtonRaphsonParameterCalc>( logReg );
Console.WriteLine( "Log likelihood: " + fitAnalysis.LogLikelihood.ToString( "G3" ) );
Console.WriteLine( "G-statistic: " + fitAnalysis.GStatistic.ToString( "G3" ) );
Console.WriteLine( "G-statistic P-value: " + fitAnalysis.GStatisticPValue.ToString( "G3" ) );
Console.WriteLine();
// Print out the parameter values and related statistics:
var parameterEstimates = logReg.ParameterEstimates;
Console.WriteLine( "Intercept Parameter:" );
Console.WriteLine( parameterEstimates[0].ToString() );
Console.WriteLine();
Console.WriteLine( "Age Coefficient:" );
Console.WriteLine( parameterEstimates[1].ToString() );
Console.WriteLine();
// Predict the probability of the presence of coronary heart disease for some ages.
var ages = new DoubleMatrix( "5x1 [29.0 37.0 48.0 64.0 78.0]" );
var probabilities = logReg.PredictedProbabilities( ages );
for ( int i = 0; i < ages.Rows; i++ )
{
Console.WriteLine( "The probability of the presence of coronary heart disease at age {0} is {1}",
ages[i, 0], probabilities[i].ToString( "G3" ) );
}
}
/// <summary>
/// Example applying logistic regression to a study of low birth weights. The goal of this study was
/// to identify risk factors associated with giving birth to a low birth weight baby. There are four
/// variables under consideration: Age, Weight of subject, Race, and Number of physician visits during
/// pregnancy.
/// </summary>
static void LowBirthWeight()
{
DataFrame data = DataFrame.Load( "lowbwt.dat", true, true, " ", true );
// Logistic regression provides a convenience method for producing design, or dummy, variables
// using "reference cell coding". If the categorical variable has k levels, there will be k - 1
// design variables created. Reference cell coding involves setting all the design variable
// values to 0 for the reference group, and then setting a single design variable equal to 1 for each of
// the other groups.
// We first create a data frame containing the design variables and their values
// constructed from the Race column of the data. Since the race variable has
// 3 levels there will be two design variables. By default they will be named
// Race_0 and Race_1.
int raceColIndex = data.IndexOfColumn( "Race" );
DataFrame raceDesignVars = LogisticRegression<NewtonRaphsonParameterCalc>.DesignVariables( data[raceColIndex] );
// Next we remove the Race column from our input data and replace it with
// the two design variable columns.
data.RemoveColumn( raceColIndex );
for ( int c = 0; c < raceDesignVars.Cols; c++ )
{
data.InsertColumn( raceColIndex + c, raceDesignVars[c] );
}
// Now convert the data frames data to a matrix of floating point values.
DoubleMatrix matrixDat = data.ToDoubleMatrix();
// The first column of the data is patient ID and the second column of the data contains the
// observed condition of low birth weight. A 1 in the observation column indicates low birth weight
// and a 0 indicated normal birth weight. We want to exclude the first column of patient IDs from the
// regression data.
DoubleMatrix A = matrixDat[Range.All, new Range( 1, Position.End )];
// We now construct the logistic regression. This constructor allows
// you to leave the column of observed values in the data matrix.
// However you must supply the constructor with the index of the
// observation column and a predicate function object for converting
// the numerical values to boolean: true if the condition is present
// and false if it is not. So in constructing the object we pass in
// the matrix containing the independent, or predictor, variable
// values and the observed values. Next we pass in a 0 indicating the
// matrix column at index 0 contains the observed values. Next we pass
// in a lambda expression indicating the nonzero values in the observation
// column indicate the presence of low birth weight. Finally we
// include an intercept parameter as indicated by the final true
// argument.
var lr = new LogisticRegression<NewtonRaphsonParameterCalc>( matrixDat, 0, x => x != 0, true );
// Check to see if parameter calculation succeeded. If not print out diagnostics
// and exit.
Console.WriteLine( "LR good? " + lr.IsGood );
if ( !lr.IsGood )
{
Console.WriteLine( "Logistic regression parameter calculation failed:" );
Console.WriteLine( lr.ParameterCalculationErrorMessage );
var parameterCalc = lr.ParameterCalculator;
Console.WriteLine( "Maximum iterations: " + parameterCalc.MaxIterations );
Console.WriteLine( "Number of iterations: " + parameterCalc.Iterations );
Console.WriteLine( "Newton Raphson converged: " + parameterCalc.Converged );
return;
}
// Parameter calculation succeeded. Print out the model parameter estimates
// and related information.
var parameterEstimates = lr.ParameterEstimates;
for ( int i = 0; i < parameterEstimates.Length; i++ )
{
var estimate = parameterEstimates[i];
if ( i == 0 )
{
Console.WriteLine( "Constant term = {0}, SE = {1}", estimate.Value.ToString( "G3" ),
estimate.StandardError.ToString( "G3" ) );
}
else
{
Console.WriteLine( "Coefficient for {0} = {1}, SE = {2}", data[i].Name, estimate.Value.ToString( "G3" ),
estimate.StandardError.ToString( "G3" ) );
}
}
Console.WriteLine();
// We can look at the parameter covariance matrix.
Console.WriteLine( "Parameter covariance matrix:" );
Console.WriteLine( lr.ParameterCovarianceMatrix.ToTabDelimited( "G8" ) );
Console.WriteLine();
// Finally, print out some fit information.
var fitAnalysis = new LogisticRegressionFitAnalysis<NewtonRaphsonParameterCalc>( lr );
Console.WriteLine( "Log likelihood = " + fitAnalysis.LogLikelihood.ToString( "G3" ) );
Console.WriteLine( "G-statistic = " + fitAnalysis.GStatistic.ToString( "G3" ) );
var pValue = fitAnalysis.GStatisticPValue;
Console.WriteLine( "Pr[X^2({0}) > {1}] = {2}", lr.NumberOfPredictors, fitAnalysis.GStatistic.ToString( "G3" ),
fitAnalysis.GStatisticPValue.ToString( "G3" ) );
// Predict the probability of a 29 year old white women weighing 159 pounds and with
// 5 physician visits during pregnancy.
DoubleVector subject = new DoubleVector( 29.0, 159.0, 0.0, 0.0, 5.0 );
double prob = lr.PredictedProbability( subject );
Console.WriteLine( "Estimated probability of a white woman age {0}, weighing {1} lbs, {2} Dr. visits is {3}",
subject[0], subject[1], subject[4], prob.ToString( "G5" ) );
}
static void Crime()
{
DataFrame crimeData = DataFrame.Load( "crime.dat", true, false, " ", true );
string[] columnNames = new string[] { "CrimeRat", "MaleTeen", "South", "Educ", "Police59" };
int[] columns = new int[columnNames.Length];
for ( int i = 0; i < columnNames.Length; i++ )
{
columns[i] = crimeData.IndexOfColumn( columnNames[i] );
}
Subset s = new Subset( columns );
var data = crimeData.GetColumns( s );
var matrixData = data.ToDoubleMatrix();
var lr = new LogisticRegression<NewtonRaphsonParameterCalc>( matrixData, 0, x => x >= 110.0, true );
Console.WriteLine( "lr is good: " + lr.IsGood );
var paramEst = lr.ParameterEstimates;
for ( int i = 0; i < paramEst.Length; i++ )
{
Console.WriteLine( paramEst[i].ToString() );
}
var fit = new LogisticRegressionFitAnalysis<NewtonRaphsonParameterCalc>( lr );
var pearson = fit.PearsonStatistic();
Console.WriteLine( "Pearson Statistic -" );
Console.WriteLine( Environment.NewLine + "Pearson: " + pearson );
Console.WriteLine();
// Calculate the Hosmer Lemeshow statistic using 10 groups.
Console.WriteLine( "Hosmer Lemeshow Statistic -" );
var hosmerLemeshowStat = fit.HLStatistic( 10 );
Console.WriteLine( hosmerLemeshowStat );
}
}
}
← All NMath Code Examples