A customer contacted us about computing “variance inflation factors”.
Wikipedia defines this as:
In statistics, the variance inflation factor (VIF) is a method of detecting the severity of multicollinearity. More precisely, the VIF is an index which measures how much the variance of a coefficient (square of the standard deviation) is increased because of collinearity. [Ref]
Here’s an implementation using CenterSpace’s NMath and NMath Stats libraries.
// Computes the variance inflation factor of every predictor in the
// given regression. Element k of the returned vector is the factor
// for predictor k.
private static DoubleVector Vif( LinearRegression lr )
{
    int predictorCount = lr.NumberOfPredictors;
    DoubleVector result = new DoubleVector( predictorCount );

    // Delegate to the single-predictor overload, one index at a time.
    for (int k = 0; k < predictorCount; ++k)
    {
        result[k] = Vif( lr, k );
    }

    return result;
}
// Computes the variance inflation factor for predictor i.
// Predictor i is regressed on the remaining predictors, and the
// factor is 1 / (1 - R^2) of that auxiliary regression.
private static double Vif( LinearRegression lr, int i )
{
    // Column i of the original predictors becomes the observation
    // vector of the auxiliary regression.
    DoubleVector target = lr.PredictorMatrix.Col( i );

    // Modify a clone rather than lr itself: drop predictor i, then
    // regress the saved column on whatever predictors are left.
    // NOTE(review): the trailing 'true' presumably requests an
    // intercept term - confirm against the NMath Stats docs.
    LinearRegression auxiliary = (LinearRegression)lr.Clone();
    auxiliary.RemovePredictor( i );
    auxiliary.SetRegressionData( auxiliary.PredictorMatrix, target, true );

    // R^2 of the auxiliary fit determines the inflation factor.
    double rSquared = new LinearRegressionAnova( auxiliary ).RSquared;
    return 1.0 / (1.0 - rSquared);
}
And here’s an example using these functions:
// Predictor data: a "30x3" matrix given in NMath's string format.
DoubleMatrix predictors = new DoubleMatrix(
    "30x3[0.270 78 41 0.282 79 56 0.277 81 63 " +
    "0.280 80 68 0.272 76 69 0.262 78 65 " +
    "0.275 82 61 0.267 79 47 0.265 76 32 " +
    "0.277 79 24 0.282 82 28 0.270 85 26 " +
    "0.272 86 32 0.287 83 40 0.277 84 55 " +
    "0.287 82 63 0.280 80 72 0.277 78 72 " +
    "0.277 84 67 0.277 86 60 0.292 85 44 " +
    "0.287 87 40 0.277 94 32 0.285 92 27 " +
    "0.282 95 28 0.265 96 33 0.265 94 41 " +
    "0.265 96 52 0.268 91 64 0.260 90 71]" );

// Observed values for the dependent variable (30 entries).
DoubleVector observations = new DoubleVector( "0.386 0.374 0.393 0.425 " +
    "0.406 0.344 0.327 0.288 0.269 0.256 0.286 " +
    "0.298 0.329 0.318 0.381 0.381 0.470 0.443 " +
    "0.386 0.342 0.319 0.307 0.284 0.326 0.309 " +
    "0.359 0.376 0.416 0.437 0.548" );

// Fit the regression and report whether the fit is usable.
LinearRegression fit = new LinearRegression( predictors, observations, true );
Console.WriteLine( "Is good? " + fit.IsGood );

// Summary statistics for the full model.
LinearRegressionAnova fitAnova = new LinearRegressionAnova( fit );
Console.WriteLine( "variance: " + fit.Variance );
Console.WriteLine( "r-squared: " + fitAnova.RSquared );

// One variance inflation factor per predictor.
DoubleVector inflationFactors = Vif( fit );
Console.WriteLine( "variance inflation factors: " + inflationFactors );
-Trevor
Note: This functionality is now in NMath Stats.