Skip to content

Commit

Permalink
implement dfbeta diagnostic
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Oct 14, 2020
1 parent ae9ae5d commit c066c68
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ authors = [
]
name = "LinRegOutliers"
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
version = "0.4.1"
version = "0.4.2"

[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Expand Down
1 change: 1 addition & 0 deletions src/.dev/outlier.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ include("../ransac.jl")


println("ready")

25 changes: 25 additions & 0 deletions src/.dev/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -570,4 +570,29 @@ end
calculated = covratio(setting, i)
@test abs(knownvals[i] - calculated) < eps
end
end

@testset "dfbeta - phone data" begin
eps = 0.00001
reg = createRegressionSetting(@formula(calls ~ year), phones)
n, p = size(phones)
knownvalues = [9.6439157 -0.14686166; 5.3459460 -0.08092134;
1.6258961 -0.02443345; -0.6866294 0.01022725;
-2.7169197 0.04002009; -4.1910124 0.06085238;
-5.1029254 0.07267870; -5.5535000 0.07697356;
-5.2512176 0.06983887; -4.6721589 0.05791933;
-3.6868718 0.03945523; -2.3254391 0.01478033;
-0.5673087 -0.01652355; 1.4653937 -0.04958099;
-5.4474441 0.12867978; -8.6540162 0.18101030;
-14.6631749 0.28835085; -22.0168113 0.41708081;
-32.9443226 0.60863505; -49.0851269 0.89065754;
22.9820710 -0.41140246; 39.1639294 -0.69370540;
45.7655562 -0.80379984; 53.6862082 -0.93638735;]
lenres = length(result)
for i in 1:n
for j in 1:p
dfbetaresult = dfbeta(reg, i)
@test abs(dfbetaresult[j] - knownvalues[i,j]) < eps
end
end
end
2 changes: 1 addition & 1 deletion src/LinRegOutliers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ export find_minimum_nonzero
export phones, hbk, stackloss, weightloss, hs93randomdata, woodgravity

# Diagnostics
export dffit
export dffit, dfbeta
export hatmatrix
export studentizedResiduals
export adjustedResiduals
Expand Down
34 changes: 34 additions & 0 deletions src/basis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -295,4 +295,38 @@ function covratio(X::Array{Float64, 2}, y::Array{Float64, 1}, omittedIndex::Int)
covrat = det(s2omitted * xxinvomitted) / det(s2 * xxinv)

return covrat
end



"""
dfbeta(setting, omittedIndex)
Apply DFBETA diagnostic for a given regression setting and observation index.
# Arguments
- `setting::RegressionSetting`: A regression setting object.
- `omittedIndex::Int`: Index of the omitted observation.
# Example
```julia-repl
julia> setting = createRegressionSetting(@formula(calls ~ year), phones);
julia> dfbeta(setting, 1)
2-element Array{Float64,1}:
9.643915678524024
-0.14686166007904422
```
"""
function dfbeta(setting::RegressionSetting, omittedIndex::Int)::Array{Float64,1}
X = designMatrix(setting)
y = responseVector(setting)
return dfbeta(X, y, omittedIndex)
end

function dfbeta(X::Array{Float64,2}, y::Array{Float64,1}, omittedIndex::Int)::Array{Float64,1}
n = length(y)
omittedindices = filter(x -> x != omittedIndex, 1:n)
regfull = ols(X, y)
regomitted = ols(X[omittedindices, :], y[omittedindices])
return coef(regfull) .- coef(regomitted)
end
26 changes: 26 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -572,3 +572,29 @@ end
@test abs(knownvals[i] - calculated) < eps
end
end



@testset "dfbeta - phone data" begin
eps = 0.00001
reg = createRegressionSetting(@formula(calls ~ year), phones)
n, p = size(phones)
knownvalues = [9.6439157 -0.14686166; 5.3459460 -0.08092134;
1.6258961 -0.02443345; -0.6866294 0.01022725;
-2.7169197 0.04002009; -4.1910124 0.06085238;
-5.1029254 0.07267870; -5.5535000 0.07697356;
-5.2512176 0.06983887; -4.6721589 0.05791933;
-3.6868718 0.03945523; -2.3254391 0.01478033;
-0.5673087 -0.01652355; 1.4653937 -0.04958099;
-5.4474441 0.12867978; -8.6540162 0.18101030;
-14.6631749 0.28835085; -22.0168113 0.41708081;
-32.9443226 0.60863505; -49.0851269 0.89065754;
22.9820710 -0.41140246; 39.1639294 -0.69370540;
45.7655562 -0.80379984; 53.6862082 -0.93638735;]
for i in 1:n
for j in 1:p
dfbetaresult = dfbeta(reg, i)
@test abs(dfbetaresult[j] - knownvalues[i,j]) < eps
end
end
end

0 comments on commit c066c68

Please sign in to comment.