Skip to content

Commit

Permalink
mahalanobis returns Union type, code refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Sep 30, 2020
1 parent a6c5b96 commit 3b77cab
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LinRegOutliers"
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
authors = ["Mehmet Hakan Satman <[email protected]>"]
version = "0.3.12"
version = "0.3.13"

[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Expand Down
2 changes: 2 additions & 0 deletions src/.dev/outlier.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,5 @@ include("../ransac.jl")

println("ready")

X = DataFrame(calls=phones.calls, year=phones.year)
result = mve(X)
7 changes: 3 additions & 4 deletions src/diagnostics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -319,13 +319,13 @@ Calculate Mahalanobis distances.
Mahalanobis, Prasanta Chandra. "On the generalized distance in statistics."
National Institute of Science of India, 1936.
"""
function mahalabonisSquaredMatrix(data::DataFrame; meanvector=nothing, covmatrix=nothing)::Array{Float64,2}
function mahalabonisSquaredMatrix(data::DataFrame; meanvector=nothing, covmatrix=nothing)::Union{Nothing,Array{Float64,2}}
datamat = convert(Matrix, data)
return mahalabonisSquaredMatrix(datamat, meanvector=meanvector, covmatrix=covmatrix)
end


function mahalabonisSquaredMatrix(datamat::Matrix; meanvector=nothing, covmatrix=nothing)::Array{Float64,2}
function mahalabonisSquaredMatrix(datamat::Matrix; meanvector=nothing, covmatrix=nothing)::Union{Nothing,Array{Float64,2}}
if meanvector === nothing
meanvector = applyColumns(mean, datamat)
end
Expand All @@ -340,8 +340,7 @@ function mahalabonisSquaredMatrix(datamat::Matrix; meanvector=nothing, covmatrix
if det(covmatrix) == 0
@warn "singular covariance matrix, mahalanobis distances can not be calculated"
end
n = size(datamat)[1]
return zeros(Float64, (n, n))
return nothing
end
end

2 changes: 1 addition & 1 deletion src/mve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ function mve(data::DataFrame; alpha=0.05)
meanvector = applyColumns(mean, data[hsubset, :])
md2mat = mahalabonisSquaredMatrix(data, meanvector=meanvector, covmatrix=covmatrix)
DJ = sqrt(sort(diag(md2mat))[h])
goal = (DJ / c)^p * det(covmatrix)
goal = (DJ / c)^p * det(covmatrix)
catch e
# Possibly a singular covariance matrix
end
Expand Down
9 changes: 6 additions & 3 deletions src/ransac.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""
ransac(setting; t, w=0.5, m=0, k=0, d=0, confidence=0.99)
Run the RANSAC (1981) algorithm for the given regression setting.
# Arguments
- `setting::RegressionSetting`: RegressionSetting object with a formula and a dataset.
- `t::Float64`: The threshold distance of a sample point to the regression hyperplane to determine if it fits the model well.
Expand All @@ -11,13 +14,13 @@ Run the RANSAC (1981) algorithm for the given regression setting
- `confidence::Float64`: Used to determine the optimal number of iterations when `k` is not specified.
# Examples
```julia-repl
```julia-repl
julia> df = DataFrame(y=[0,1,2,3,3,4,10], x=[0,1,2,2,3,4,2])
julia> reg = createRegressionSetting(@formula(y ~ x), df)
julia> ransac(reg, t=0.8, w=0.85)
1-element Array{Int64,1}:
7
```
```
# References
Martin A. Fischler & Robert C. Bolles (June 1981). "Random Sample Consensus: A Paradigm for Model Fitting with Applications to Image Analysis and Automated Cartography"
Expand Down Expand Up @@ -54,7 +57,7 @@ function ransac(setting::RegressionSetting; t::Float64, w::Float64=0.5, m::Int=0
ols = lm(setting.formula, setting.data[sampled_indices, :])
betas = coef(ols)

e = abs.(Y - X*betas) ./ norm([1; betas[2:end]], 2)
e = abs.(Y - X * betas) ./ norm([1; betas[2:end]], 2)

iteration_inlier_indices = filter(i -> e[i] < t, 1:n)
inliers_count = length(iteration_inlier_indices)
Expand Down
7 changes: 6 additions & 1 deletion src/satman2013.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ function satman2013(setting::RegressionSetting)
end

medians = applyColumns(median, X0)
md2 = diag(mahalabonisSquaredMatrix(X0, meanvector = medians, covmatrix = covmat))
mhs = mahalabonisSquaredMatrix(X0, meanvector=medians, covmatrix=covmat)
if mhs isa Nothing
md2 = zeros(Float64, n)
else
md2 = diag(mhs)
end
md = sqrt.(md2)

sorted_indices = sortperm(md)
Expand Down
7 changes: 6 additions & 1 deletion src/satman2015.jl
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,12 @@ function satman2015(setting::RegressionSetting)

meanvector = applyColumns(mean, X[basic_subset_indices,:])
covmat = cov(X[basic_subset_indices,:])
md2 = diag(mahalabonisSquaredMatrix(X, meanvector = meanvector, covmatrix = covmat))
mhs = mahalabonisSquaredMatrix(X, meanvector = meanvector, covmatrix = covmat)
if mhs isa Nothing
md2 = zeros(Float64, n)
else
md2 = diag(mhs)
end
md = sqrt.(md2)
sorted_indices = sortperm(md)
best_h_indices = sorted_indices[1:h]
Expand Down

0 comments on commit 3b77cab

Please sign in to comment.