Skip to content

Commit

Permalink
Merge pull request #13 from invenia/rf/julia-1.0
Browse files Browse the repository at this point in the history
Julia 1.0 updates
  • Loading branch information
rofinn authored Dec 12, 2018
2 parents 13b5cd0 + d644a9d commit 73e78d1
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 85 deletions.
32 changes: 17 additions & 15 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,29 +1,31 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
- julia_version: 0.7
- julia_version: 1.0
- julia_version: nightly

platform:
- x86 # 32-bit
- x64 # 64-bit

branches:
only:
- master
- /release-.*/

notifications:
- provider: Email
on_build_success: false
on_build_failure: false
on_build_status_changed: false

install:
# Download most recent Julia Windows binary
- ps: (new-object net.webclient).DownloadFile(
$("http://s3.amazonaws.com/"+$env:JULIAVERSION),
"C:\projects\julia-binary.exe")
# Run installer silently, output to C:\projects\julia
- C:\projects\julia-binary.exe /S /D=C:\projects\julia
- ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1"))

build_script:
# Need to convert from shallow to complete for Pkg.clone to work
- IF EXIST .git\shallow (git fetch --unshallow)
- C:\projects\julia\bin\julia -e "versioninfo();
Pkg.clone(pwd(), \"Impute\"); Pkg.build(\"Impute\")"
- echo "%JL_BUILD_SCRIPT%"
- C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%"

test_script:
- C:\projects\julia\bin\julia -e "Pkg.test(\"Impute\")"
- echo "%JL_TEST_SCRIPT%"
- C:\julia\bin\julia -e "%JL_TEST_SCRIPT%"
23 changes: 17 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ os:
- linux
- osx
julia:
- release
- 0.7
- 1.0
- nightly
notifications:
email: false
Expand All @@ -17,8 +18,18 @@ matrix:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("Impute"); Pkg.test("Impute"; coverage=true)'
after_success:
# push coverage results to CodeCov
- julia -e 'cd(Pkg.dir("Impute")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
# build documentation
- julia -e 'Pkg.add("Documenter")'
- julia -e 'cd(Pkg.dir("Impute")); include(joinpath("docs", "make.jl"))'
- |
julia -e '
VERSION >= v"0.7.0-DEV.3656" && using Pkg
VERSION >= v"0.7.0-DEV.5183" || cd(Pkg.dir("Impute"))
Pkg.add("Coverage")
using Coverage
Codecov.submit(Codecov.process_folder())
'
- |
julia -e '
VERSION >= v"0.7.0-DEV.3656" && using Pkg
VERSION >= v"0.7.0-DEV.5183" || cd(Pkg.dir("Impute"))
Pkg.add("Documenter")
include(joinpath("docs", "make.jl"))
'
5 changes: 2 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
julia 0.5
DataFrames 0.11.0
Missings 0.2.3
julia 0.7
DataFrames 0.15
7 changes: 3 additions & 4 deletions src/Impute.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
module Impute

using Missings
using DataFrames
using Statistics

import Missings: ismissing
import DataFrames: DataFrameRow
import Base.Iterators

export impute, impute!, chain, chain!, drop!, interp, interp!, ImputeError
export impute, impute!, chain, chain!, drop, drop!, interp, interp!, ImputeError

const Dataset = Union{AbstractArray, DataFrame}

Expand All @@ -19,7 +18,7 @@ Is thrown by `impute` methods when the limit of imputable values has been exceed
# Fields
* msg::T - the message to print.
"""
immutable ImputeError{T} <: Exception
struct ImputeError{T} <: Exception
msg::T
end

Expand Down
22 changes: 11 additions & 11 deletions src/context.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Stores common summary information for all Imputor types.
* `limit::Float64`: allowable limit for missing values to impute
* `missing::Function`: returns `true` if the value counts as missing.
"""
type Context
mutable struct Context
num::Int
count::Int
limit::Float64
Expand All @@ -32,7 +32,7 @@ exceeds our `ctx.limit` we throw an `ImputeError`
* `ctx::Context`: the contextual information about missing information.
* `x`: the value to check (may be a single value, an abstract array or a row)
"""
function ismissing(ctx::Context, x)
function Base.ismissing(ctx::Context, x)
missing = if isa(x, DataFrameRow)
any(entry -> ctx.missing(entry[2]), x)
elseif isa(x, AbstractArray)
Expand All @@ -57,49 +57,49 @@ function ismissing(ctx::Context, x)
end

"""
findfirst{T<:Any}(ctx::Context, data::AbstractArray{T, 1}) -> Int
findfirst(ctx::Context, data::AbstractVector) -> Int
Returns the first not missing index in `data`.
# Arguments
* `ctx::Context`: the context to pass into `ismissing`
* `data::AbstractArray{T, 1}`: the data array to search
* `data::AbstractVector`: the data array to search
# Returns
* `Int`: the first index in `data` that isn't missing
"""
function Base.findfirst{T<:Any}(ctx::Context, data::AbstractArray{T, 1})
function Base.findfirst(ctx::Context, data::AbstractVector)
    # An element qualifies when the context does not consider it missing.
    present(val) = !ismissing(ctx, val)
    return findfirst(present, data)
end

"""
findlast{T<:Any}(ctx::Context, data::AbstractArray{T, 1}) -> Int
findlast(ctx::Context, data::AbstractVector) -> Int
Returns the last not missing index in `data`.
# Arguments
* `ctx::Context`: the context to pass into `ismissing`
* `data::AbstractArray{T, 1}`: the data array to search
* `data::AbstractVector`: the data array to search
# Returns
* `Int`: the last index in `data` that isn't missing
"""
function Base.findlast{T<:Any}(ctx::Context, data::AbstractArray{T, 1})
function Base.findlast(ctx::Context, data::AbstractVector)
    # Delegate to the predicate form of `findlast`, keeping elements the
    # context does not consider missing.
    return findlast(data) do val
        !ismissing(ctx, val)
    end
end

"""
findnext{T<:Any}(ctx::Context, data::AbstractArray{T, 1}) -> Int
findnext(ctx::Context, data::AbstractVector, idx::Int) -> Int
Returns the next not missing index in `data`.
# Arguments
* `ctx::Context`: the context to pass into `ismissing`
* `data::AbstractArray{T, 1}`: the data array to search
* `data::AbstractVector`: the data array to search
* `idx::Int`: the index at which to start searching
# Returns
* `Int`: the next index in `data` that isn't missing
"""
function Base.findnext{T<:Any}(ctx::Context, data::AbstractArray{T, 1}, idx::Int)
function Base.findnext(ctx::Context, data::AbstractVector, idx::Int)
    # Search from `idx` onwards for an element the context does not
    # consider missing.
    present(val) = !ismissing(ctx, val)
    return findnext(present, data, idx)
end
13 changes: 6 additions & 7 deletions src/imputors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,28 @@ function impute!(imp::Imputor, data::Dataset, limit::Float64=0.1)
end

"""
impute!{T<:Any}(imp::Imputor, ctx::Context, data::AbstractArray{T, 2})
impute!(imp::Imputor, ctx::Context, data::AbstractMatrix)
Imputes the data in a matrix by imputing the values 1 column at a time;
if this is not the desired behaviour custom imputor methods should overload this method.
# Arguments
* `imp::Imputor`: the Imputor method to use
* `ctx::Context`: the contextual information for missing data
* `data::AbstractArray{T, 2}`: the data to impute
* `data::AbstractMatrix`: the data to impute
# Returns
* `AbstractArray{T, 2}`: the input `data` with values imputed
* `AbstractMatrix`: the input `data` with values imputed
"""
function impute!{T<:Any}(imp::Imputor, ctx::Context, data::AbstractArray{T, 2})
function impute!(imp::Imputor, ctx::Context, data::AbstractMatrix)
    ncols = size(data, 2)

    # Each column is passed on as a view of `data`, so the column-level
    # `impute!` method receives a window onto the matrix rather than a copy.
    foreach(1:ncols) do col
        impute!(imp, ctx, view(data, :, col))
    end

    return data
end

"""
impute!{T<:Any}(imp::Imputor, ctx::Context, data::DataFrame)
impute!(imp::Imputor, ctx::Context, data::DataFrame)
Imputes the data in a DataFrame by imputing the values 1 column at a time;
if this is not the desired behaviour custom imputor methods should overload this method.
Expand All @@ -74,8 +74,7 @@ function impute!(imp::Imputor, ctx::Context, data::DataFrame)
return data
end

imputors_path = joinpath(dirname(@__FILE__), "imputors")

for file in ("drop.jl", "locf.jl", "nocb.jl", "interp.jl", "fill.jl", "chain.jl")
include(joinpath(imputors_path, file))
include(joinpath("imputors", file))
end
4 changes: 2 additions & 2 deletions src/imputors/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ Runs multiple `Imputor`s on the same data in the order they're provided.
# Fields
* `imputors::Vector{Imputor}`: the imputors to run, in the order they should be applied
"""
type Chain <: Imputor
imputors::Array{Imputor}
struct Chain <: Imputor
imputors::Vector{Imputor}
end

"""
Expand Down
23 changes: 11 additions & 12 deletions src/imputors/drop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,29 @@
Removes missing values from the `AbstractArray` or `DataFrame` provided.
"""
type Drop <: Imputor end
struct Drop <: Imputor end

"""
impute!{T<:Any}(imp::Drop, ctx::Context, data::AbstractArray{T, 1})
impute!(imp::Drop, ctx::Context, data::AbstractVector)
Uses `filter!` to remove missing elements from the array.
# Arguments
* `imp::Drop`: this `Imputor` method
* `ctx::Context`: contextual information for missing data
* `data::AbstractArray{T, 1}`: the data to impute
* `data::AbstractVector`: the data to impute
# Returns
* `AbstractArray{T, 1}`: our data array with missing elements removed
* `AbstractVector`: our data array with missing elements removed
"""
function impute!{T<:Any}(imp::Drop, ctx::Context, data::AbstractArray{T, 1})
function impute!(imp::Drop, ctx::Context, data::AbstractVector)
    # Keep only the elements the context does not consider missing;
    # `filter!` removes the rest from `data` in place.
    keep(val) = !ismissing(ctx, val)
    return filter!(keep, data)
end

"""
impute!{T<:Any}(imp::Drop, ctx::Context, data::AbstractArray{T, 2})
impute!(imp::Drop, ctx::Context, data::AbstractMatrix)
Finds the missing rows in the matrix and uses a mask (Array{Bool, 1}) to return the
Finds the missing rows in the matrix and uses a mask (Vector{Bool}) to return the
`data` with those rows removed. Unfortunately, the mask approach requires copying the matrix.
NOTES (or premature optimizations):
Expand All @@ -38,15 +38,14 @@ NOTES (or premature optimizations):
# Arguments
* `imp::Drop`: this `Imputor` method
* `ctx::Context`: contextual information for missing data
* `data::AbstractArray{T, 2}`: the data to impute
* `data::AbstractMatrix`: the data to impute
# Returns
* `AbstractArray{T, 2}`: a new matrix with missing rows removed
* `AbstractMatrix`: a new matrix with missing rows removed
"""
function impute!{T<:Any}(imp::Drop, ctx::Context, data::AbstractArray{T, 2})
function impute!(imp::Drop, ctx::Context, data::AbstractMatrix)
    # Record the row count on the context before scanning.
    # NOTE(review): presumably `ismissing(ctx, ...)` uses `ctx.num` for its
    # limit check -- confirm against context.jl.
    ctx.num = size(data, 1)

    # `true` for every row to keep. Rows are inspected through views so no
    # per-row copy is allocated, and `axes` is used instead of `1:size(...)`
    # so matrices with non-1-based row indices are scanned correctly.
    keep = [!ismissing(ctx, view(data, row, :)) for row in axes(data, 1)]

    # Logical indexing builds a new matrix; `data` itself is not modified.
    return data[keep, :]
end

Expand All @@ -69,6 +68,6 @@ NOTE: this isn't quite as fast as `dropnull` in `DataFrames`s as we're using an
function impute!(imp::Drop, ctx::Context, data::DataFrame)
ctx.num = size(data, 1)
m = typeof(data).name.module
m.deleterows!(data, find(map(r -> ismissing(ctx, r), m.eachrow(data))))
m.deleterows!(data, findall(r -> ismissing(ctx, r), m.eachrow(data)))
return data
end
10 changes: 5 additions & 5 deletions src/imputors/fill.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Fills in the missing data with a specific value.
* `value::T`: A scalar fill value, or a function that returns a scalar when
passed the data with missing data removed (e.g., `mean`)
"""
type Fill <: Imputor
value::Any
struct Fill{T} <: Imputor
value::T
end

"""
Expand All @@ -19,14 +19,14 @@ By default `Fill()` will use the mean of the existing values as the fill value.
Fill() = Fill(mean)

"""
impute!{T<:Any}(imp::Fill, ctx::Context, data::AbstractArray{T, 1})
impute!(imp::Fill, ctx::Context, data::AbstractVector)
Computes the fill value if `imp.value` is a `Function` (i.e., `imp.value(drop(copy(data)))`)
and replaces all missing values in the `data` with that value.
"""
function impute!{T<:Any}(imp::Fill, ctx::Context, data::AbstractArray{T, 1})
function impute!(imp::Fill, ctx::Context, data::AbstractVector)
fill_val = if isa(imp.value, Function)
imp.value(drop(copy(data)))
imp.value(Iterators.drop(copy(data)))
else
imp.value
end
Expand Down
8 changes: 4 additions & 4 deletions src/imputors/interp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
Performs linear interpolation between the nearest values in a vector.
"""
type Interpolate <: Imputor end
struct Interpolate <: Imputor end

"""
impute!{T<:Any}(imp::Interpolate, ctx::Context, data::AbstractArray{T, 1})
impute!(imp::Interpolate, ctx::Context, data::AbstractVector)
Uses linear interpolation between existing elements of a vector to fill in missing data.
WARNING: Missing values at the head or tail of the array cannot be interpolated if there
are no existing values on both sides. As a result, this method does not guarantee
that all missing values will be imputed.
"""
function impute!{T<:Any}(imp::Interpolate, ctx::Context, data::AbstractArray{T, 1})
function impute!(imp::Interpolate, ctx::Context, data::AbstractVector{<:Union{T, Missing}}) where T
i = findfirst(ctx, data) + 1

while i < length(data)
if ismissing(ctx, data[i])
prev_idx = i - 1
next_idx = findnext(ctx, data, i + 1)

if next_idx > 0
if next_idx !== nothing
gap_sz = (next_idx - prev_idx) - 1

diff = data[next_idx] - data[prev_idx]
Expand Down
6 changes: 3 additions & 3 deletions src/imputors/locf.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
type LOCF <: Imputor end
struct LOCF <: Imputor end

"""
impute!{T<:Any}(imp::LOCF, ctx::Context, data::AbstractArray{T, 1})
impute!(imp::LOCF, ctx::Context, data::AbstractVector)
Iterates forwards through the `data` and fills missing data with the last
existing observation.
Expand All @@ -15,7 +15,7 @@ that all missing values will be imputed.
```
"""
function impute!{T<:Any}(imp::LOCF, ctx::Context, data::AbstractArray{T, 1})
function impute!(imp::LOCF, ctx::Context, data::AbstractVector)
start_idx = findfirst(ctx, data) + 1
for i in start_idx:length(data)
if ismissing(ctx, data[i])
Expand Down
Loading

0 comments on commit 73e78d1

Please sign in to comment.