Merge pull request #26 from ericphanson/eph/fixes

Fix readme example & update to liblinear v247
JuliaML · Apr 17, 2024 · 8cc780f · 8cc780f
2 parents 1ce0572 + ccbc4b3
commit 8cc780f
Show file tree

Hide file tree

Showing 9 changed files with 128 additions and 90 deletions.
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
@@ -1,11 +1,33 @@
 name: TagBot
 on:
-  schedule:
-    - cron: 0 0 * * *
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:
+    inputs:
+      lookback:
+        default: 3
+permissions:
+  actions: read
+  checks: read
+  contents: write
+  deployments: read
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  pull-requests: read
+  repository-projects: read
+  security-events: read
+  statuses: read
 jobs:
   TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
     runs-on: ubuntu-latest
     steps:
       - uses: JuliaRegistries/TagBot@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+          # Edit the following line to reflect the actual name of the GitHub Secret containing your private key
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
+          # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,43 @@
+name: CI
+on:
+  push:
+    branches:
+      - master
+    tags: ['*']
+  pull_request:
+  workflow_dispatch:
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1.3'
+          - '1'
+        os:
+          - ubuntu-latest
+          - macos-latest
+          - windows-latest
+        arch:
+          - x64
+          - x86
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: julia-actions/cache@v1
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v3
+        with:
+          files: lcov.info
diff --git a/.travis.yml b/.travis.yml
diff --git a/Project.toml b/Project.toml
@@ -2,7 +2,7 @@ name = "LIBLINEAR"
 uuid = "2d691ee1-e668-5016-a719-b2531b85e0f5"
 authors = ["innerlee"]
 repo = "https://github.com/innerlee/LIBLINEAR.jl.git"
-version = "0.6.0"
+version = "0.6.1"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
@@ -11,6 +11,7 @@ liblinear_jll = "275f1f90-abd2-5ca1-9ad8-abd4e3d66eb7"
 
 [compat]
 julia = "1.3"
+liblinear_jll = "~2.47.0"
 
 [extras]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # LIBLINEAR
 
-[![Build Status](https://travis-ci.org/innerlee/LIBLINEAR.jl.svg?branch=master)](https://travis-ci.org/innerlee/LIBLINEAR.jl)
-[![Build status](https://ci.appveyor.com/api/projects/status/x9jq6w5mji1u6eff?svg=true)](https://ci.appveyor.com/project/innerlee/liblinear-jl)
+[![CI](https://github.com/innerlee/LIBLINEAR.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/innerlee/LIBLINEAR.jl/actions/workflows/ci.yml)
+
 
 Julia bindings for [LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/).
 
@@ -15,7 +15,7 @@ iris = dataset("datasets", "iris")
 labels = iris.Species
 
 # First dimension of input data is features; second is instances
-data = convert(Matrix, iris[:, 1:4])'
+data = Matrix(iris[:, 1:4])'
 
 # Train SVM on half of the data using default parameters. See the linear_train
 # function in LIBLINEAR.jl for optional parameter settings.

diff --git a/REQUIRE b/REQUIRE
diff --git a/appveyor.yml b/appveyor.yml
diff --git a/src/LIBLINEAR.jl b/src/LIBLINEAR.jl
@@ -1,4 +1,3 @@
-__precompile__(true)
 module LIBLINEAR
 
 using SparseArrays
@@ -22,7 +21,7 @@ const L2R_LR_DUAL         = Cint(7)
 const L2R_L2LOSS_SVR      = Cint(11)
 const L2R_L2LOSS_SVR_DUAL = Cint(12)
 const L2R_L1LOSS_SVR_DUAL = Cint(13)
-
+const ONECLASS_SVM        = Cint(21)
 
 struct FeatureNode
     index         :: Cint
@@ -38,14 +37,16 @@ struct Problem
 end
 
 struct Parameter
-    solver_type   :: Cint
-    eps           :: Float64
-    C             :: Float64
-    nr_weight     :: Cint
-    weight_label  :: Ptr{Cint}
-    weight        :: Ptr{Float64}
-    p             :: Float64
-    init_sol      :: Ptr{Float64}            # Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC
+    solver_type     :: Cint
+    eps             :: Float64                 # stopping tolerance
+    C               :: Float64
+    nr_weight       :: Cint
+    weight_label    :: Ptr{Cint}
+    weight          :: Ptr{Float64}
+    p               :: Float64
+    nu              :: Float64                 # one-class SVM only
+    init_sol        :: Ptr{Float64}            # Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC
+    regularize_bias :: Cint
 end
 
 struct Model
@@ -55,6 +56,7 @@ struct Model
     w             :: Ptr{Float64}
     label         :: Ptr{Cint}               # label of each class
     bias          :: Float64
+    rho           :: Float64                 # one-class SVM only
 end
 
 # model in julia
@@ -66,6 +68,7 @@ mutable struct LinearModel{T}
   _labels         :: Vector{Cint}            # internal label names
   labels          :: Vector{T}
   bias            :: Float64
+  rho             :: Float64
 end
 
 # helper
@@ -182,7 +185,9 @@ function linear_train(
             eps           :: Real=Inf,
             C             :: Real=1.0,
             p             :: Real=0.1,
+            nu            :: Real=0.5,
             init_sol      :: Ptr{Float64}=convert(Ptr{Float64}, C_NULL), # initial solutions for solvers L2R_LR, L2R_L2LOSS_SVC
+            regularize_bias :: Cint = Cint(1),
             bias          :: Real=-1.0,
             verbose       :: Bool=false
             ) where {T, U<:Real}
@@ -201,6 +206,7 @@ function linear_train(
         L2R_L2LOSS_SVR          =>  0.001,
         L2R_L2LOSS_SVR_DUAL     =>  0.1,
         L2R_L1LOSS_SVR_DUAL     =>  0.1,
+        ONECLASS_SVM            =>  0.01,
     )[solver_type])
 
     nfeatures = size(instances, 1) # instances are in columns
@@ -212,7 +218,7 @@ function linear_train(
 
     param = Array{Parameter}(undef, 1)
     param[1] = Parameter(solver_type, eps, C, Cint(length(weights)),
-        pointer(weight_labels), pointer(weights), p, init_sol)
+        pointer(weight_labels), pointer(weights), p, nu, init_sol, regularize_bias)
 
     # construct problem
     (nodes, nodeptrs) = instances2nodes(instances)
@@ -231,14 +237,21 @@ function linear_train(
                 (Ptr{Problem}, Ptr{Parameter}),
                 problem, param)
     m = unsafe_wrap(Array, ptr, 1)[1]
-
     # extract w & _labels
     w_dim    = Int(m.nr_feature + (bias >= 0 ? 1 : 0))
     w_number = Int(m.nr_class == 2 && solver_type != MCSVM_CS ? 1 : m.nr_class)
     w        = copy(unsafe_wrap(Array, m.w, w_dim * w_number))
-    _labels  = copy(unsafe_wrap(Array, m.label, m.nr_class))
+
+    # Fill in labels vector
+    # using `_labels  = copy(unsafe_wrap(Array, m.label, m.nr_class))` segfaults
+    # when using `ONECLASS_SVM`. Calling `get_labels` instead just leaves
+    # `_labels` filled with -1's in that case, which seems better.
+    _labels = Vector{Cint}(undef, m.nr_class)
+    _labels .= -1 # initialize to some invalid state
+    ccall((:get_labels, liblinear), Cvoid, (Ptr{Model},Ptr{Vector{Cint}}), ptr, pointer(_labels))
+    rho = solver_type == ONECLASS_SVM ? m.rho : 0.0
     model    = LinearModel(solver_type, Int(m.nr_class), Int(m.nr_feature),
-                    w, _labels, reverse_labels, m.bias)
+                    w, _labels, reverse_labels, m.bias, rho)
     ccall((:free_model_content, liblinear), Cvoid, (Ptr{Model},), ptr)
 
     model
@@ -262,13 +275,20 @@ function linear_predict(
 
     m = Array{Model}(undef, 1)
     m[1] = Model(Parameter(model.solver_type, .0, .0, Cint(0),
-            convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0,
-            convert(Ptr{Float64}, C_NULL)),
+            convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0,.0,
+            convert(Ptr{Float64}, C_NULL), Cint(0)),
             model.nr_class, model.nr_feature, pointer(model.w),
-            pointer(model._labels), model.bias)
+            pointer(model._labels), model.bias, model.rho)
 
     (nodes, nodeptrs) = instances2nodes(instances)
-    class = Array{T}(undef, ninstances)
+
+    if model.solver_type == ONECLASS_SVM
+        # In this case we need to return inlier/outlier class labels
+        # which may not be of type `T`
+        class = Array{String}(undef, ninstances)
+    else
+        class = Array{T}(undef, ninstances)
+    end
     w_number = Int(model.nr_class == 2 && model.solver_type != MCSVM_CS ?
         1 : model.nr_class)
     decvalues = Array{Float64}(undef, w_number, ninstances)
@@ -280,7 +300,19 @@ function linear_predict(
             output = ccall((:predict_values, liblinear), Float64, (Ptr{Cvoid}, Ptr{FeatureNode}, Ptr{Float64}),
                 pointer(m), nodeptrs[i], pointer(decvalues, w_number*(i-1)+1))
         end
-        class[i] = model.labels[round(Int,output)]
+        output_int = round(Int,output)
+
+        # For one-class SVM, `predict_values` returns -1 for outliers and +1
+        # for inliers. This doesn't seem to be documented,
+        # but the code clearly returns +/- 1:
+        # https://github.com/cjlin1/liblinear/blob/8dc206b782e07676dc0d00678bedd295ce85acf3/linear.cpp#L3295
+        # and that is what scipy returns as well.
+        if model.solver_type === ONECLASS_SVM
+            c = output_int == -1 ? "outlier" :  output_int == 1 ? "inlier" : error("Unexpected output $output_int")
+        else
+            c = model.labels[output_int]
+        end
+        class[i] = c
     end
 
     (class, decvalues)

diff --git a/test/validation.jl b/test/validation.jl
@@ -6,6 +6,11 @@
     correct[[30,42,43,65,67]] .= false
     @test (class .== labels[2:2:end]) == correct
 
+    @testset "one-class" begin
+        model = linear_train(labels[1:2:end], inst[:, 1:2:end]; verbose=true, solver_type=Cint(21))
+        (class, decvalues) = linear_predict(model, inst[:, 2:2:end], verbose=true)
+        @test all(in(("outlier", "inlier")), class)
+    end
     @testset "Sparse matrix" begin
         model = linear_train(labels[1:2:end], sparse(inst[:, 1:2:end]); verbose=true, solver_type=Cint(0))
         GC.gc()