From db68d68cbef5b6ec4121f8cef8b5cc31b0b649ee Mon Sep 17 00:00:00 2001 From: kaandocal <26488673+kaandocal@users.noreply.github.com> Date: Fri, 23 Apr 2021 13:18:52 +0100 Subject: [PATCH 1/2] Added support for time-varying HMMs --- src/HMMBase.jl | 4 + src/timevaryinghmm.jl | 455 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 src/timevaryinghmm.jl diff --git a/src/HMMBase.jl b/src/HMMBase.jl index ccf40039..db3aa810 100644 --- a/src/HMMBase.jl +++ b/src/HMMBase.jl @@ -23,6 +23,8 @@ export # hmm.jl AbstractHMM, HMM, + TimeVaryingHMM, + fit_mle!, copy, rand, size, @@ -56,6 +58,8 @@ include("likelihoods.jl") include("utilities.jl") include("experimental.jl") +include("timevaryinghmm.jl") + # To be removed in a future version # --------------------------------- #!format: off diff --git a/src/timevaryinghmm.jl b/src/timevaryinghmm.jl new file mode 100644 index 00000000..1d5005c0 --- /dev/null +++ b/src/timevaryinghmm.jl @@ -0,0 +1,455 @@ + +""" + HMM([a, ]A, B) -> HMM + +Build an HMM with transition matrix `A` and observation distributions `B`. +If the initial state distribution `a` is not specified, a uniform distribution is assumed. + +Observations distributions can be of different types (for example `Normal` and `Exponential`), +but they must be of the same dimension. + +Alternatively, `B` can be an emission matrix where `B[i,j]` is the probability of observing symbol `j` in state `i`. + +**Arguments** +- `a::AbstractVector{T}`: initial probabilities vector. +- `A::AbstractMatrix{T}`: transition matrix. +- `B::AbstractVector{<:Distribution{F}}`: observations distributions. +- or `B::AbstractMatrix`: emission matrix. + +**Example** +```julia +using Distributions, HMMBase +# from distributions +hmm = HMM([0.9 0.1; 0.1 0.9], [Normal(0,1), Normal(10,1)]) +# from an emission matrix +hmm = HMM([0.9 0.1; 0.1 0.9], [0. 0.5 0.5; 0.25 0.25 0.5]) +``` +""" +struct TimeVaryingHMM{F,T} <: AbstractHMM{F} + a::Vector{T} + A::AbstractArray{T,3} + B::Matrix{Distribution{F}} + TimeVaryingHMM{F,T}(a, A, B) where {F,T} = assert_timevaryinghmm(a, A, B) && new(a, A, B) +end + +TimeVaryingHMM( + a::AbstractVector{T}, + A::AbstractArray{T,3}, + B::AbstractMatrix{<:Distribution{F}}, +) where {F,T} = TimeVaryingHMM{F,T}(a, A, B) + +TimeVaryingHMM(A::AbstractArray{T,3}, B::AbstractMatrix{<:Distribution{F}}) where {F,T} = + TimeVaryingHMM{F,T}(ones(size(A, 1)) ./ size(A, 1), A, B) + +function assert_timevaryinghmm(a::AbstractVector, A::AbstractArray{T,3} where T, B::AbstractMatrix{<:Distribution}) + @argcheck isprobvec(a) + @argcheck istransmats(A) + @argcheck all(length.(B) .== length(B[1])) ArgumentError("All distributions must have the same dimensions") + @argcheck size(A,3) + 1 == size(B,2) ArgumentError("Number of transition rates must match length of chain") + @argcheck length(a) == size(A,1) == size(B,1) + return true +end + +istransmats(A::AbstractArray{T,3}) where {T} = all(i->istransmat(@view A[:,:,i]), 1:size(A,3)) + +==(h1::TimeVaryingHMM, h2::TimeVaryingHMM) = (h1.a == h2.a) && (h1.A == h2.A) && (h1.B == h2.B) + +function rand( + rng::AbstractRNG, + hmm::TimeVaryingHMM; + init = rand(rng, Categorical(hmm.a)), + seq = false, +) + T = size(hmm.B, 2) + z = Vector{Int}(undef, T) + (T >= 1) && (z[1] = init) + for t = 2:T + z[t] = rand(rng, Categorical(hmm.A[z[t-1],:,t-1])) + end + y = rand(rng, hmm, z) + seq ? (z, y) : y +end + +function rand(rng::AbstractRNG, hmm::TimeVaryingHMM{Univariate}, z::AbstractVector{<:Integer}) + y = Vector{Float64}(undef, length(z)) + for t in eachindex(z) + y[t] = rand(rng, hmm.B[z[t],t]) + end + y +end + +function rand( + rng::AbstractRNG, + hmm::TimeVaryingHMM{Multivariate}, + z::AbstractVector{<:Integer}, +) + y = Matrix{Float64}(undef, length(z), size(hmm,2)) + for t in eachindex(z) + y[t, :] = rand(rng, hmm.B[z[t],t]) + end + y +end + +""" + size(hmm, [dim]) -> Int | Tuple + +Return the number of states in `hmm`, the dimension of the observations and the length of the chain. +""" +size(hmm::TimeVaryingHMM, dim = :) = (size(hmm.B, 1), length(hmm.B[1]), size(hmm.B, 2))[dim] + +copy(hmm::TimeVaryingHMM) = TimeVaryingHMM(copy(hmm.a), copy(hmm.A), copy(hmm.B)) + +function nparams(hmm::TimeVaryingHMM) + (length(hmm.a) - 1) + (size(hmm.A,1) * size(hmm.A,2) - size(hmm.A,1)) * size(hmm.A,3) +end + + +### + +function loglikelihoods!(LL::AbstractMatrix, hmm::TimeVaryingHMM{Univariate}, observations) + T, K = size(observations, 1), size(hmm, 1) + @argcheck size(LL) == (T, K) + @argcheck T == size(hmm, 3) + @inbounds for i in OneTo(K), t in OneTo(T) + LL[t,i] = logpdf(hmm.B[i,t], observations[t]) + end +end + +function loglikelihoods!(LL::AbstractMatrix, hmm::TimeVaryingHMM{Multivariate}, observations) + T, K = size(observations, 1), size(hmm, 1) + @argcheck size(LL) == (T, K) + @argcheck T == size(hmm, 3) + @inbounds for i in OneTo(K), t in OneTo(T) + LL[t,i] = logpdf(hmm.B[i,t], view(observations, t, :)) + end +end + + + +#### + +function update_A!( + A::AbstractArray{T,3} where {T}, + ξ::AbstractArray, + α::AbstractMatrix, + β::AbstractMatrix, + LL::AbstractMatrix, +) + @argcheck size(α, 1) == size(β, 1) == size(LL, 1) == size(ξ, 1) == size(A,3) + 1 + @argcheck size(α, 2) == + size(β, 2) == + size(LL, 2) == + size(A, 1) == + size(A, 2) == + size(ξ, 2) == + size(ξ, 3) + + T, K = size(LL) + + @inbounds for t in OneTo(T - 1) + m = vec_maximum(view(LL, t + 1, :)) + c = 0.0 + + for i in OneTo(K), j in OneTo(K) + ξ[t, i, j] = α[t, i] * A[i, j, t] * exp(LL[t+1, j] - m) * β[t+1, j] + c += ξ[t, i, j] + end + + for i in OneTo(K), j in OneTo(K) + ξ[t, i, j] /= c + end + end + + fill!(A, 0.0) + + @inbounds for t in OneTo(T - 1) + for i in OneTo(K) + c = 0.0 + + for j in OneTo(K) + A[i, j, t] += ξ[t, i, j] + c += A[i, j, t] + end + + for j in OneTo(K) + A[i, j, t] /= c + end + end + end +end + +# # In-place update of the observations distributions. +# function update_B!(B::AbstractMatrix, γ::AbstractMatrix, observations, estimator) +# @argcheck size(γ, 1) == size(observations, 1) +# @argcheck size(γ, 2) == size(B, 1) +# K = length(B) +# for i in OneTo(K) +# if sum(γ[:, i]) > 0 +# B[i] = estimator(typeof(B[i]), permutedims(observations), γ[:, i]) +# end +# end +# end + +function fit_mle!( + hmm::TimeVaryingHMM, + observations; + display = :none, + maxiter = 100, + tol = 1e-3, + robust = false, + estimator = fit_mle, + fit_dists = false +) + @argcheck display in [:none, :iter, :final] + @argcheck maxiter >= 0 + @argcheck fit_dists === false "Not yet supported" + + T, K = size(observations, 1), size(hmm, 1) + @argcheck T == size(hmm.B, 2) + history = EMHistory(false, 0, []) + + # Allocate memory for in-place updates + c = zeros(T) + α = zeros(T, K) + β = zeros(T, K) + γ = zeros(T, K) + ξ = zeros(T, K, K) + LL = zeros(T, K) + + loglikelihoods!(LL, hmm, observations) + robust && replace!(LL, -Inf => nextfloat(-Inf), Inf => log(prevfloat(Inf))) + + forwardlog!(α, c, hmm.a, hmm.A, LL) + backwardlog!(β, c, hmm.a, hmm.A, LL) + posteriors!(γ, α, β) + + logtot = sum(c) + (display == :iter) && println("Iteration 0: logtot = $logtot") + + for it = 1:maxiter + update_a!(hmm.a, α, β) + update_A!(hmm.A, ξ, α, β, LL) + fit_dists && update_B!(hmm.B, γ, observations, estimator) + + # Ensure the "connected-ness" of the states, + # this prevents case where there is no transitions + # between two extremely likely observations. + robust && (hmm.A .+= eps()) + + @check isprobvec(hmm.a) + @check istransmats(hmm.A) + + if fit_dists + loglikelihoods!(LL, hmm, observations) + robust && replace!(LL, -Inf => nextfloat(-Inf), Inf => log(prevfloat(Inf))) + end + + forwardlog!(α, c, hmm.a, hmm.A, LL) + backwardlog!(β, c, hmm.a, hmm.A, LL) + posteriors!(γ, α, β) + + logtotp = sum(c) + (display == :iter) && println("Iteration $it: logtot = $logtotp") + + push!(history.logtots, logtotp) + history.iterations += 1 + + if abs(logtotp - logtot) < tol + (display in [:iter, :final]) && + println("EM converged in $it iterations, logtot = $logtotp") + history.converged = true + break + end + + logtot = logtotp + end + + if !history.converged + if display in [:iter, :final] + println("EM has not converged after $(history.iterations) iterations, logtot = $logtot") + end + end + + history +end + + + + + + +# In-place forward pass, where α and c are allocated beforehand. +function forwardlog!( + α::AbstractMatrix, + c::AbstractVector, + a::AbstractVector, + A::AbstractArray{T,3} where {T}, + LL::AbstractMatrix, +) + @argcheck size(α, 1) == size(LL, 1) == size(c, 1) + @argcheck size(α, 2) == size(LL, 2) == size(a, 1) == size(A, 1) == size(A, 2) + + T, K = size(LL) + @argcheck T == size(A,3) + 1 + + fill!(α, 0.0) + fill!(c, 0.0) + + m = vec_maximum(view(LL, 1, :)) + + for j in OneTo(K) + α[1, j] = a[j] * exp(LL[1, j] - m) + c[1] += α[1, j] + end + + for j in OneTo(K) + α[1, j] /= c[1] + end + + c[1] = log(c[1]) + m + + @inbounds for t = 2:T + m = vec_maximum(view(LL, t, :)) + + for j in OneTo(K) + for i in OneTo(K) + α[t,j] += α[t-1, i] * A[i,j,t-1] + end + α[t, j] *= exp(LL[t, j] - m) + c[t] += α[t, j] + end + + for j in OneTo(K) + α[t, j] /= c[t] + end + + c[t] = log(c[t]) + m + end +end + +# In-place backward pass, where β and c are allocated beforehand. +function backwardlog!( + β::AbstractMatrix, + c::AbstractVector, + a::AbstractVector, + A::AbstractArray{T,3} where {T}, + LL::AbstractMatrix, +) + @argcheck size(β, 1) == size(LL, 1) == size(c, 1) + @argcheck size(β, 2) == size(LL, 2) == size(a, 1) == size(A, 1) == size(A, 2) + + T, K = size(LL) + @argcheck T == size(A,3) + 1 + L = zeros(K) + (T == 0) && return + + fill!(β, 0.0) + fill!(c, 0.0) + + for j in OneTo(K) + β[end, j] = 1.0 + end + + @inbounds for t = T-1:-1:1 + m = vec_maximum(view(LL, t + 1, :)) + + for i in OneTo(K) + L[i] = exp(LL[t+1, i] - m) + end + + for j in OneTo(K) + for i in OneTo(K) + β[t,j] += β[t+1, i] * A[j,i,t] * L[i] + end + c[t+1] += β[t, j] + end + + for j in OneTo(K) + β[t, j] /= c[t+1] + end + + c[t+1] = log(c[t+1]) + m + end + + m = vec_maximum(view(LL, 1, :)) + + for j in OneTo(K) + c[1] += a[j] * exp(LL[1, j] - m) * β[1, j] + end + + c[1] = log(c[1]) + m +end + +function forward(a::AbstractVector, A::AbstractArray{T,3} where {T}, LL::AbstractMatrix) + m = Matrix{Float64}(undef, size(LL)) + c = Vector{Float64}(undef, size(LL, 1)) + forwardlog!(m, c, a, A, LL) + m, sum(c) +end + +function backward(a::AbstractVector, A::AbstractArray{T,3} where {T}, LL::AbstractMatrix) + m = Matrix{Float64}(undef, size(LL)) + c = Vector{Float64}(undef, size(LL, 1)) + backwardlog!(m, c, a, A, LL) + m, sum(c) +end + +function posteriors(a::AbstractVector, A::AbstractArray{T,3} where {T}, LL::AbstractMatrix; kwargs...) + α, _ = forward(a, A, LL; kwargs...) + β, _ = backward(a, A, LL; kwargs...) + posteriors(α, β) +end + +function viterbilog!( + T1::AbstractMatrix, + T2::AbstractMatrix, + z::AbstractVector, + a::AbstractVector, + A::AbstractArray{T,3} where {T}, + LL::AbstractMatrix, +) + T, K = size(LL) + @argcheck T == size(A,3) + 1 + (T == 0) && return + + fill!(T1, 0.0) + fill!(T2, 0) + + al = log.(a) + Al = log.(A) + + for i in OneTo(K) + T1[1, i] = al[i] + LL[1, i] + end + + @inbounds for t = 2:T + for j in OneTo(K) + amax = 0 + vmax = -Inf + + for i in OneTo(K) + v = T1[t-1, i] + Al[i, j, t-1] + if v > vmax + amax = i + vmax = v + end + end + + T1[t, j] = vmax + LL[t, j] + T2[t, j] = amax + end + end + + z[T] = argmax(T1[T, :]) + for t = T-1:-1:1 + z[t] = T2[t+1, z[t+1]] + end +end + +function viterbi(a::AbstractVector, A::AbstractArray{T,3} where {T}, LL::AbstractMatrix) + T1 = Matrix{Float64}(undef, size(LL)) + T2 = Matrix{Int}(undef, size(LL)) + z = Vector{Int}(undef, size(LL, 1)) + viterbilog!(T1, T2, z, a, A, LL) + z +end From c9911ced352f8ad2cfd58ed42be604dbb9070b81 Mon Sep 17 00:00:00 2001 From: kaandocal <26488673+kaandocal@users.noreply.github.com> Date: Fri, 23 Apr 2021 13:26:42 +0100 Subject: [PATCH 2/2] Removed wrong docstring --- src/timevaryinghmm.jl | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/timevaryinghmm.jl b/src/timevaryinghmm.jl index 1d5005c0..953f4b7e 100644 --- a/src/timevaryinghmm.jl +++ b/src/timevaryinghmm.jl @@ -1,30 +1,4 @@ -""" - HMM([a, ]A, B) -> HMM - -Build an HMM with transition matrix `A` and observation distributions `B`. -If the initial state distribution `a` is not specified, a uniform distribution is assumed. - -Observations distributions can be of different types (for example `Normal` and `Exponential`), -but they must be of the same dimension. - -Alternatively, `B` can be an emission matrix where `B[i,j]` is the probability of observing symbol `j` in state `i`. - -**Arguments** -- `a::AbstractVector{T}`: initial probabilities vector. -- `A::AbstractMatrix{T}`: transition matrix. -- `B::AbstractVector{<:Distribution{F}}`: observations distributions. -- or `B::AbstractMatrix`: emission matrix. - -**Example** -```julia -using Distributions, HMMBase -# from distributions -hmm = HMM([0.9 0.1; 0.1 0.9], [Normal(0,1), Normal(10,1)]) -# from an emission matrix -hmm = HMM([0.9 0.1; 0.1 0.9], [0. 0.5 0.5; 0.25 0.25 0.5]) -``` -""" struct TimeVaryingHMM{F,T} <: AbstractHMM{F} a::Vector{T} A::AbstractArray{T,3}