From 5e9026a1b1f628452f3b264c794a1fe74dc6074b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20ANTONELLO?= Date: Fri, 26 Feb 2021 08:30:45 +0100 Subject: [PATCH] Initial commit Signed-off-by: Philip Abbet --- .gitignore | 37 ++ DatasetParsers.jl | 43 ++ LICENSE | 22 + Manifest.toml | 1159 +++++++++++++++++++++++++++++++++++++++++ Models.jl | 67 +++ Project.toml | 15 + README.md | 57 ++ RESULTS.md | 19 + Utils.jl | 165 ++++++ WFSTs.jl | 143 +++++ check_model.jl | 44 ++ conf/1a/feat_conf.jl | 21 + conf/1a/model_conf.jl | 20 + conf/1b/feat_conf.jl | 21 + conf/1b/model_conf.jl | 20 + conf/2a/feat_conf.jl | 21 + conf/2a/model_conf.jl | 20 + conf/2b/feat_conf.jl | 21 + conf/2b/model_conf.jl | 20 + demo.jl | 61 +++ eval.jl | 69 +++ plotstuff.jl | 61 +++ prepare_data.jl | 164 ++++++ train.jl | 156 ++++++ 24 files changed, 2446 insertions(+) create mode 100644 .gitignore create mode 100644 DatasetParsers.jl create mode 100644 LICENSE create mode 100644 Manifest.toml create mode 100644 Models.jl create mode 100644 Project.toml create mode 100644 README.md create mode 100644 RESULTS.md create mode 100644 Utils.jl create mode 100644 WFSTs.jl create mode 100644 check_model.jl create mode 100644 conf/1a/feat_conf.jl create mode 100644 conf/1a/model_conf.jl create mode 100644 conf/1b/feat_conf.jl create mode 100644 conf/1b/model_conf.jl create mode 100644 conf/2a/feat_conf.jl create mode 100644 conf/2a/model_conf.jl create mode 100644 conf/2b/feat_conf.jl create mode 100644 conf/2b/model_conf.jl create mode 100644 demo.jl create mode 100644 eval.jl create mode 100644 plotstuff.jl create mode 100644 prepare_data.jl create mode 100644 train.jl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1acc289 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +__pycache__ +*.pt +*.log +*.swp +*.gz +*.pdf +*.log +*.bbl +*.blg +*.aux +*.auxlock +*.dpth +*.md5 +*.dep +*.table +*.dvi +*.gnuplot +*.fdb_latexmk +*.fls +*.out +*.spl +*.nav +*.toc +*.snm +*.jld2 +*.bson 
+.DS_Store +_cache_* + +data/ +models/ +logs/ +figs/ +checkpoint/ +evals/ +env.sh +jl_* diff --git a/DatasetParsers.jl b/DatasetParsers.jl new file mode 100644 index 0000000..065bf6a --- /dev/null +++ b/DatasetParsers.jl @@ -0,0 +1,43 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +export get_uttID2file +function get_uttID2file(dataset_path,folder::String) + uttID2file = Dict{String,String}() + for (root,dir,files) in walkdir(joinpath(dataset_path,folder);follow_symlinks=true) + wavs = files[findall(contains.(files,".wav"))] + folders = split(root, "/") + spkID = folders[end] + type = folders[end-1] + for f in wavs + sentenceID = split(f, "."; limit=2)[1] + uttID = "$(spkID)-$(type)-$(sentenceID)" + uttID2file[uttID] = joinpath(root,f) + end + end + return uttID2file +end + +export get_uttID2text +function get_uttID2text(uttID2file::Dict) + d = Dict( + 'z' => "ZERO", '3' => "THREE", '7' => "SEVEN", + 'o' => "OH", '4' => "FOUR", '8' => "EIGHT", + '1' => "ONE", '5' => "FIVE", '9' => "NINE", + '2' => "TWO", '6' => "SIX", 'a' =>"", 'b'=>"") + uttID2text = Dict{String,String}() + for uttID in keys(uttID2file) + text = split(uttID,"-")[3] + try + uttID2text[uttID] = strip(prod([d[t] for t in text].*" ")) + catch + error("$text is an invalid filename, invalid dataset!") + end + end + return uttID2text +end + +function get_uttID2wav(uttID2file::Dict; T=Float32) + uttID2wav = Dict(uttID => T.(load(uttID2file[uttID]).data)[:] + for uttID in keys(uttID2file)) +end diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..20c0e29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +Written by Niccolò Antonello + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..4871acf --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,1159 @@ +# This file is machine-generated - editing it directly is not advised + +[[AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "0.5.0" + +[[AbstractTrees]] +deps = ["Markdown"] +git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.3.3" + +[[Adapt]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ffcfa2d345aaee0ef3d8346a073d5dd03c983ebe" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.2.0" + +[[ArgParse]] +deps = ["Logging", "TextWrap"] +git-tree-sha1 = "4a8f4df432fd8e8a96a142c53f9432b9022a92e6" +uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +version = "1.1.1" + +[[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + +[[BFloat16s]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a" +uuid = 
"ab4f0b2a-ad5b-11e8-123f-65d77653426b" +version = "0.1.0" + +[[BSON]] +git-tree-sha1 = "dd36d7cf3d185eeaaf64db902c15174b22f5dafb" +uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +version = "0.2.6" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[Blosc]] +deps = ["Blosc_jll"] +git-tree-sha1 = "84cf7d0f8fd46ca6f1b3e0305b4b4a37afe50fd6" +uuid = "a74b3585-a348-5f62-a45c-50e91977d574" +version = "0.7.0" + +[[Blosc_jll]] +deps = ["Libdl", "Lz4_jll", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "aa9ef39b54a168c3df1b2911e7797e4feee50fbe" +uuid = "0b7ba130-8d10-5ba8-a3d6-c5182647fed9" +version = "1.14.3+1" + +[[Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.6+5" + +[[CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[CUDA]] +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] +git-tree-sha1 = "6ccc73b2d8b671f7a65c92b5f08f81422ebb7547" +uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" +version = "2.4.1" + +[[Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.16.0+6" + +[[ChainRules]] +deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "Reexport", "Requires", "Statistics"] +git-tree-sha1 = "8cb44c68fcc2a6eef1ed603110251a5cd81dd3af" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version 
= "0.7.52" + +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "de4f08843c332d355852721adb1592bce7924da3" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "0.9.29" + +[[ClusterManagers]] +deps = ["Distributed", "Logging", "Pkg", "Sockets"] +git-tree-sha1 = "fdcb2d1c35096f74d40164154f4989f82bd8dd38" +repo-rev = "29f6a1fc39cf6182058c15408745585d3cc8dc18" +repo-url = "https://github.com/JuliaParallel/ClusterManagers.jl.git" +uuid = "34f1f09b-3a8b-5176-ab39-66d58a4d544e" +version = "0.4.0" + +[[CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.0" + +[[ColorSchemes]] +deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] +git-tree-sha1 = "3141757b5832ee7a0386db87997ee5a23ff20f4d" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.10.2" + +[[ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "4bffea7ed1a9f0f3d1a131bbcd4b925548d75288" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.10.9" + +[[Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"] +git-tree-sha1 = "ac5f2213e56ed8a34a3dd2f681f4df1166b34929" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.6" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.25.0" + +[[CompilerSupportLibraries_jll]] 
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" + +[[Contour]] +deps = ["StaticArrays"] +git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.5.7" + +[[DSP]] +deps = ["FFTW", "IterTools", "LinearAlgebra", "Polynomials", "Random", "Reexport", "SpecialFunctions", "Statistics"] +git-tree-sha1 = "2a63cb5fc0e8c1f0f139475ef94228c7441dc7d0" +uuid = "717857b8-e6f2-59f4-9121-6e50c889abd2" +version = "0.6.10" + +[[DataAPI]] +git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.6.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.9" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.0.2" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "92d8f9f208637e8d2d28c664051a00569c01493d" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.1.5+1" + +[[Expat_jll]] +deps = ["Artifacts", 
"JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1402e52fcda25064f51c77a9655ce8680b76acf0" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.2.7+6" + +[[ExprTools]] +git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.3" + +[[EzXML]] +deps = ["Printf", "XML2_jll"] +git-tree-sha1 = "0fa3b52a04a4e210aeb1626def9c90df3ae65268" +uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +version = "1.1.0" + +[[FFMPEG]] +deps = ["FFMPEG_jll", "x264_jll"] +git-tree-sha1 = "9a73ffdc375be61b0e4516d83d880b265366fe1f" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.0" + +[[FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "LibVPX_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "3cc57ad0a213808473eafef4845a74766242e05f" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.3.1+4" + +[[FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] +git-tree-sha1 = "8fda0934cb99db617171f7296dc361f4d6fa5424" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.3.0" + +[[FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "5a0d4b6a22a34d17d53543bd124f4b08ed78e8b0" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.9+7" + +[[FLAC_jll]] +deps = ["Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "cd528d5083a589aab69dc6de73a901efa9d69586" +uuid = "1d38b3a6-207b-531b-80e8-c83f48dafa73" +version = "1.3.3+2" + +[[FileIO]] +deps = ["Pkg"] +git-tree-sha1 = "fee8955b9dfa7bec67117ef48085fb2b559b9c22" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.4.5" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "bed538ad14d132aa8240bb2e8ab82fcd2fd2f548" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" 
+version = "0.11.3" + +[[FiniteStateTransducers]] +deps = ["DataStructures", "Random"] +git-tree-sha1 = "42d75e0b4f7cbdc29911175ddaa61f14293c6f19" +uuid = "d0430b04-9e26-4b95-8372-754e5cd95c01" +version = "0.1.0" + +[[FixedPointNumbers]] +git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.6.1" + +[[Flux]] +deps = ["AbstractTrees", "Adapt", "CUDA", "CodecZlib", "Colors", "DelimitedFiles", "Functors", "Juno", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "SHA", "Statistics", "StatsBase", "Test", "ZipFile", "Zygote"] +git-tree-sha1 = "c443bf5a8329573a68364106b2c29bb6938dc6f5" +uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" +version = "0.11.6" + +[[Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.1+14" + +[[Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "d48a40c0f54f29a5c8748cfb3225719accc72b77" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.16" + +[[FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.10.1+5" + +[[FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0d20aed5b14dd4c9a2453c1b601d08e1149679cc" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.5+6" + +[[Functors]] +deps = ["MacroTools"] +git-tree-sha1 = "f40adc6422f548176bb4351ebd29e4abf773040a" +uuid = 
"d9f16b24-f501-4c13-a1f2-28368ffc5196" +version = "0.1.0" + +[[GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] +git-tree-sha1 = "a1bbf700b5388bffc3d882f4f4d625cf1c714fd7" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.3.2+1" + +[[GPUArrays]] +deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"] +git-tree-sha1 = "f99a25fe0313121f2f9627002734c7d63b4dd3bd" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "6.2.0" + +[[GPUCompiler]] +deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "c853c810b52a80f9aad79ab109207889e57f41ef" +uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.8.3" + +[[GR]] +deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] +git-tree-sha1 = "aaebdf5588281c2902f499b49e67953f2b409c9c" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.54.0" + +[[GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "8aee6fa096b0cbdb05e71750c978b96a08c78951" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.53.0+0" + +[[GeometryBasics]] +deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "4d4f72691933d5b6ee1ff20e27a102c3ae99d123" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.3.9" + +[[Gettext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "8c14294a079216000a0bdca5ec5a447f073ddc9d" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.20.1+7" + +[[Glib_jll]] +deps = ["Artifacts", 
"Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "04690cc5008b38ecbdfede949220bc7d9ba26397" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.59.0+4" + +[[Grisu]] +git-tree-sha1 = "03d381f65183cb2d0af8b3425fde97263ce9a995" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.0" + +[[HDF5]] +deps = ["Blosc", "HDF5_jll", "Libdl", "Mmap", "Random"] +git-tree-sha1 = "0713cbabdf855852dfab3ce6447c87145f3d9ea8" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.13.6" + +[[HDF5_jll]] +deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba" +uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.12.0+1" + +[[HMMGradients]] +deps = ["ChainRulesCore", "LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "f8a736033242c0170ce8a808118b6c5097c9a374" +uuid = "ed22c0d8-4b10-4781-a02d-2b7b373fe96c" +version = "0.1.2" + +[[HTTP]] +deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"] +git-tree-sha1 = "942c1a9c750bbe79912b7bd060a420932afd35b8" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "0.9.3" + +[[IRTools]] +deps = ["InteractiveUtils", "MacroTools", "Test"] +git-tree-sha1 = "c67e7515a11f726f44083e74f218d134396d6510" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.2" + +[[IniFile]] +deps = ["Test"] +git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" +uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" +version = "0.5.0" + +[[IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2018.0.3+2" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[IntervalSets]] +deps = ["Dates", "Statistics"] +git-tree-sha1 = 
"4214b48a62eb8f2c292b2ee34a508c256c0cdbc9" +uuid = "8197267c-284f-5f27-9208-e0e47529a953" +version = "0.3.2" + +[[Intervals]] +deps = ["Dates", "Printf", "RecipesBase", "Serialization", "TimeZones"] +git-tree-sha1 = "323a38ed1952d30586d0fe03412cde9399d3618b" +uuid = "d8418881-c3e1-53bb-8760-2df7ec849ed5" +version = "1.5.0" + +[[IterTools]] +git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.3.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JLD2]] +deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] +git-tree-sha1 = "bb9a457481adf060ab5898823a49d4f854ff4ddd" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.0" + +[[JLLWrappers]] +git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.2.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.1" + +[[JpegTurbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9aff0587d9603ea0de2c6f6300d9f9492bbefbd3" +uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" +version = "2.0.1+3" + +[[Juno]] +deps = ["Base64", "Logging", "Media", "Profile"] +git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7" +uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" +version = "0.8.4" + +[[LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "df381151e871f41ee86cee4f5f6fd598b8a68826" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.0+3" + +[[LLVM]] +deps = ["CEnum", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "3.6.0" + +[[LZO_jll]] +deps = 
["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f128cd6cd05ffd6d3df0523ed99b90ff6f9b349a" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.0+3" + +[[LaTeXStrings]] +git-tree-sha1 = "c7aebfecb1a60d59c0fe023a68ec947a208b1e6b" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.2.0" + +[[Latexify]] +deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] +git-tree-sha1 = "3a0084cec7bf157edcb45a67fac0647f88fe5eaf" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.14.7" + +[[LibCURL_jll]] +deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] +git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.70.0+2" + +[[LibGit2]] +deps = ["Printf"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Libdl", "MbedTLS_jll", "Pkg"] +git-tree-sha1 = "717705533148132e5466f2924b9a3657b16158e8" +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.9.0+3" + +[[LibSndFile]] +deps = ["FileIO", "Libdl", "LinearAlgebra", "Printf", "SampledSignals", "libsndfile_jll"] +git-tree-sha1 = "79b36b1457d209d7705e07d6e9bca9bb2524b520" +uuid = "b13ce0c6-77b0-50c6-a2db-140568b8d1a5" +version = "2.3.0" + +[[LibVPX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "85fcc80c3052be96619affa2fe2e6d2da3908e11" +uuid = "dd192d2f-8180-539f-9fb4-cc70b1dcf69a" +version = "1.9.0+1" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a2cd088a88c0d37eef7d209fd3d8712febce0d90" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.1+4" + +[[Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] +git-tree-sha1 = "b391a18ab1170a2e568f9fb8d83bc7c780cb9999" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.8.5+4" + 
+[[Libglvnd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] +git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf" +uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" +version = "1.3.0+3" + +[[Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ec7f2e8ad5c9fa99fc773376cdbc86d9a5a23cb7" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.36.0+3" + +[[Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e924324b2e9275a51407a4e06deb3455b1e359f" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.16.0+7" + +[[Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "51ad0c01c94c1ce48d5cad629425035ad030bfd5" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.34.0+3" + +[[Libtiff_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "291dd857901f94d683973cdf679984cdf73b56d0" +uuid = "89763e89-9b03-5906-acba-b20f662cd828" +version = "4.1.0+2" + +[[Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f879ae9edbaa2c74c922e8b85bb83cc84ea1450b" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.34.0+7" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Lz4_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "51b1db0732bbdcfabb60e36095cc3ed9c0016932" +uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" +version = "1.9.2+2" + +[[MFCC]] +deps = ["DSP", "Distributed", "FileIO", "HDF5", "SpecialFunctions", "Statistics", "WAV"] +git-tree-sha1 = "e8d6bb66e00f85ea7ba7f244da3b097d80825b3b" +uuid = "ca7b5df7-6146-5dcc-89ec-36256279a339" +version = "0.3.1" + +[[MKL_jll]] +deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] +git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" 
+version = "2020.2.254+0" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.6" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] +git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.0.3" + +[[MbedTLS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.16.8+1" + +[[Measures]] +git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.1" + +[[Media]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "75a54abd10709c01f1b86b84ec225d26e840ed58" +uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" +version = "0.5.0" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.5" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[Mocking]] +deps = ["ExprTools"] +git-tree-sha1 = "916b850daad0d46b8c71f65f719c49957e9513ed" +uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.7.1" + +[[NNlib]] +deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] +git-tree-sha1 = "df42d0816edfc24f5b82a728f46381613c4dff79" +uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +version = "0.7.14" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "76622f08645764e040b4d7e86d0ff471fd126ae4" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.5.3" + +[[Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = 
"a42c0f138b9ebe8b58eba2271c5053773bde52d0" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.4+2" + +[[OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "71bbbc616a1d710879f5a1021bcba65ffba6ce58" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.1+6" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.3+4" + +[[Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f9d57f4126c39565e05a2b0264df99f497fc6f37" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.1+3" + +[[OrderedCollections]] +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.0" + +[[PCRE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1b556ad51dceefdbf30e86ffa8f528b73c7df2bb" +uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" +version = "8.42.0+4" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.15" + +[[Pixman_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6a20a83c1ae86416f0a5de605eaea08a552844a3" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.40.0+0" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PlotThemes]] +deps = ["PlotUtils", "Requires", "Statistics"] +git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "2.0.1" + +[[PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] +git-tree-sha1 = "ae9a295ac761f64d8c2ec7f9f24d21eb4ffba34d" +uuid = 
"995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.0.10" + +[[Plots]] +deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"] +git-tree-sha1 = "7ecf7d0207e7208a5cad9fd3bd357f5d5eb16044" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.10.5" + +[[Polynomials]] +deps = ["Intervals", "LinearAlgebra", "OffsetArrays", "RecipesBase"] +git-tree-sha1 = "1c6c5b0c3713738d6b987903c529d80622c37e07" +uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" +version = "1.2.0" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[Qt_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] +git-tree-sha1 = "7760cfea90bec61814e31dfb204fa4b81bba7b57" +uuid = "ede63266-ebff-546c-83e0-1c6fb6d0efc8" +version = "5.15.2+1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[RecipesBase]] +git-tree-sha1 = "b3fb709f3c97bfc6e948be68beeecb55a0b340ae" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.1.1" + +[[RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] +git-tree-sha1 = "c4d54a78e287de7ec73bbc928ce5eb3c60f80b24" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.3.1" + +[[Reexport]] +git-tree-sha1 = 
"57d8440b0c7d98fc4f889e478e80f268d534c9d5" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.0.0" + +[[Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "cfbac6c1ed70c002ec6361e7fd334f02820d6419" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.1.2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[SampledSignals]] +deps = ["Compat", "DSP", "FFTW", "FixedPointNumbers", "IntervalSets", "LinearAlgebra", "TreeViews", "Unitful"] +git-tree-sha1 = "7f95f0a2ed381addc60101ed91d56f6b2c3f108b" +repo-rev = "6a9c6a58fe2a17baa8f5f074acd2b58aed0e3544" +repo-url = "https://github.com/JuliaAudio/SampledSignals.jl.git" +uuid = "bd7594eb-a658-542f-9e75-4c4d8908c167" +version = "2.1.0" + +[[Scratch]] +deps = ["Dates"] +git-tree-sha1 = "ad4b278adb62d185bbcb6864dc24959ab0627bf6" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.0.3" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "ee010d8f103468309b8afac4abb9be2e18ff1182" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "0.3.2" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["OpenSpecFun_jll"] +git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.10.3" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.0.1" + +[[Statistics]] +deps = 
["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "400aa43f7de43aeccc5b2e39a76a79d262202b76" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.3" + +[[StructArrays]] +deps = ["Adapt", "DataAPI", "Tables"] +git-tree-sha1 = "26ea43b4be7e919a2390c3c0f824e7eb4fc19a0a" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.5.0" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + +[[Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.3.2" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TextWrap]] +git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf" +uuid = "b718987f-49a8-5099-9789-dcd902bef87d" +version = "1.0.1" + +[[TimeZones]] +deps = ["Dates", "EzXML", "Mocking", "Pkg", "Printf", "RecipesBase", "Serialization", "Unicode"] +git-tree-sha1 = "4ba8a9579a243400db412b50300cd61d7447e583" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "1.5.3" + +[[TimerOutputs]] +deps = ["Printf"] +git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.7" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.5" + +[[TreeViews]] +deps = ["Test"] +git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6" +uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7" +version = "0.3.0" + 
+[[URIs]] +git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.2.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Unitful]] +deps = ["LinearAlgebra", "Random"] +git-tree-sha1 = "92bdf0ccfa9612b167d0adaadef832a09971ceb0" +uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" +version = "0.17.0" + +[[WAV]] +deps = ["Base64", "FileIO", "Libdl", "Logging"] +git-tree-sha1 = "21b46ff53c571693f617c151a0497ea7749bdeba" +uuid = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88" +version = "1.1.0" + +[[Wayland_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "dc643a9b774da1c2781413fd7b6dcd2c56bb8056" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.17.0+4" + +[[Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll"] +git-tree-sha1 = "2839f1c1296940218e35df0bbb220f2a79686670" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.18.0+4" + +[[XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "be0db24f70aae7e2b89f2f3092e93b8606d659a6" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.9.10+3" + +[[XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "2b3eac39df218762d2d005702d601cd44c997497" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.33+4" + +[[Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.6.9+4" + +[[Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" 
+version = "1.0.9+4" + +[[Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = "1.2.0+4" + +[[Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.3+4" + +[[Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.4+4" + +[[Xorg_libXfixes_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" + +[[Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + +[[Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.10+4" + +[[Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = 
"6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.0+3" + +[[Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.13.0+3" + +[[Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "926af861744212db0eb001d9e40b5d16292080b2" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.0+4" + +[[Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "4bcbf660f6c2e714f87e960a171b119d06ee163b" +uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" 
+version = "1.4.2+4" + +[[Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xkbcomp_jll"] +git-tree-sha1 = "5c8424f8a67c3f2209646d4425f3d415fee5931d" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.27.0+4" + +[[Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.4.0+3" + +[[ZipFile]] +deps = ["Libdl", "Printf", "Zlib_jll"] +git-tree-sha1 = "c3a5637e27e914a7a445b8d0ad063d701931e9f7" +uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" +version = "0.9.3" + +[[Zlib_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" + +[[Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "2c1332c54931e83f8f94d310fa447fd743e8d600" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.4.8+0" + +[[Zygote]] +deps = ["AbstractFFTs", "ChainRules", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "52835a83f7c899cfcb95f796d584201812887ea8" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.3" + +[[ZygoteRules]] +deps = ["MacroTools"] +git-tree-sha1 = "9e7a1e8ca60b742e508a315c17eef5211e7fbfd7" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.1" + +[[alsa_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce395137bbc11d37ab57da33062c42de6b99d76e" +uuid = "45378030-f8ea-5b20-a7c7-1a9d95efb90e" +version = "1.2.1-1+2" + +[[libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "acc685bcf777b2202a904cdcb49ad34c2fa1880c" +uuid = 
"0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.14.0+4" + +[[libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "7a5780a0d9c6864184b3a2eeeb833a0c871f00ab" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = "0.1.6+4" + +[[libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "6abbc424248097d69c0c87ba50fcb0753f93e0ee" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.37+6" + +[[libsndfile_jll]] +deps = ["Artifacts", "FLAC_jll", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg", "alsa_jll", "libvorbis_jll"] +git-tree-sha1 = "fc7a0233235d1c4da6dfdf73f480c2619e9ea66d" +uuid = "5bf562c0-5a39-5b4f-b979-f64ac885830c" +version = "1.0.28+2" + +[[libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "fa14ac25af7a4b8a7f61b287a124df7aab601bcd" +uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.6+6" + +[[nghttp2_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "8e2c44ab4d49ad9518f359ed8b62f83ba8beede4" +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.40.0+2" + +[[x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d713c1ce4deac133e3334ee12f4adff07f81778f" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2020.7.14+2" + +[[x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "487da2f8f2f0c8ee0e83f39d13037d6bbf0a45ab" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.0.0+3" + +[[xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "0.9.1+5" diff --git a/Models.jl b/Models.jl new file mode 100644 index 0000000..ee24b75 --- /dev/null +++ b/Models.jl @@ -0,0 +1,67 @@ +# Copyright (c) 2021 Idiap Research Institute, 
http://www.idiap.ch/ +# Niccolò Antonello + +using Flux + +struct FullyConnected{T<:AbstractFloat} # affine layer: the constructor below builds M as (Nx, Ny) and b as (1, Ny) + M::Matrix{T} + b::Matrix{T} +end + +init_bias(Ny,Nx) = +2/sqrt(Float32(first(Flux.nfan(Ny,Nx)))) * Float32.(rand(Ny) .- 0.5) + +function FullyConnected(Nx::Int,Ny::Int) + M = Flux.kaiming_uniform(Ny,Nx)'[:,:] + b = reshape(init_bias(Ny,Nx),1,Ny) + return FullyConnected(M,b) +end + +# TODO: in future Flux versions this can be replaced by Dense +function (model::FullyConnected{T})(X::AbstractArray{T,3}) where {T} + Nt, Nx, Nb = size(X) # input dims are read as (Nt, Nx, Nb); the result is (Nt, Ny, Nb) + Ny = size(model.M,2) + + X = permutedims(X,(1,3,2)) + X = reshape(X,Nt*Nb,Nx) # flatten dims 1 and 3 so one matrix product covers all rows + + Y = X*model.M .+ model.b + + Y = reshape(Y,Nt,Nb,Ny) + Y = permutedims(Y,(1,3,2)) + return Y +end +Flux.@functor FullyConnected + +export get_convnet +function get_convnet(Nf,Ns; + Nhs=128*ones(Int,2), + Nks=[3,3], + strides=[1,3], + dilations=[1,2], + dropout=[0.0,0.0], + fout = x->logsoftmax(x,dims=2) + ) + T = Float32 # NOTE(review): T is never used below + Nl = length(Nhs) + @assert length(Nhs) == length(Nks) == length(strides) == length(dilations) # NOTE(review): length(dropout) is not checked although dropout[i] is indexed for i in 1:Nl + convs = [Conv((Nks[i],), (i == 1 ? Nf : Nhs[i-1]) => Nhs[i], + stride=strides[i], + dilation=dilations[i], + pad=SamePad(), + init = Flux.kaiming_uniform, + bias = init_bias(Nhs[i], i==1 ? Nf : Nhs[i-1]) + ) for i=1:Nl] + bns = [BatchNorm(Nhs[i],relu) for i=1:Nl] + dro =[Dropout(dropout[i],dims=2) for i=1:Nl] + out = FullyConnected(Nhs[end],Ns) + layers = [] # filled per stage as Conv, BatchNorm(relu), Dropout; then the FullyConnected output and fout + for i=1:Nl + push!(layers,convs[i]) + push!(layers,bns[i]) + push!(layers,dro[i]) + end + push!(layers,out) + push!(layers,fout) + return Chain(layers...) 
+end diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..fc29eee --- /dev/null +++ b/Project.toml @@ -0,0 +1,15 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +ClusterManagers = "34f1f09b-3a8b-5176-ab39-66d58a4d544e" +DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" +FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +FiniteStateTransducers = "d0430b04-9e26-4b95-8372-754e5cd95c01" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +HMMGradients = "ed22c0d8-4b10-4781-a02d-2b7b373fe96c" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LibSndFile = "b13ce0c6-77b0-50c6-a2db-140568b8d1a5" +MFCC = "ca7b5df7-6146-5dcc-89ec-36256279a339" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +SampledSignals = "bd7594eb-a658-542f-9e75-4c4d8908c167" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b80205 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# TIDIGITS recipe + +This repository contains a _recipe_ for training an ASR system using the [TIDIGITS database](https://catalog.ldc.upenn.edu/LDC93S10). +The recipe is entirely Julia-flavoured and uses the following packages (among others): +* [Flux](https://github.com/FluxML/Flux.jl) as ML library +* [FiniteStateTransducers](https://github.com/idiap/FiniteStateTransducers.jl) for WFST compositions +* [HMMGradients](https://github.com/idiap/HMMGradients.jl) for maximum likelihood training + +Currently the training runs only on CPU and employs a simple greedy decoder. Stay tuned for more! + +### Installation + +Set in your environment the path `TIDIGITS_PATH=/your/path/to/tidigits`. +If you're using SGE set the command flags in `CPU_CMD`, i.e. the queue options. + +This can be done e.g. by running `source env.sh` before launching Julia, where `env.sh` is a script that exports these variables. 
+Alternatively, the environment variables can be specified [directly in the REPL](https://docs.julialang.org/en/v1/manual/environment-variables/). + +Run `julia --project -e 'using Pkg; Pkg.instantiate()'` to install all the dependencies. + +### Configuration + +Specify your current configuration in the folder `conf`. +The configuration files are loaded from the folder `conf/mysetup/`. +This folder must contain the following files: +* `feat_conf.jl` for feature extraction +* `model_conf.jl` for model and optimisation parameters (hyperparameters) +A couple of setups are present in this repository for reference in the folder `conf`. + +### Data preparation + +Run `julia --project prepare_data.jl --conf 2a` to extract features and prepare training data using the configuration `2a`. +Features and transcriptions will be saved in the folder `data/uuid/`. +Here `uuid` is linked to the `feat_conf.jl` file, meaning that if you create a new `model_conf.jl` without modifying feature extraction you don't need to run data preparation twice. +If SGE grid is available add the flag `--nj N` to split the work into `N` jobs. + +For the moment HMM configuration is fixed in `WFSTs.jl` with a phone based 2-state HMM. + +### Training + +Training is performed by running the script `julia --project train.jl --conf 2a`. +Notice that if you're just experimenting it is more convenient to run the experiment from Julia's REPL. +```julia +$ julia --project + +julia> include("train.jl") + +``` +Modify the `conf` by changing the default in the `ArgParse` table. + +### Evaluation + +Run the script `eval.jl` to calculate Word Error Rate (WER) and Phone Error Rate (PER). + +### Demo + +A live demo can be started by running `demo.jl` (requires [sox](http://sox.sourceforge.net/) to be installed in your system). 
diff --git a/RESULTS.md b/RESULTS.md new file mode 100644 index 0000000..86ac62a --- /dev/null +++ b/RESULTS.md @@ -0,0 +1,19 @@ +# Setup 1a (4 layer CNN with l1 regularisation) +* Phone Error Rate (PER): 0.899 % +* Word Error Rate (WER) : 1.217 % +* Accuracy: 0.965 + +# Setup 1b (4 layer CNN with softmax output) +* Phone Error Rate (PER): 1.477 % +* Word Error Rate (WER) : 2.074 % +* Accuracy: 0.943 + +# Setup 2a (8 layer CNN with l1 regularisation) +* Phone Error Rate (PER): 0.587 % +* Word Error Rate (WER) : 0.827 % +* Accuracy: 0.975 + +# Setup 2b (8 layer CNN with softmax output) +* Phone Error Rate (PER): 5.204 % +* Word Error Rate (WER) : 10.238 % +* Accuracy: 0.718 diff --git a/Utils.jl b/Utils.jl new file mode 100644 index 0000000..7d96fa3 --- /dev/null +++ b/Utils.jl @@ -0,0 +1,165 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using UUIDs + +export zeropad +function zeropad(x::Vector{Matrix{T}}) where {T} # copy each matrix into a zero-initialised (Nt_max, Nf, Nb) array + Nt_max = maximum(size.(x,1)) + Nf = size(x[1],2) # column count is taken from the first matrix only + Nb = length(x) + x_batched = zeros(T,Nt_max,Nf,Nb) + for (i,xi) in enumerate(x) + for f = 1:Nf, t = 1:size(xi,1) + x_batched[t,f,i] = xi[t,f] + end + end + return x_batched +end + +export posterior2phones +function posterior2phones(ippsym, gamma) + #z = [replace(ippsym[argmax(gamma[t,:])], r"[0-9\-]"=>"") for t in 1:size(gamma,1)] + p = [ippsym[argmax(gamma[t,:])] for t in 1:size(gamma,1)] # all phones symbols + k = [p[1]] # phones without repetitions + for t in 2:length(p) + if p[t-1] != p[t] + push!(k,p[t]) + end + end + k2 = String[] + for ki in k + if ki[end] == '1' # emitting symbol + push!(k2,replace(ki, r"[0-9\-]"=>"")) + end + end + return k2 +end + +export levenshtein +function levenshtein(s,t) + n,m = length(s),length(t) + D = zeros(Int,n+1,m+1) + + D[:,1] = 0:n # first column: t is empty, so s[1:i] costs i deletions + D[1,:] = 0:m # first row: s is empty, so t[1:j] costs j insertions + for i = 2:n+1, j = 2:m+1 + # check whether a substitution is needed + cost = 
s[i-1] == t[j-1] ? 0 : 1 + + D[i,j] = min( + D[i-1,j] + 1, # del + D[i,j-1] + 1, # ins + D[i-1,j-1] + cost, # subs / ok + ) + end + return D[n+1,m+1] +end + +export text2phones +function text2phones(lexicon,text; add_sil=true) + if add_sil + phones = [[lexicon[t]...,""] for t in split(text)] + else + phones = [lexicon[t] for t in split(text)] + end + phones = vcat(phones...) + if add_sil + phones = ["",phones...] + end + return phones +end + +export get_error_rate +function get_error_rate(uttID2seq::Dict, + uttID2seq_dec::Dict; kwargs...) + seqs, seq_decs = [], [] + for uttID in keys(uttID2seq) + push!(seqs,uttID2seq[uttID]) + push!(seq_decs,uttID2seq_dec[uttID]) + end + get_error_rate(seqs,seq_decs; kwargs...) +end + +function get_error_rate(seqs::Vector,seq_decs::Vector; is_split=false) + Nw = 0 + err = 0 + for i in eachindex(seqs) + seq, seq_dec = seqs[i], seq_decs[i] + if is_split == false + seq = split(seq;keepempty=false) + seq_dec = split(seq_dec; keepempty=false) + end + Nw += length(seq) + err += levenshtein(seq,seq_dec) + end + er = err/Nw # summed edit distance over the summed length of `seqs`; this last expression is the return value +end + +export min_dist_word +function min_dist_word(prons,min_dist,word_phones) + d = [levenshtein(word_phones,pr) for pr in prons] + idxs = findall(d .<= min_dist) + if isempty(idxs) + return "" + else + return ilexicon[prons[idxs[argmin(d[idxs])]]] # NOTE(review): `ilexicon` is not an argument here; it resolves to a global + end +end + +function add_word!(dec,prons,word_phones; min_dist=2) + t = try + ilexicon[word_phones] # NOTE(review): global `ilexicon` again; any error here falls through to the catch branch + catch + if min_dist == 0 + "" + else + min_dist_word(prons,min_dist,word_phones) + end + end + push!(dec,t) +end + +function phones2words_greedy(ilexicon,phones; min_dist=2) + prons = [keys(ilexicon)...] 
+ word_phones = String[] + dec = String[] + for (i,p) in enumerate(phones) + if i == 1 + word_phones = String[] + if p != "" + push!(word_phones,p) + end + elseif (p == "") && i > 1 + if !isempty(word_phones) + add_word!(dec,prons,word_phones; min_dist=min_dist) + word_phones = String[] + end + else + if p != "" + push!(word_phones,p) + end + end + end + if !isempty(word_phones) + add_word!(dec,prons,word_phones; min_dist=min_dist) + end + return dec +end + +export check_env +function check_env() + if !("TIDIGITS_PATH" in keys(ENV)) + @warn "ENV[\"TIDIGITS_PATH\"] not exisitng: `export TIDIGITS_PATH=path/to/dataset` to your env." + end + if !("CPU_CMD" in keys(ENV)) + @warn "ENV[\"CPU_CMD\"] not exisitng: `export CPU_CMD='...'` to your env first. Only needed for SGE." + end +end + +export get_feat_dir +function get_feat_dir(setup; root="data") # returns `root` joined with a UUIDv5 derived from the text of conf/$(setup)/feat_conf.jl + uuid_folder = UUID("04a07b93-95e4-4b85-94b9-d3516eb06ea2") # fixed namespace, so identical feature configs map to the same folder + conf = read("conf/$(setup)/feat_conf.jl",String) + return joinpath(root, string(uuid5(uuid_folder,conf))) # honour the `root` keyword (was hard-coded to "data") +end diff --git a/WFSTs.jl b/WFSTs.jl new file mode 100644 index 0000000..ca5a9f2 --- /dev/null +++ b/WFSTs.jl @@ -0,0 +1,143 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using FiniteStateTransducers + +export get_L +# builds the L transducer +function get_L(lexicon::Dict{S,Vector{S}}) where {S<:AbstractString} + phones = sort!(unique!(vcat(values(lexicon)...))) + words = sort!([keys(lexicon)...]) + psym = Dict(p => i for (i,p) in enumerate(phones)) + wsym = Dict(w => i for (i,w) in enumerate(words )) + + L = WFST(psym,wsym) + add_states!(L,2) + initial!(L,1) + final!(L,2) + ϵ = get_eps(S) + c = 3 + for w in keys(lexicon) + pron=lexicon[w] + for (i,p) in enumerate(pron) + if i == 1 && (length(pron) != 1) + add_arc!(L,1,c,p,w,1) + elseif i == length(pron) + if i == 1 + add_arc!(L,1,2,p,w) + else + add_arc!(L,c,2,p,ϵ) + c += 1 + end + else + add_arc!(L,c,c+1,p,ϵ) + c += 1 + end + end + end + 
add_arc!(L,2,1,ϵ,ϵ) + return L +end + +export get_H +# builds the H transducer, a 2-state HMM per phone +function get_H(psym; selfloop_prob=0.4) + isym = Dict{String,Int}() + c=1 + for p in sort([keys(psym)...]) + if p == "" + isym["$(p)1"] = c + isym["$(p)2"] = c+1 + c +=2 + else + for i=1:2 + isym["$p$i"] = c + c+=1 + end + end + end + Ns = length(psym) + + H = WFST(isym, psym) + add_states!(H,Ns+1) + initial!(H,1) + ϵ = get_eps(String) + for p in sort([keys(psym)...]) + if p == "" + # from initial state, assume silence + add_arc!(H, 1 , isym["1"]+1, "$(p)1", p) + end + # this avoids trivial solution of always staying in the same state + add_arc!(H, isym["$(p)1"]+1, isym["$(p)1"]+1, "$(p)1", ϵ,-log(selfloop_prob)) + # prob of transition to other state unknown, set to 1 + add_arc!(H, isym["$(p)1"]+1, isym["$(p)2"]+1, "$(p)2", ϵ) + # to final state + final!(H,isym["$(p)2"]+1) + end + for s in keys(get_final(H)) + for p in keys(psym) + # prob of transition to other phone unknown, set to 1 + add_arc!(H, s, isym["$(p)1"]+1, "$(p)1", p) # emitting state + end + end + return H +end + +export Hfst2trans +# convert the H transducer into transition matrix +function Hfst2trans(H::WFST) + Ns = length(get_isym(H)) + A = zeros(Float32,Ns,Ns) + state2outtr=Dict(i => (get_ilabel.(s),get_weight.(s)) for (i,s) in enumerate(H)) + for (p,s,n,d,e,a) in FiniteStateTransducers.DFS(H,1) + if d + intr = get_ilabel(a) + outtr,w = state2outtr[n] + for i in eachindex(outtr) + A[intr,outtr[i]] = exp(-get(w[i])) # arc weights are treated as negative log-probabilities + end + end + end + return A +end + +export get_lexicon +function get_lexicon() + lexicon = Dict( + "" => [""], + "OH" => ["OW"], + "ZERO" => ["Z", "IH", "R", "OW"], + "ONE" => ["W", "AH", "N"], + "TWO" => ["T", "UW"], + "THREE" => ["TH", "R", "IY"], + "FOUR" => ["F", "AO", "R"], + "FIVE" => ["F", "AY", "V"], + "SIX" => ["S", "IH", "KS"], + "SEVEN" => ["S", "EH", "V", "AH", "N"], + "EIGHT" => ["EY", "T"], + "NINE" => ["N", "AY", "N"] + ) + ilexicon = Dict(lexicon[w] => w for w in 
keys(lexicon)) + return lexicon, ilexicon +end + +export get_HL +function get_HL(lexicon) + L = get_L(lexicon) + H = get_H(get_isym(L)) + return H,L +end + +export get_aA +function get_aA(H; use_log=true) + A = Hfst2trans(H) + Ns = size(A,1) + a = zeros(Float32,Ns) # initial state prob + a[H.isym["1"]] = one(Float32) + + if use_log + A .= log.(A) + a .= log.(a) + end + return a,A +end diff --git a/check_model.jl b/check_model.jl new file mode 100644 index 0000000..c64de42 --- /dev/null +++ b/check_model.jl @@ -0,0 +1,44 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello +# +# This script is mainly for testing the model input/output works correctly +# +using HMMGradients, FiniteStateTransducers +using Random, Statistics, FileIO +using BSON + +setup="2a" + +include("WFSTs.jl") +include("Models.jl") +include("Utils.jl") +include("conf/$(setup)/feat_conf.jl") +include("conf/$(setup)/model_conf.jl") + +# get transition matrix +lexicon, ilexicon = get_lexicon() +H, L = get_HL(lexicon) +a, A = get_aA(H) +Ns = size(A,1) + +# init model +modely = get_convnet(Nf,Ns; + Nks=Nks, + Nhs=Nhs, + strides=strides, + dilations=dilations, + dropout=dropout, + fout=fout) + +Nt,Nb = rand(500:1000),4 +x = zeros(Float32,Nt,Nf,Nb) +t,b = 500,1 +x[t,:,b] .= 1.0 + +y = modely(x) +z = sum(y[:,:,b],dims=2) +Nt2 = ceil(Int,Nt/3) +@assert Nt2 == size(y,1) +println("Setup = $setup") +println("Num of parameters = $(sum(prod.(size.(params(modely)))))") +println("Context bins = $(subsample*sum( (!).(z .≈ z[100]) ))") diff --git a/conf/1a/feat_conf.jl b/conf/1a/feat_conf.jl new file mode 100644 index 0000000..9faaa03 --- /dev/null +++ b/conf/1a/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +get_feats(x) = identity(x) +# on the fly feature processing +function feats_post(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = 
resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +subsample = 3 # out subsampling diff --git a/conf/1a/model_conf.jl b/conf/1a/model_conf.jl new file mode 100644 index 0000000..81242e2 --- /dev/null +++ b/conf/1a/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(313) +Nhs = [256,256,256,256] # hidden layer dims +dilations = [1,3,5,7] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = identity # function in last layer + +# training opts +λ1 = 1f-5 # l1 output regularization +lr = 1f-3 # learning rate +Nb = 16 # batch size +epochs_cur = 5 +epochs = 15 +opt = ADAM(lr) +curriculum_training = true diff --git a/conf/1b/feat_conf.jl b/conf/1b/feat_conf.jl new file mode 100644 index 0000000..9faaa03 --- /dev/null +++ b/conf/1b/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +get_feats(x) = identity(x) +# on the fly feature processing +function feats_post(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +subsample = 3 # out subsampling diff --git a/conf/1b/model_conf.jl b/conf/1b/model_conf.jl new file mode 100644 index 0000000..7dd1f01 --- /dev/null +++ b/conf/1b/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(1) +Nhs = [256,256,256,256] # hidden layer dims +dilations = [1,3,5,7] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = x -> 
logsoftmax(x,dims=2) # function in last layer + +# training opts +λ1 = 0f-5 # l1 output regularization +lr = 1f-4 # learning rate +Nb = 16 # batch size +epochs_cur = 5 +epochs = 15 +opt = Flux.Optimise.Optimiser(WeightDecay(1e-5),ADAM(lr)) +curriculum_training = true diff --git a/conf/2a/feat_conf.jl b/conf/2a/feat_conf.jl new file mode 100644 index 0000000..1588484 --- /dev/null +++ b/conf/2a/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +function get_feats(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +# on the fly feature processing +feats_post(x) = identity(x) +subsample = 3 # out subsampling diff --git a/conf/2a/model_conf.jl b/conf/2a/model_conf.jl new file mode 100644 index 0000000..42f473c --- /dev/null +++ b/conf/2a/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(313) +Nhs = [256,256,256,256,256,256,256,256] # hidden layer dims +dilations = [1,1,3,3,5,5,7,11] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = identity # function in last layer + +# training opts +λ1 = 1f-5 # l1 output regularization +lr = 5f-4 # learning rate +Nb = 16 # batch size +epochs_cur = 5 +epochs = 15 +opt = ADAM(lr) +curriculum_training = true diff --git a/conf/2b/feat_conf.jl b/conf/2b/feat_conf.jl new file mode 100644 index 0000000..1588484 --- /dev/null +++ b/conf/2b/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +function get_feats(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = 
resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +# on the fly feature processing +feats_post(x) = identity(x) +subsample = 3 # out subsampling diff --git a/conf/2b/model_conf.jl b/conf/2b/model_conf.jl new file mode 100644 index 0000000..aabed16 --- /dev/null +++ b/conf/2b/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(11) +Nhs = [256,256,256,256,256,256,256,256] # hidden layer dims +dilations = [1,1,3,3,5,5,7,11] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = x -> logsoftmax(x,dims=2) # function in last layer + +# training opts +λ1 = 0f-5 # l1 output regularization +lr = 1f-4 # learning rate +Nb = 16 # batch size +epochs_cur = 5 +epochs = 15 +opt = Flux.Optimise.Optimiser(WeightDecay(1e-5),ADAM(lr)) +curriculum_training = true diff --git a/demo.jl b/demo.jl new file mode 100644 index 0000000..e17c897 --- /dev/null +++ b/demo.jl @@ -0,0 +1,61 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using HMMGradients, Flux +using Random, Statistics, LinearAlgebra +using FiniteStateTransducers +using BSON, JLD2, LibSndFile, FileIO, UUIDs +using MFCC, DSP +include("WFSTs.jl") +include("Models.jl") +include("Utils.jl") + +setup="2a" +duration=5 # duration of recoring in seconds +plot_stuff=false + +println(" + TIDIGIT demo + + $duration seconds will be recorded + + The following digits can be recognized: + ZERO OH ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE + (requires sox) + ") + + +include("conf/$(setup)/feat_conf.jl") +# get transition matrix +lexicon, ilexicon = get_lexicon() +H, L = get_HL(lexicon) +a, A = get_aA(H) +ippsym = get_iisym(H) + +model_folder = joinpath("models","$setup") +BSON.@load joinpath(model_folder,"best_modely_final.bson") 
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello
#
# Interactive demo: records a few seconds of audio with `sox`, runs the final
# model of `setup` on it and prints the decoded phones and digits.

using HMMGradients, Flux
using Random, Statistics, LinearAlgebra
using FiniteStateTransducers
using BSON, JLD2, LibSndFile, FileIO, UUIDs
using MFCC, DSP
include("WFSTs.jl")
include("Models.jl")
include("Utils.jl")

setup = "2a"         # configuration whose feature conf and model are used
duration = 5         # duration of recording in seconds
plot_stuff = false   # set to true to also draw the heatmaps at the end

# NOTE(review): the indentation inside this banner is a best-effort
# reconstruction; confirm against the original file.
println("
 TIDIGIT demo

 $duration seconds will be recorded

 The following digits can be recognized:
 ZERO OH ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE
 (requires sox)
 ")


include("conf/$(setup)/feat_conf.jl")
# get transition matrix
lexicon, ilexicon = get_lexicon()
H, L = get_HL(lexicon)
a, A = get_aA(H)
ippsym = get_iisym(H)

# load the final model of this setup and switch it to test mode
model_folder = joinpath("models", "$setup")
BSON.@load joinpath(model_folder, "best_modely_final.bson") best_modely
Flux.testmode!(best_modely)

# record `duration` seconds with sox into data/test.wav
mkpath("data")
file = "data/test.wav"
run(`sox -d -r 16k -c 1 --clobber $file trim 0 $duration`)
x = feats_post(get_feats(file))

# forward pass on a batch holding this single recording, then decoding
y = best_modely(Flux.unsqueeze(x, 3))
gamma = logposterior(size(y, 1), a, A, y[:, :])
phones = posterior2phones(ippsym, gamma)
dec = phones2words_greedy(ilexicon, phones; min_dist=2)

# space separated strings, shared by the printout and the plot titles
phone_str = strip(prod(phones .* " "))
digit_str = strip(prod(dec .* " "))

println("\nDecoded Phones")
println(phone_str)
println("\nDecoded Digits")
println(digit_str)

if plot_stuff
    using Plots
    p1 = heatmap(x', title="Input feats")
    p2 = heatmap(y[:, :]', clims=(-10, 0), title=phone_str)
    p3 = heatmap(gamma', clims=(-10, 0), title=digit_str)
    plot(p1, p2, p3, layout=(3, 1))
end
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello
#
# Decodes every utterance of one prepared feature split with the final model
# of a setup and prints the phone error rate, the word error rate and the
# fraction of utterances whose decoded text matches the reference exactly.
#
# Options:
#   --conf  configuration setup (default "2a")
#   --set   split to evaluate: train, test or dev (default "train", which is
#           the split this script has always evaluated; pass `--set test` or
#           `--set dev` to score data that was not used for training)

using ArgParse

s = ArgParseSettings()
@add_arg_table! s begin
    "--conf"
        help = "configuration setup"
        arg_type = String
        default = "2a"
    "--set"
        help = "feature split to evaluate (train, test or dev)"
        arg_type = String
        default = "train"
end
parsed_args = parse_args(ARGS, s)
setup = parsed_args["conf"]
eval_set = parsed_args["set"]
# prepare_data.jl writes exactly these three split files
eval_set in ("train", "test", "dev") ||
    error("Unknown split \"$eval_set\": expected train, test or dev")

using HMMGradients, Flux, Zygote
using Random, Statistics, LinearAlgebra
using FiniteStateTransducers
using DSP, MFCC
using BSON, JLD2, LibSndFile, FileIO, UUIDs
include("WFSTs.jl")
include("Models.jl")
include("Utils.jl")

include("conf/$(setup)/feat_conf.jl")

# get transition matrix
lexicon, ilexicon = get_lexicon()
H, L = get_HL(lexicon)
a, A = get_aA(H)
ippsym = get_iisym(H)

BSON.@load "models/$setup/best_modely_final.bson" best_modely
Flux.testmode!(best_modely)

feat_dir = get_feat_dir(setup)
data = load(joinpath(feat_dir, "$(eval_set).jld2"))
uttID2feats, uttID2text, uttID2phones =
    data["uttID2feats"], data["uttID2text"], data["uttID2phones"]

uttID2text_dec = Dict()
uttID2phones_dec = Dict()
min_dist = 2

# decode one utterance at a time (batch of size one)
for (uttID, feats) in uttID2feats
    x = feats_post(feats)
    y = best_modely(Flux.unsqueeze(x, 3))
    gamma = logposterior(size(y, 1), a, A, y[:, :])
    ps = posterior2phones(ippsym, gamma)
    ws = phones2words_greedy(ilexicon, ps; min_dist=min_dist)
    uttID2phones_dec[uttID] = ps
    uttID2text_dec[uttID] = strip(prod(ws .* " "))
end

# utterances whose decoded text differs from the reference: uttID => (ref, dec)
uttID2err_textdec = Dict{String,Tuple{String,String}}()
for (uttID, text) in uttID2text
    dec = uttID2text_dec[uttID]
    if text != dec
        uttID2err_textdec[uttID] = (text, dec)
    end
end

accuracy = 1 - length(uttID2err_textdec) / length(uttID2text)
wer = get_error_rate(uttID2text, uttID2text_dec)
per = get_error_rate(uttID2phones, uttID2phones_dec; is_split=true)
println("# Setup $setup")
println("* Phone Error Rate (PER): $(round(per * 100, digits=3)) %")
println("* Word Error Rate (WER) : $(round(wer * 100, digits=3)) %")
println("* Accuracy: $(round(accuracy,digits=3))")
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello
#
# This script can be used to check the
# output of the acoustic model and its decoding
# using two random utterances taken from the test set

using HMMGradients, Flux, Zygote
using Random, Statistics, LinearAlgebra
using FiniteStateTransducers
using DSP, MFCC
using BSON, JLD2, LibSndFile, FileIO
include("WFSTs.jl")
include("Models.jl")
include("Utils.jl")

setup = "2a"

include("conf/$(setup)/feat_conf.jl")
include("conf/$(setup)/model_conf.jl")

# get transition matrix
lexicon, ilexicon = get_lexicon()
H, L = get_HL(lexicon)
a, A = get_aA(H)
ippsym = get_iisym(H)

# checkpoint written by train.jl whenever the dev PER improves
BSON.@load "models/$(setup)/current_modely.bson" best_modely
Flux.testmode!(best_modely)

feat_dir = get_feat_dir(setup)
data = load(joinpath(feat_dir, "test.jld2"))
uttID2feats, uttID2text = data["uttID2feats"], data["uttID2text"]

# draw the two utterances (first i, then j)
uttIDs = [keys(uttID2text)...]
i = rand(uttIDs)
j = rand(uttIDs)

# same processing chain for both utterances, one stage at a time
xi, xj = map(u -> feats_post(uttID2feats[u]), (i, j))
yi, yj = map(x -> best_modely(Flux.unsqueeze(x, 3)), (xi, xj))
gammai, gammaj = map(y -> logposterior(size(y, 1), a, A, y[:, :]), (yi, yj))
outi, outj = map(g -> posterior2phones(ippsym, g), (gammai, gammaj))
for out in (outi, outj)
    out[out .== ""] .= " "   # keep empty symbols visible in the plot title
end

using Plots
pyplot()

# Build the three heatmaps of one utterance: input features (titled with the
# reference text), model output (titled with the reference phones taken from
# the lexicon) and posteriors (titled with the decoded phones).
function utt_panels(x, y, gamma, out, text, ymax)
    ref_phones = prod([prod(lexicon[w]) for w in split(text)] .* " ")
    p1 = heatmap(x', title=text)
    p2 = heatmap(y[:, :]', clims=(ymax - 20, ymax), title=ref_phones)
    p3 = heatmap(gamma', clims=(-20, 0), title=prod(out))
    return p1, p2, p3
end

# NOTE(review): both output heatmaps take their colour limits from the second
# utterance (yj) only; confirm this shared scale is intended and that the
# first panel was not meant to use maximum(yi).
ymax = maximum(yj)
p1i, p2i, p3i = utt_panels(xi, yi, gammai, outi, uttID2text[i], ymax)
p1j, p2j, p3j = utt_panels(xj, yj, gammaj, outj, uttID2text[j], ymax)

plot(p1i, p1j, p2i, p2j, p3i, p3j, layout=(3, 2))
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello
#
# Data preparation: extracts features and per-utterance transition lists for
# the wav files found under ENV["TIDIGITS_PATH"], repartitions the utterances
# into train/test/dev and stores one JLD2 file per split in the feature
# folder of the chosen configuration. With --nj > 1 the work is spread over
# SGE workers.

using ArgParse
using Distributed, ClusterManagers
include("Utils.jl")

# Declare and parse the command line options (--nj, --conf).
# Returns the dictionary produced by `parse_args`.
function parse_commandline()
    settings = ArgParseSettings()
    @add_arg_table! settings begin
        "--nj"
            help = "number of jobs"
            arg_type = Int
            default = 1
        "--conf"
            help = "configuration setup"
            arg_type = String
            default = "2a"
    end
    return parse_args(ARGS, settings)
end

# parse command line and add workers
parsed_args = parse_commandline()
nj, setup = parsed_args["nj"], parsed_args["conf"]
feat_dir = get_feat_dir(setup)
check_env()
# refuse to overwrite features that already exist for this configuration
if ispath(feat_dir)
    error("Data already processed for this feature conf in $(feat_dir). Remove this folder to re-run feature extraction from scratch.")
end

if nj > 1
    addprocs_sge(nj;
        qsub_flags=split(ENV["CPU_CMD"]),
        wd=mktempdir(pwd()),
        exeflags="--project"
    )
end

# make the setup name and the required code available on every process
@everywhere begin
    setup = $setup
    using DSP, MFCC, HMMGradients, FiniteStateTransducers
    using JLD2, LibSndFile, FileIO
    include("Utils.jl")
    include("WFSTs.jl")
    include("DatasetParsers.jl")
end

# Compute features, reference phones and transition lists for the utterances
# listed in `uttID2file` (uttID => wav path) and `uttID2text` (uttID => text).
#
# The raw features returned by `get_feats` are stored; `feats_post` is applied
# only to obtain the number of frames. The word sequence, with an empty symbol
# before, between and after the words, is turned into a linear FST that is
# composed with `L` and `H`; the result is passed to `wfst2tr` together with
# the frame count divided (rounding up) by `subsample`.
#
# If `nj > 1` the dictionaries uttID2feats, uttID2tr and uttID2phones are saved
# to `<feat_dir>/q_split_<set>/<jobid>.jld2` and `nothing` is returned;
# otherwise `(uttID2feats, uttID2tr, uttID2text, uttID2phones)` is returned.
# `jobid` defaults to the id of the executing process. A caller that may run
# several chunks on one process must pass a distinct `jobid` per chunk,
# otherwise the chunk files overwrite each other.
# `dataset_path` and `Fs` are not used here; they are kept so existing calls
# remain valid.
@everywhere function process_data(dataset_path,lexicon,L,H,Fs,subsample,
        uttID2file,uttID2text,feat_dir,set,nj; jobid=myid())
    uttID2feats = Dict()
    uttID2phones = Dict()
    uttID2tr = Dict{String,Vector{Pair{Vector{Int},Vector{Int}}}}()
    wsym = get_osym(L)

    for (uttID, wavfile) in uttID2file
        # process audio
        feats = get_feats(wavfile)
        uttID2feats[uttID] = feats
        Nt = size(feats_post(feats), 1)

        # process text
        text = uttID2text[uttID]
        uttID2phones[uttID] = text2phones(lexicon, text)
        words = split(text; keepempty=false)
        words = String.(vcat("", [[w, ""] for w in words]...)) # silence between every word
        S = linearfst(words, words, ones(typeofweight(L), length(words)), wsym, wsym)
        HLS = rm_eps!(H∘(L∘S))
        # frame count after subsampling; equals Nt when subsample == 1
        Nt2 = ceil(Int, Nt / subsample)
        time2tr = wfst2tr(HLS, Nt2)
        uttID2tr[uttID] = HMMGradients.t2tr2t2IJ(time2tr)
    end

    if nj > 1
        q = joinpath(feat_dir, "q_split_$set")
        mkpath(q)
        JLD2.@save joinpath(q, "$(jobid).jld2") uttID2feats uttID2tr uttID2phones
        return nothing
    else
        return uttID2feats, uttID2tr, uttID2text, uttID2phones
    end
end

# Process one folder (`set`) of the dataset and return
# `(uttID2feats, uttID2tr, uttID2text, uttID2phones)`.
#
# With `nj > 1` the utterances are split into `nj` contiguous chunks (the last
# chunk takes the remainder), each chunk is handled by `process_data` through
# `pmap` and written to its own file, named after the chunk index. The files
# are then merged and the temporary folder is removed. Naming by chunk index
# rather than by worker id keeps the result complete even when `pmap` hands
# two chunks to the same worker.
function prepare_data(dataset_path,lexicon,L,H,Fs,subsample,set,feat_dir,nj)
    println("Processing $set set with $nj jobs")
    uttID2file = get_uttID2file(dataset_path, set)
    uttID2text = get_uttID2text(uttID2file)
    if nj <= 1
        return process_data(dataset_path,lexicon,L,H,Fs,subsample,
                            uttID2file,uttID2text,feat_dir,set,nj)
    end

    # split utterances; Sets make the membership tests below constant time
    uttIDs = collect(keys(uttID2text))
    Nu = length(uttIDs)
    delta = div(Nu, nj)
    chunks = [Set(uttIDs[1+(i-1)*delta:(i == nj ? Nu : i*delta)]) for i in 1:nj]
    uttID2files = [filter(p -> p.first in chunk, uttID2file) for chunk in chunks]
    uttID2texts = [filter(p -> p.first in chunk, uttID2text) for chunk in chunks]
    pmap(zip(1:nj, uttID2files, uttID2texts)) do (jobid, files, texts)
        process_data(dataset_path,lexicon,L,H,Fs,subsample,
                     files,texts,feat_dir,set,nj; jobid=jobid)
    end

    uttID2feats = Dict()
    uttID2phones = Dict()
    uttID2tr = Dict{String,Vector{Pair{Vector{Int},Vector{Int}}}}()
    println("Merging files")
    q = joinpath(feat_dir, "q_split_$set")
    for jobid in 1:nj
        data = load(joinpath(q, "$jobid.jld2"))
        merge!(uttID2feats, data["uttID2feats"])
        merge!(uttID2phones, data["uttID2phones"])
        merge!(uttID2tr, data["uttID2tr"])
    end
    rm(q; recursive=true)
    return uttID2feats, uttID2tr, uttID2text, uttID2phones
end

###
# feature configuration, lexicon and the H and L transducers on every process
@everywhere begin
    include("conf/$(setup)/feat_conf.jl")
    lexicon, ilexicon = get_lexicon()
    H, L = get_HL(lexicon)
end

dataset_path = ENV["TIDIGITS_PATH"]
T = @elapsed uttID2feats_train, uttID2tr_train, uttID2text_train, uttID2phones_train =
    prepare_data(dataset_path,lexicon,L,H,Fs,subsample,"train",feat_dir,nj)
println("Done in $T sec")
T = @elapsed uttID2feats_test, uttID2tr_test, uttID2text_test, uttID2phones_test =
    prepare_data(dataset_path,lexicon,L,H,Fs,subsample,"test",feat_dir,nj)
println("Done in $T sec")

if nj > 1
    t = rmprocs(workers())
    wait(t)
end

# test data in TIDIGITS has same size of train, so we repartition it
uttID2feats_all  = merge(uttID2feats_train,  uttID2feats_test)
uttID2tr_all     = merge(uttID2tr_train,     uttID2tr_test)
uttID2text_all   = merge(uttID2text_train,   uttID2text_test)
uttID2phones_all = merge(uttID2phones_train, uttID2phones_test)
uttIDs_all = [keys(uttID2text_all)...]
Nu = length(uttIDs_all)
# 70 % train, 20 % test, 10 % dev
idx_train, idx_test = round(Int, Nu*0.7), round(Int, Nu*0.9)
set2uttID = Dict()
set2uttID["train"] = uttIDs_all[1:idx_train]
set2uttID["test"]  = uttIDs_all[idx_train+1:idx_test]
set2uttID["dev"]   = uttIDs_all[idx_test+1:end]

mkpath(feat_dir)
for set in ("train","test","dev")
    ids = Set(set2uttID[set])
    # JLD2.@save stores the variables under these exact names
    uttID2feats  = filter(p -> p.first in ids, uttID2feats_all)
    uttID2tr     = filter(p -> p.first in ids, uttID2tr_all)
    uttID2text   = filter(p -> p.first in ids, uttID2text_all)
    uttID2phones = filter(p -> p.first in ids, uttID2phones_all)
    JLD2.@save joinpath(feat_dir,"$set.jld2") uttID2feats uttID2tr uttID2text uttID2phones
end
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello
#
# Training script, first part: command line parsing, model construction and
# loading of the train and dev features prepared by prepare_data.jl.

using ArgParse

s = ArgParseSettings()
@add_arg_table! s begin
    "--conf"
        help = "configuration setup"
        arg_type = String
        default = "1a"
end
parsed_args = parse_args(ARGS, s)
setup = parsed_args["conf"]

using HMMGradients, Flux, Zygote
using Random, Statistics, LinearAlgebra
using FiniteStateTransducers
using DSP, MFCC
using BSON, JLD2, LibSndFile, FileIO, UUIDs

include("WFSTs.jl")
include("Models.jl")
include("Utils.jl")
include("conf/$(setup)/feat_conf.jl")
include("conf/$(setup)/model_conf.jl")

# get transition matrix
lexicon, ilexicon = get_lexicon()
H, L = get_HL(lexicon)
a, A = get_aA(H)
Ns = size(A,1)

# init model
modely = get_convnet(Nf, Ns;
    Nks=Nks,
    Nhs=Nhs,
    strides=strides,
    dilations=dilations,
    dropout=dropout,
    fout=fout)

# load training data
feat_dir = get_feat_dir(setup)
data = load(joinpath(feat_dir, "train.jld2"))
uttID2feats, uttID2tr, uttID2text, uttID2phones =
    data["uttID2feats"], data["uttID2tr"], data["uttID2text"], data["uttID2phones"]
# curriculum data (isolated words)
uttID2text_cur = filter(p -> length(split(p.second)) == 1, uttID2text)

# load dev data
data = load(joinpath(feat_dir, "dev.jld2"))
uttID2feats_dev, uttID2tr_dev, uttID2text_dev, uttID2phones_dev =
    data["uttID2feats"], data["uttID2tr"], data["uttID2text"], data["uttID2phones"]
# curriculum data (isolated words)
uttID2text_dev_cur = filter(p -> length(split(p.second)) == 1, uttID2text_dev)

# dataloaders
# Inputs and targets of a set are always gathered from ONE list of utterance
# ids: two different Dicts are not guaranteed to iterate their keys in the
# same order, so iterating each Dict separately could pair the features of
# one utterance with the targets of another.
uttIDs_cur       = collect(keys(uttID2text_cur))
uttIDs_train     = collect(keys(uttID2feats))
uttIDs_dev_cur   = collect(keys(uttID2text_dev_cur))
uttIDs_dev       = collect(keys(uttID2feats_dev))

Xs_cur = [uttID2feats[uttID] for uttID in uttIDs_cur]
Ys_cur = [uttID2tr[uttID]    for uttID in uttIDs_cur]
Xs     = [uttID2feats[uttID] for uttID in uttIDs_train]
Ys     = [uttID2tr[uttID]    for uttID in uttIDs_train]

Xs_test_cur = [uttID2feats_dev[uttID]  for uttID in uttIDs_dev_cur]
Ys_test_cur = [uttID2phones_dev[uttID] for uttID in uttIDs_dev_cur]
Xs_test     = [uttID2feats_dev[uttID]  for uttID in uttIDs_dev]
Ys_test     = [uttID2phones_dev[uttID] for uttID in uttIDs_dev]

N_cur  = length(Xs_cur)
N      = length(Xs)
N_test = length(Xs_test)
# Mini-batch iterators. The curriculum set is visited in a fixed order, the
# full training set is reshuffled; the dev loaders carry phone references.
cur_data      = Flux.Data.DataLoader((Xs_cur, Ys_cur), batchsize=Nb, shuffle=false)
train_data    = Flux.Data.DataLoader((Xs, Ys), batchsize=Nb, shuffle=true)
test_data_cur = Flux.Data.DataLoader((Xs_test_cur, Ys_test_cur), batchsize=Nb)
test_data     = Flux.Data.DataLoader((Xs_test, Ys_test), batchsize=Nb)

# define maximum likelihood function
#
# Cost of one batch: `nlogMLlog` of the model output plus `λ1` times the
# 1-norm of the exponentiated output.
# `model` is the network that is evaluated. It defaults to the global `modely`
# so existing calls keep working; `train!` passes the model it is optimising
# so that loss and optimised parameters always belong to the same network.
function loss(Nt,t2tr,A,x,λ1; model=modely)
    y = model(x)
    return nlogMLlog(Nt,t2tr,A,y) + λ1 * norm(exp.(y), 1)
end

# Phone error rate of `modely` on `test_data`, a loader yielding
# (features, reference phones) batches: summed Levenshtein distance between
# reference and decoded phones divided by the number of reference phones.
# The model is put in test mode for the evaluation and back in train mode
# before returning.
# `stride` is the factor used to derive the number of output frames of each
# utterance from its input length; it defaults to the `subsample` value of
# the feature configuration, which model_conf.jl also uses as the stride of
# the last layer.
function test(modely,a,A,ippsym,test_data; stride=subsample)
    Flux.testmode!(modely)
    Nw = 0
    err = 0
    for (x,ps) in test_data
        x = feats_post.(x)
        # output frames per utterance; frames beyond them come from zero padding
        Nts = ceil.(Int, size.(x,1) ./ stride)
        xb = zeropad(x)
        y = modely(xb)
        for i in eachindex(Nts)
            gamma = logposterior(Nts[i],a,A,view(y,:,:,i))
            ps_dec = posterior2phones(ippsym,gamma)
            Nw += length(ps[i])
            err += levenshtein(ps[i],ps_dec)
        end
    end
    per = err / Nw
    Flux.trainmode!(modely)
    return per
end

# Train `modely` in place for `epochs` epochs on `train_data` and evaluate it
# on `test_data` after every epoch.
#
# Whenever the phone error rate does not get worse, a copy of the model is
# kept as the best one and saved to models/<setup>/current_modely.bson
# (`setup` is the global set from the command line).
# Returns `(best_modely, modely)`, both switched to test mode.
# Throws an error as soon as a batch yields a NaN or infinite cost.
function train!(modely,a,A,H,opt,λ1,epochs,train_data,test_data)
    Flux.trainmode!(modely)
    N = length(train_data.data[1])
    ps = Flux.params(modely)
    best_per = Inf
    best_modely = deepcopy(modely)
    ippsym = get_iisym(H)
    for e in 1:epochs
        cost = 0f0
        for (x,t2trs) in train_data
            x = feats_post.(x)
            Nts = length.(t2trs) .+ 1
            xb = zeropad(x)
            train_loss, back = Zygote.pullback(ps) do
                loss(Nts,t2trs,A,xb,λ1; model=modely)
            end
            isfinite(train_loss) || error("Nan/Inf cost function!!")
            cost += train_loss
            gs = back(one(Float32))
            Flux.update!(opt, ps, gs)
        end
        per = test(modely,a,A,ippsym,test_data)
        save_best = per <= best_per
        if save_best
            best_modely = deepcopy(modely)
            best_per = per
            BSON.@save "models/$setup/current_modely.bson" best_modely
        end
        println("epoch: $e cost: $(round(cost/N,digits=4)) PER: $(round(per*100,digits=3))" * (save_best ? " ⋆ " : ""))
    end
    Flux.testmode!(best_modely)
    Flux.testmode!(modely)
    return best_modely, modely
end

model_folder = joinpath("models","$setup")
mkpath(model_folder)
println("Using setup: $setup")
println(read("conf/$setup/model_conf.jl",String))
if curriculum_training
    # first stage on isolated words; the best model of this stage is the
    # starting point of the full training
    println("Curriculum training with $N_cur isolated words")
    best_modely, modely = train!(modely,a,A,H,opt,λ1,epochs_cur,cur_data,test_data_cur)
    BSON.@save joinpath(model_folder,"best_modely_curriculum.bson") best_modely
    modely = deepcopy(best_modely)
end
println("Training with $N utterances")
best_modely, modely = train!(modely,a,A,H,opt,λ1,epochs,train_data,test_data)
BSON.@save joinpath(model_folder,"best_modely_final.bson") best_modely
BSON.@save joinpath(model_folder,"modely.bson") modely