From e591ff2e743dc64e5d76e8e3b4c4b9bb60217bca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 1 Jul 2024 22:50:36 +0200 Subject: [PATCH] fix(initializer): only select backends that exist (#2694) We were not checking whether the backend binary exists before picking it up from the asset dir. Signed-off-by: Ettore Di Giacinto --- Dockerfile | 2 ++ pkg/model/initializers.go | 39 ++++++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index eb5c9b056e7a..ac42db5d7ad7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local # Rebuild with defaults backends WORKDIR /build + +## Build the binary RUN make build RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 92b3c0a0849e..901b4d993dcc 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string { } if xsysinfo.HasCPUCaps(cpuid.AVX2) { - log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) - grpcProcess = backendPath(assetDir, LLamaCPPAVX2) + p := backendPath(assetDir, LLamaCPPAVX2) + if _, err := os.Stat(p); err == nil { + log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) + grpcProcess = p + } } else if xsysinfo.HasCPUCaps(cpuid.AVX) { - log.Info().Msgf("[%s] attempting to load with AVX variant", backend) - grpcProcess = backendPath(assetDir, LLamaCPPAVX) + p := backendPath(assetDir, LLamaCPPAVX) + if _, err := os.Stat(p); err == nil { + log.Info().Msgf("[%s] attempting to load with AVX variant", backend) + grpcProcess = p + } } else { - log.Info().Msgf("[%s] attempting to load with fallback variant", backend) - grpcProcess = backendPath(assetDir, LLamaCPPFallback) + p := backendPath(assetDir, LLamaCPPFallback) + if _, err := os.Stat(p); err == nil { + log.Info().Msgf("[%s] 
attempting to load with fallback variant", backend) + grpcProcess = p + } } return grpcProcess @@ -511,11 +520,23 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { } if autoDetect && key == LLamaCPP && err != nil { - backendToUse := LLamaCPPFallback + // try as hard as possible to run the llama.cpp variants + backendToUse := "" if xsysinfo.HasCPUCaps(cpuid.AVX2) { - backendToUse = LLamaCPPAVX2 + if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil { + backendToUse = LLamaCPPAVX2 + } } else if xsysinfo.HasCPUCaps(cpuid.AVX) { - backendToUse = LLamaCPPAVX + if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil { + backendToUse = LLamaCPPAVX + } + } else { + if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil { + backendToUse = LLamaCPPFallback + } else { + // If we don't have a fallback, just skip fallback + continue + } } // Autodetection failed, try the fallback