Automatically select Intel backend

mudler · Jun 5, 2024 · 5e2e5fc · 5e2e5fc
1 parent 9be16b5
commit 5e2e5fc
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 5 deletions.
diff --git a/Makefile b/Makefile
@@ -328,6 +328,8 @@ ifeq ($(OS),Darwin)
 else
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
 	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
+	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
+	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
 endif
 	$(MAKE) build
 	mkdir -p release

diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
@@ -38,7 +38,10 @@ const (
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA     = "llama-cpp-cuda"
 	LLamaCPPHipblas  = "llama-cpp-hipblas"
-	LLamaCPPGRPC     = "llama-cpp-grpc"
+	LLamaCPPSycl16   = "llama-cpp-sycl_f16" // must match the Makefile asset name backend-assets/grpc/llama-cpp-sycl_f16
+	LLamaCPPSycl32   = "llama-cpp-sycl_f32" // must match the Makefile asset name backend-assets/grpc/llama-cpp-sycl_f32
+
+	LLamaCPPGRPC = "llama-cpp-grpc"
 
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend   = "gpt4all-mpt"
@@ -94,7 +97,7 @@ ENTRY:
 	if autoDetect {
 		// if we find the llama.cpp variants, show them as a single backend (llama-cpp), as later we are going to pick that up
 		// when starting the service
-		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
+		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas, foundSycl16, foundSycl32 := false, false, false, false, false, false, false, false
 		if _, ok := backends[LLamaCPP]; !ok {
 			for _, e := range entry {
 				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -121,6 +124,14 @@ ENTRY:
 					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
 					foundLCPPHipblas = true
 				}
+				if strings.Contains(e.Name(), LLamaCPPSycl16) && !foundSycl16 {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl16)
+					foundSycl16 = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPSycl32) && !foundSycl32 {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl32)
+					foundSycl32 = true
+				}
 			}
 		}
 	}
@@ -172,9 +183,10 @@ ENTRY:
 }
 
 // selectGRPCProcess selects the GRPC process to start based on system capabilities
-func selectGRPCProcess(backend, assetDir string) string {
+func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	foundCUDA := false
 	foundAMDGPU := false
+	foundIntelGPU := false
 	var grpcProcess string
 
 	// Select backend now just for llama.cpp
@@ -211,10 +223,24 @@ func selectGRPCProcess(backend, assetDir string) string {
 					log.Info().Msgf("GPU device found but no HIPBLAS backend present")
 				}
 			}
+			if strings.Contains(gpu.String(), "intel") {
+				backend := LLamaCPPSycl16
+				if !f16 {
+					backend = LLamaCPPSycl32
+				}
+				p := backendPath(assetDir, backend)
+				if _, err := os.Stat(p); err == nil {
+					log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
+					grpcProcess = p
+					foundIntelGPU = true
+				} else {
+					log.Info().Msgf("GPU device found but no Intel backend present")
+				}
+			}
 		}
 	}
 
-	if foundCUDA || foundAMDGPU {
+	if foundCUDA || foundAMDGPU || foundIntelGPU {
 		return grpcProcess
 	}
 
@@ -236,6 +262,7 @@ func selectGRPCProcess(backend, assetDir string) string {
 // It also loads the model
 func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
 	return func(modelName, modelFile string) (ModelAddress, error) {
+
 		log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelName, modelFile, backend, *o)
 
 		var client ModelAddress
@@ -284,7 +311,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 
 			if autoDetect {
 				// autoDetect GRPC process to start based on system capabilities
-				if selectedProcess := selectGRPCProcess(backend, o.assetDir); selectedProcess != "" {
+				if selectedProcess := selectGRPCProcess(backend, o.assetDir, o.gRPCOptions.F16Memory); selectedProcess != "" {
 					grpcProcess = selectedProcess
 				}
 			}