diff --git a/.drone.yml b/.drone.yml
index 3c25e3927..178f419e5 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -162,7 +162,7 @@ steps:
         # Runner with no baked models = empty
         # See https://github.com/helixml/base-images
         # and https://github.com/helixml/base-images/releases
-        - TAG=2024-12-06a-empty
+        - TAG=2024-12-07a-empty
         - APP_VERSION=${DRONE_TAG:-${DRONE_COMMIT_SHA:-latest}}
       username: admin
       password:
@@ -204,7 +204,7 @@ steps:
         # Runner with small models = small
         # See https://github.com/helixml/base-images
         # and https://github.com/helixml/base-images/releases
-        - TAG=2024-12-06a-small
+        - TAG=2024-12-07a-small
         - APP_VERSION=${DRONE_TAG:-${DRONE_COMMIT_SHA:-latest}}
       username: admin
       password:
@@ -232,7 +232,7 @@ steps:
         # Runner with small models = small
         # See https://github.com/helixml/base-images
         # and https://github.com/helixml/base-images/releases
-        - TAG=2024-12-06a-small
+        - TAG=2024-12-07a-small
         - APP_VERSION=${DRONE_TAG:-${DRONE_COMMIT_SHA:-latest}}
       username: admin
       password:
@@ -277,7 +277,7 @@ steps:
         # Runner with large models = large
         # See https://github.com/helixml/base-images
         # and https://github.com/helixml/base-images/releases
-        - TAG=2024-12-06a-large
+        - TAG=2024-12-07a-large
         - APP_VERSION=${DRONE_TAG:-${DRONE_COMMIT_SHA:-latest}}
       username: admin
       password:
@@ -305,7 +305,7 @@ steps:
         # Runner with large models = large
         # See https://github.com/helixml/base-images
         # and https://github.com/helixml/base-images/releases
-        - TAG=2024-12-06a-large
+        - TAG=2024-12-07a-large
        - APP_VERSION=${DRONE_TAG:-${DRONE_COMMIT_SHA:-latest}}
       username: admin
       password:
diff --git a/Dockerfile.runner b/Dockerfile.runner
index f2f0f22fd..5f07d21d2 100644
--- a/Dockerfile.runner
+++ b/Dockerfile.runner
@@ -1,6 +1,6 @@
 #syntax=docker/dockerfile:1.4
 
-ARG TAG=main-empty
+ARG TAG=latest-small
 ARG UV_VERSION="0.5.4"
 
 FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv
@@ -45,7 +45,7 @@ FROM registry.helix.ml/helix/runner-base:${TAG}
 
 # Install ollama
 RUN TEMP_DIR=$(mktemp -d /tmp/ollama_install_XXXXXX) && \
-    curl --retry 5 -L https://github.com/ollama/ollama/releases/download/v0.3.13/ollama-linux-amd64.tgz -o $TEMP_DIR/ollama.tgz && \
+    curl --retry 5 -L https://github.com/ollama/ollama/releases/download/v0.5.1/ollama-linux-amd64.tgz -o $TEMP_DIR/ollama.tgz && \
     tar -xzf $TEMP_DIR/ollama.tgz -C $TEMP_DIR && \
     mv $TEMP_DIR/bin/ollama /usr/bin/ollama && \
     chmod +x /usr/bin/ollama && \
diff --git a/api/pkg/config/runner_config.go b/api/pkg/config/runner_config.go
index e27e62822..14899bc32 100644
--- a/api/pkg/config/runner_config.go
+++ b/api/pkg/config/runner_config.go
@@ -29,7 +29,7 @@ type Runtimes struct {
     V2Engine bool `envconfig:"RUNTIME_V2_ENGINE" default:"true"`
     Axolotl  struct {
         Enabled      bool          `envconfig:"RUNTIME_AXOLOTL_ENABLED" default:"true"`
-        WarmupModels []string      `envconfig:"RUNTIME_AXOLOTL_WARMUP_MODELS" default:"mistralai/Mistral-7B-Instruct-v0.1"`
+        WarmupModels []string      `envconfig:"RUNTIME_AXOLOTL_WARMUP_MODELS" default:""`
         InstanceTTL  time.Duration `envconfig:"RUNTIME_AXOLOTL_INSTANCE_TTL" default:"10s"`
     }
     Ollama OllamaRuntimeConfig
@@ -37,6 +37,6 @@ type Runtimes struct {
 
 type OllamaRuntimeConfig struct {
     Enabled      bool          `envconfig:"RUNTIME_OLLAMA_ENABLED" default:"true"`
-    WarmupModels []string      `envconfig:"RUNTIME_OLLAMA_WARMUP_MODELS" default:"llama3:instruct,llama3.1:8b-instruct-q8_0,llama3.2:1b-instruct-q8_0,llama3.2:3b-instruct-q8_0,phi3.5:3.8b-mini-instruct-q8_0"`
+    WarmupModels []string      `envconfig:"RUNTIME_OLLAMA_WARMUP_MODELS" default:"llama3.1:8b-instruct-q8_0"`
     InstanceTTL  time.Duration `envconfig:"RUNTIME_OLLAMA_INSTANCE_TTL" default:"10s"`
 }
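The trimmed warm-up defaults above are plain envconfig tags, so operators can still pre-pull extra models per runner by setting the environment variable. A minimal sketch of how such a struct is populated (assuming github.com/kelseyhightower/envconfig, which this tag syntax suggests; the standalone main is illustrative and not part of this diff):

    package main

    import (
    	"fmt"
    	"time"

    	"github.com/kelseyhightower/envconfig"
    )

    // Mirrors the OllamaRuntimeConfig struct above.
    type OllamaRuntimeConfig struct {
    	Enabled      bool          `envconfig:"RUNTIME_OLLAMA_ENABLED" default:"true"`
    	WarmupModels []string      `envconfig:"RUNTIME_OLLAMA_WARMUP_MODELS" default:"llama3.1:8b-instruct-q8_0"`
    	InstanceTTL  time.Duration `envconfig:"RUNTIME_OLLAMA_INSTANCE_TTL" default:"10s"`
    }

    func main() {
    	// e.g. RUNTIME_OLLAMA_WARMUP_MODELS=llama3.1:8b-instruct-q8_0,phi3.5:3.8b-mini-instruct-q8_0
    	// overrides the default; comma-separated values decode into []string.
    	var cfg OllamaRuntimeConfig
    	if err := envconfig.Process("", &cfg); err != nil {
    		panic(err)
    	}
    	fmt.Printf("%+v\n", cfg)
    }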
`envconfig:"RUNTIME_OLLAMA_INSTANCE_TTL" default:"10s"` } diff --git a/api/pkg/model/models.go b/api/pkg/model/models.go index 7c235d3b3..ed0385931 100644 --- a/api/pkg/model/models.go +++ b/api/pkg/model/models.go @@ -177,25 +177,11 @@ const ( func GetDefaultDiffusersModels() ([]*DiffusersGenericImage, error) { return []*DiffusersGenericImage{ - { - Id: Model_Diffusers_SD35, - Name: "Stable Diffusion 3.5 Medium", - Memory: GB * 24, - Description: "Medium model, from Stability AI", - Hide: false, - }, - { - Id: Model_Diffusers_SDTurbo, - Name: "Stable Diffusion Turbo", - Memory: GB * 5, - Description: "Turbo model, from Stability AI", - Hide: false, - }, { Id: Model_Diffusers_FluxDev, - Name: "Flux 1 Dev", + Name: "FLUX.1-dev", Memory: GB * 39, - Description: "Dev model, from Black Forest Labs", + Description: "High quality image model, from Black Forest Labs", Hide: false, }, }, nil @@ -204,9 +190,9 @@ func GetDefaultDiffusersModels() ([]*DiffusersGenericImage, error) { // See also types/models.go for model name constants func GetDefaultOllamaModels() ([]*OllamaGenericText, error) { models := []*OllamaGenericText{ - // Latest models, Oct 2024 updates + // Latest models, Dec 2024 updates { - Id: "llama3.1:8b-instruct-q4_K_M", // https://ollama.com/library/llama3.1:8b-instruct-q4_K_M + Id: "llama3.1:8b-instruct-q8_0", // https://ollama.com/library/llama3.1:8b-instruct-q8_0 Name: "Llama 3.1 8B", Memory: GB * 15, ContextLength: 32768, // goes up to 128k, but then uses 35GB @@ -214,8 +200,8 @@ func GetDefaultOllamaModels() ([]*OllamaGenericText, error) { Hide: false, }, { - Id: "llama3.1:70b-instruct-q4_K_M", // https://ollama.com/library/llama3.1:70b-instruct-q4_K_M - Name: "Llama 3.1 70B", + Id: "llama3.3:70b-instruct-q4_K_M", // https://ollama.com/library/llama3.1:70b-instruct-q4_K_M + Name: "Llama 3.3 70B", Memory: GB * 48, ContextLength: 16384, Description: "Smarter but slower, from Meta - 4bit quantized, 16K context", @@ -237,15 +223,6 @@ func GetDefaultOllamaModels() ([]*OllamaGenericText, error) { Description: "Small model, from Meta - 8bit quantized, 128K context", Hide: false, }, - // Old llama3:instruct, leaving in here because the id is in lots of our examples - { - Id: "llama3:instruct", // https://ollama.com/library/llama3:instruct - Name: "Llama 3 8B", - Memory: MB * 6390, - ContextLength: 8192, - Description: "Older model, from Meta - 4bit quantized, 8K context", - Hide: false, - }, { Id: "phi3.5:3.8b-mini-instruct-q8_0", // https://ollama.com/library/phi3.5:3.8b-mini-instruct-q8_0 Name: "Phi 3.5 3.8B", @@ -254,30 +231,6 @@ func GetDefaultOllamaModels() ([]*OllamaGenericText, error) { Description: "Fast and good for everyday tasks, from Microsoft - 8bit quantized, 64K context", Hide: false, }, - { - Id: "gemma2:2b-instruct-q8_0", // https://ollama.com/library/gemma2:2b-instruct-q8_0 - Name: "Gemma 2 2B", - Memory: MB * 4916, - ContextLength: 8192, - Description: "Fast and good for everyday tasks, from Google - 8bit quantized, 8K context", - Hide: false, - }, - { - Id: "gemma2:9b-instruct-q8_0", // https://ollama.com/library/gemma2:9b-instruct-q8_0 - Name: "Gemma 2 9B", - Memory: GB * 13, - ContextLength: 8192, - Description: "Fast and good for everyday tasks, from Google - 8bit quantized, 8K context", - Hide: false, - }, - { - Id: "gemma2:27b-instruct-q8_0", // https://ollama.com/library/gemma2:27b-instruct-q8_0 - Name: "Gemma 2 27B", - Memory: GB * 34, - ContextLength: 8192, - Description: "Large model with enhanced capabilities, from Google - 8bit quantized, 8K context", - 
diff --git a/api/pkg/runner/llm_ollama_model_instance.go b/api/pkg/runner/llm_ollama_model_instance.go
index 5d1a84af9..899e45bc2 100644
--- a/api/pkg/runner/llm_ollama_model_instance.go
+++ b/api/pkg/runner/llm_ollama_model_instance.go
@@ -324,6 +324,7 @@ func (i *OllamaInferenceModelInstance) startOllamaServer(_ context.Context) erro
         "OLLAMA_MAX_LOADED_MODELS=1",
         "OLLAMA_NUM_PARALLEL=1",
         "OLLAMA_FLASH_ATTENTION=1",
+        "OLLAMA_KV_CACHE_TYPE=q8_0",
         "HTTP_PROXY="+os.Getenv("HTTP_PROXY"),
         "HTTPS_PROXY="+os.Getenv("HTTPS_PROXY"),
         "OLLAMA_HOST="+ollamaHost, // Bind on localhost with random port
diff --git a/api/pkg/scheduler/scheduler_test.go b/api/pkg/scheduler/scheduler_test.go
index ea72eb9e4..9b6ba597f 100644
--- a/api/pkg/scheduler/scheduler_test.go
+++ b/api/pkg/scheduler/scheduler_test.go
@@ -330,7 +330,7 @@ func TestScheduler_RunnerWithWrongModel(t *testing.T) {
     assert.NotNil(t, w)
 
     // Test any new work will do part 2 -- new work only, ignore filter
-    err = createTestSession(scheduler, "test-request-2", "adrienbrault/nous-hermes2pro:Q5_K_S", "")
+    err = createTestSession(scheduler, "test-request-2", "phi3:instruct", "")
     assert.NoError(t, err)
     w, err = scheduler.WorkForRunner("test-runner", WorkloadTypeSession, true, "gemma2:2b-instruct-q8_0")
     assert.NoError(t, err)
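For context, the env slice above is handed to the ollama server process via os/exec. Per ollama's documentation, OLLAMA_KV_CACHE_TYPE=q8_0 quantizes the K/V cache to 8-bit, roughly halving its memory footprint versus the default f16, and requires flash attention to be enabled — hence the pairing with OLLAMA_FLASH_ATTENTION=1. A minimal sketch of the pattern (assuming a stock ollama binary; the real runner also wires up ports, GPU selection, and logging):

    package main

    import (
    	"os"
    	"os/exec"
    )

    func main() {
    	cmd := exec.Command("/usr/bin/ollama", "serve")
    	// Inherit the parent environment, then layer the tuning knobs on top.
    	cmd.Env = append(os.Environ(),
    		"OLLAMA_MAX_LOADED_MODELS=1",
    		"OLLAMA_NUM_PARALLEL=1",
    		"OLLAMA_FLASH_ATTENTION=1",
    		"OLLAMA_KV_CACHE_TYPE=q8_0",
    		"OLLAMA_HOST=127.0.0.1:11434",
    	)
    	cmd.Stdout = os.Stdout
    	cmd.Stderr = os.Stderr
    	if err := cmd.Run(); err != nil {
    		panic(err)
    	}
    }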
diff --git a/api/pkg/util/copydir/copy_dir.go b/api/pkg/util/copydir/copy_dir.go
index 6fbd9d862..5c1918002 100644
--- a/api/pkg/util/copydir/copy_dir.go
+++ b/api/pkg/util/copydir/copy_dir.go
@@ -5,14 +5,35 @@ import (
     "os"
     "path/filepath"
     "strings"
+    "time"
+
+    "github.com/rs/zerolog/log"
 )
 
 func CopyDir(dst, src string) error {
+    startTime := time.Now()
     src, err := filepath.EvalSymlinks(src)
     if err != nil {
         return err
     }
 
+    // Check if source and destination are on the same filesystem
+    useSymlinks := sameFilesystem(src, dst)
+
+    // Add counters for operations and timing
+    stats := struct {
+        copies      int
+        symlinks    int
+        skipped     int
+        evalSymTime time.Duration
+        statTime    time.Duration
+        symTime     time.Duration
+        copyTime    time.Duration
+        walkTime    time.Duration
+    }{
+        evalSymTime: time.Since(startTime),
+    }
+
     walkFn := func(path string, info os.FileInfo, err error) error {
         if err != nil {
             return err
@@ -54,30 +75,40 @@ func CopyDir(dst, src string) error {
         // We're mainly copying content addressed blobs here, so this is
         // probably fine.
         // Must use Lstat to get the file status here in case the file is a symlink
+        statStart := time.Now()
         dstInfo, err := os.Lstat(dstPath)
-        if err == nil && dstInfo.Size() == info.Size() {
-            return nil
+        if err == nil {
+            stats.statTime += time.Since(statStart)
+            if dstInfo.Size() == info.Size() {
+                stats.skipped++
+                return nil
+            }
         }
 
         // we don't want to try and copy the same file over itself.
+        statStart = time.Now()
         if eq, err := SameFile(path, dstPath); eq {
+            stats.statTime += time.Since(statStart)
+            stats.skipped++
             return nil
         } else if err != nil {
+            stats.statTime += time.Since(statStart)
             return err
         }
 
-        // If the current path is a symlink, recreate the symlink relative to
-        // the dst directory
-        if info.Mode()&os.ModeSymlink == os.ModeSymlink {
-            target, err := os.Readlink(path)
-            if err != nil {
-                return err
+        // Try to create a symlink if we're on the same filesystem
+        if useSymlinks {
+            symStart := time.Now()
+            err = os.Symlink(path, dstPath)
+            stats.symTime += time.Since(symStart)
+            if err == nil {
+                stats.symlinks++
+                return nil
             }
-
-            return os.Symlink(target, dstPath)
         }
 
-        // If we have a file, copy the contents.
+        // If symlinking is disabled or fails, fall back to copying
+        copyStart := time.Now()
         srcF, err := os.Open(path)
         if err != nil {
             return err
@@ -94,11 +125,32 @@ func CopyDir(dst, src string) error {
             return err
         }
 
-        // Chmod it
+        stats.copies++
+        stats.copyTime += time.Since(copyStart)
         return os.Chmod(dstPath, info.Mode())
     }
 
-    return filepath.Walk(src, walkFn)
+    walkStart := time.Now()
+    err = filepath.Walk(src, walkFn)
+    stats.walkTime = time.Since(walkStart)
+    if err != nil {
+        return err
+    }
+
+    log.Info().
+        Int("symlinks", stats.symlinks).
+        Int("copies", stats.copies).
+        Int("skipped", stats.skipped).
+        Dur("eval_symlinks_time", stats.evalSymTime).
+        Dur("stat_time", stats.statTime).
+        Dur("sym_time", stats.symTime).
+        Dur("copy_time", stats.copyTime).
+        Dur("walk_time", stats.walkTime).
+        Dur("total_time", time.Since(startTime)).
+        Str("src", src).
+        Str("dst", dst).
+        Msg("CopyDir completed")
+    return nil
 }
 
 // SameFile returns true if the two given paths refer to the same physical
@@ -126,5 +178,17 @@ func SameFile(a, b string) (bool, error) {
         return false, err
     }
 
+    // If b is a symlink, check if it points to a
+    if bInfo.Mode()&os.ModeSymlink != 0 {
+        target, err := os.Readlink(b)
+        if err != nil {
+            return false, err
+        }
+        // If the symlink points to our source file, they're the same
+        if target == a {
+            return true, nil
+        }
+    }
+
     return os.SameFile(aInfo, bInfo), nil
 }
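The caller-facing contract of CopyDir is unchanged by this refactor; only the strategy differs. A minimal usage sketch (import path assumed from the repo layout; the paths are illustrative):

    package main

    import (
    	"github.com/helixml/helix/api/pkg/util/copydir"
    	"github.com/rs/zerolog/log"
    )

    func main() {
    	// On the same filesystem dst is populated with symlinks back into src
    	// (the fast path); across filesystems it falls back to byte-for-byte
    	// copies. Either way the zerolog line reports counts and timings.
    	if err := copydir.CopyDir("/tmp/models-copy", "/tmp/models"); err != nil {
    		log.Fatal().Err(err).Msg("copy failed")
    	}
    }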
diff --git a/api/pkg/util/copydir/copy_dir_unix.go b/api/pkg/util/copydir/copy_dir_unix.go
new file mode 100644
index 000000000..5dd58b293
--- /dev/null
+++ b/api/pkg/util/copydir/copy_dir_unix.go
@@ -0,0 +1,16 @@
+//go:build !windows
+
+package copydir
+
+import "syscall"
+
+func sameFilesystem(path1, path2 string) bool {
+    var stat1, stat2 syscall.Stat_t
+    if err := syscall.Stat(path1, &stat1); err != nil {
+        return false
+    }
+    if err := syscall.Stat(path2, &stat2); err != nil {
+        return false
+    }
+    return stat1.Dev == stat2.Dev
+}
diff --git a/api/pkg/util/copydir/copy_dir_windows.go b/api/pkg/util/copydir/copy_dir_windows.go
new file mode 100644
index 000000000..9213557ad
--- /dev/null
+++ b/api/pkg/util/copydir/copy_dir_windows.go
@@ -0,0 +1,8 @@
+//go:build windows
+
+package copydir
+
+func sameFilesystem(path1, path2 string) bool {
+    // Windows build - just return false to force copy mode
+    return false
+}
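One subtlety worth noting: syscall.Stat fails for paths that do not exist yet, so if dst has not been created before CopyDir runs, sameFilesystem reports false and the walk falls back to copy mode. A quick sanity check of the helper (hypothetical test file, not part of this diff):

    package copydir

    import (
    	"os"
    	"path/filepath"
    	"testing"
    )

    func TestSameFilesystem(t *testing.T) {
    	dir := t.TempDir()
    	a := filepath.Join(dir, "a")
    	b := filepath.Join(dir, "b")
    	for _, p := range []string{a, b} {
    		if err := os.WriteFile(p, []byte("x"), 0o644); err != nil {
    			t.Fatal(err)
    		}
    	}
    	// Two files under the same temp dir share a device ID.
    	if !sameFilesystem(a, b) {
    		t.Error("expected same filesystem")
    	}
    	// A missing path makes syscall.Stat fail, so the helper
    	// conservatively returns false (forcing copy mode).
    	if sameFilesystem(a, filepath.Join(dir, "missing")) {
    		t.Error("expected false for missing path")
    	}
    }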
+ Msg("CopyDir completed") + return nil } // SameFile returns true if the two given paths refer to the same physical @@ -126,5 +178,17 @@ func SameFile(a, b string) (bool, error) { return false, err } + // If b is a symlink, check if it points to a + if bInfo.Mode()&os.ModeSymlink != 0 { + target, err := os.Readlink(b) + if err != nil { + return false, err + } + // If the symlink points to our source file, they're the same + if target == a { + return true, nil + } + } + return os.SameFile(aInfo, bInfo), nil } diff --git a/api/pkg/util/copydir/copy_dir_unix.go b/api/pkg/util/copydir/copy_dir_unix.go new file mode 100644 index 000000000..5dd58b293 --- /dev/null +++ b/api/pkg/util/copydir/copy_dir_unix.go @@ -0,0 +1,16 @@ +//go:build !windows + +package copydir + +import "syscall" + +func sameFilesystem(path1, path2 string) bool { + var stat1, stat2 syscall.Stat_t + if err := syscall.Stat(path1, &stat1); err != nil { + return false + } + if err := syscall.Stat(path2, &stat2); err != nil { + return false + } + return stat1.Dev == stat2.Dev +} diff --git a/api/pkg/util/copydir/copy_dir_windows.go b/api/pkg/util/copydir/copy_dir_windows.go new file mode 100644 index 000000000..9213557ad --- /dev/null +++ b/api/pkg/util/copydir/copy_dir_windows.go @@ -0,0 +1,8 @@ +//go:build windows + +package copydir + +func sameFilesystem(path1, path2 string) bool { + // Windows build - just return false to force copy mode + return false +} diff --git a/charts/helix-runner/Chart.yaml b/charts/helix-runner/Chart.yaml index f633921d6..0e08a882e 100644 --- a/charts/helix-runner/Chart.yaml +++ b/charts/helix-runner/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.3.1 +version: 0.3.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml index 38d95d645..c963ec2a5 100644 --- a/docker-compose.dev.yaml +++ b/docker-compose.dev.yaml @@ -191,14 +191,19 @@ services: - api dev_gpu_runner: profiles: ["dev_gpu_runner"] - image: ${RUNNER_IMAGE:-registry.helix.ml/helix/runner:latest-large} + build: + context: . + dockerfile: Dockerfile.runner + args: + TAG: 2024-12-07a-small + #image: ${RUNNER_IMAGE:-registry.helix.ml/helix/runner:latest-large} entrypoint: ${RUNNER_ENTRYPOINT:-tail -f /dev/null} env_file: - .env volumes: - .:/workspace/helix - ./cog/helix_cog_wrapper.py:/workspace/cog-sdxl/helix_cog_wrapper.py - - ~/.cache/huggingface:/root/.cache/huggingface + # - ~/.cache/huggingface:/root/.cache/huggingface # comment these out if you don't have appropriate repos checked out #- ../cog-sdxl/predict.py:/workspace/cog-sdxl/predict.py #- ../cog-sdxl/weights.py:/workspace/cog-sdxl/weights.py