From a3880034e476a4fb5c1c3ba17635b08e5548fe88 Mon Sep 17 00:00:00 2001
From: Ximin Luo
Date: Wed, 10 Jul 2024 12:01:49 +0100
Subject: [PATCH] Make --always-offload-from-vram actually work properly,
 fixes #3257

---
 ldm_patched/modules/model_management.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py
index 840d79a07..ca919200d 100644
--- a/ldm_patched/modules/model_management.py
+++ b/ldm_patched/modules/model_management.py
@@ -369,12 +369,12 @@ def free_memory(memory_required, device, keep_loaded=[]):
                 unloaded_model = True
 
     if unloaded_model:
-        soft_empty_cache()
+        soft_empty_cache(force=ALWAYS_VRAM_OFFLOAD)
     else:
         if vram_state != VRAMState.HIGH_VRAM:
             mem_free_total, mem_free_torch = get_free_memory(device, torch_free_too=True)
             if mem_free_torch > mem_free_total * 0.25:
-                soft_empty_cache()
+                soft_empty_cache(force=ALWAYS_VRAM_OFFLOAD)
 
 def load_models_gpu(models, memory_required=0):
     global vram_state