Don't assign volumes to stub (#493)
erikbern authored Nov 10, 2023
1 parent 3771625 commit 07bbddd
Showing 6 changed files with 23 additions and 28 deletions.
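
The change is mechanical across all six files: each example used to attach its Volume (or NetworkFileSystem) to the Stub as an attribute and reference it as stub.volume; it now keeps the object in a plain module-level variable and references that directly. A minimal sketch of the before/after pattern, assuming the late-2023 Modal client API these examples target (the app name, mount path, and function below are illustrative):

import modal

stub = modal.Stub(name="volume-pattern-sketch")

# Before this commit the examples wrote:
#     stub.volume = modal.Volume.persisted("example-vol")
#     @stub.function(volumes={"/data": stub.volume})
# After, the Volume is a module-level variable referenced directly:
volume = modal.Volume.persisted("example-vol")


@stub.function(volumes={"/data": volume})
def write():
    with open("/data/hello.txt", "w") as f:
        f.write("hello")
    volume.commit()  # persist the write for other containers and runs

The per-file diffs below all follow this shape.
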
5 changes: 2 additions & 3 deletions 06_gpu_and_ml/dreambooth/dreambooth_app.py
@@ -78,7 +78,6 @@
 
 volume = Volume.persisted("dreambooth-finetuning-volume")
 MODEL_DIR = Path("/model")
-stub.volume = volume
 
 # ## Config
 #
@@ -270,7 +269,7 @@ def _exec_subprocess(cmd: list[str]):
     # The trained model artefacts have been output to the volume mounted at `MODEL_DIR`.
     # To persist these artefacts for use in future inference function calls, we 'commit' the changes
     # to the volume.
-    stub.volume.commit()
+    volume.commit()
 
 
 # ## The inference function.
@@ -292,7 +291,7 @@ def __enter__(self):
        from diffusers import DDIMScheduler, StableDiffusionPipeline
 
        # Reload the modal.Volume to ensure the latest state is accessible.
-       stub.volume.reload()
+       volume.reload()
 
        # set up a hugging face inference pipeline using our model
        ddim = DDIMScheduler.from_pretrained(MODEL_DIR, subfolder="scheduler")
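
The commit/reload pair in this file is the synchronization handshake between writers and readers of a persisted Volume: the trainer commits after writing artefacts, and the inference container reloads before reading. A minimal sketch of that handshake, assuming both functions mount the same Volume at the same path (all names are illustrative):

import modal

stub = modal.Stub(name="commit-reload-sketch")
volume = modal.Volume.persisted("sketch-vol")
MODEL_DIR = "/model"


@stub.function(volumes={MODEL_DIR: volume})
def trainer():
    # Write artefacts, then commit so other containers can see them.
    with open(f"{MODEL_DIR}/weights.bin", "wb") as f:
        f.write(b"\x00" * 16)
    volume.commit()


@stub.function(volumes={MODEL_DIR: volume})
def inference():
    # Reload to pick up the latest committed state before reading.
    volume.reload()
    with open(f"{MODEL_DIR}/weights.bin", "rb") as f:
        return len(f.read())
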
5 changes: 2 additions & 3 deletions 06_gpu_and_ml/flan_t5/flan_t5_finetune.py
@@ -39,7 +39,6 @@
 
 stub = Stub(name="example-news-summarizer", image=image)
 output_vol = Volume.persisted("finetune-volume")
-stub.volume = output_vol
 
 # ### Handling preemption
 #
@@ -181,7 +180,7 @@ def on_save(self, args, state, control, **kwargs):
     trainer = Seq2SeqTrainer(
         model=model,
         args=training_args,
-        callbacks=[CheckpointCallback(stub.volume)],
+        callbacks=[CheckpointCallback(output_vol)],
         data_collator=data_collator,
         train_dataset=tokenized_xsum_train,
         eval_dataset=tokenized_xsum_test,
@@ -198,7 +197,7 @@ def on_save(self, args, state, control, **kwargs):
     # Save the trained model and tokenizer to the mounted volume
     model.save_pretrained(str(VOL_MOUNT_PATH / "model"))
     tokenizer.save_pretrained(str(VOL_MOUNT_PATH / "tokenizer"))
-    stub.volume.commit()
+    output_vol.commit()
     print("✅ done")
 
 
1 change: 0 additions & 1 deletion 06_gpu_and_ml/spam-detect/spam_detect/app.py
@@ -18,4 +18,3 @@
 stub = modal.Stub(name="example-spam-detect-llm", image=image)
 # Used to store datasets, trained models, model metadata, config.
 volume = modal.Volume.persisted("example-spam-detect-vol")
-stub.volume = volume
18 changes: 9 additions & 9 deletions 06_gpu_and_ml/spam-detect/spam_detect/train.py
@@ -25,7 +25,7 @@
 import modal
 
 from . import config, dataset, models
-from .app import stub
+from .app import stub, volume
 
 
 def fetch_git_commit_hash(allow_dirty: bool) -> str:
@@ -72,26 +72,26 @@ def fetch_git_commit_hash(allow_dirty: bool) -> str:
     return result.stdout.decode().strip()
 
 
-@stub.function(volumes={config.VOLUME_DIR: stub.volume})
+@stub.function(volumes={config.VOLUME_DIR: volume})
 def init_volume():
     config.MODEL_STORE_DIR.mkdir(parents=True, exist_ok=True)
-    stub.volume.commit() # Persist changes
+    volume.commit() # Persist changes
 
 
 @stub.function(
     timeout=int(timedelta(minutes=8).total_seconds()),
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
 )
 def prep_dataset():
     logger = config.get_logger()
     datasets_path = config.DATA_DIR
     datasets_path.mkdir(parents=True, exist_ok=True)
     dataset.download(base=datasets_path, logger=logger)
-    stub.volume.commit() # Persist changes
+    volume.commit() # Persist changes
 
 
 @stub.function(
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
     secrets=[modal.Secret.from_dict({"PYTHONHASHSEED": "10"})],
     timeout=int(timedelta(minutes=30).total_seconds()),
 )
@@ -108,7 +108,7 @@ def train(
         model_registry_root=config.MODEL_STORE_DIR,
         git_commit_hash=git_commit_hash,
     )
-    stub.volume.commit() # Persist changes
+    volume.commit() # Persist changes
     logger.info(f"saved model to model store. {model_id=}")
     # Reload the model
     logger.info("🔁 testing reload of model")
@@ … @@
 
 
 @stub.function(
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
     secrets=[modal.Secret.from_dict({"PYTHONHASHSEED": "10"})],
     timeout=int(timedelta(minutes=30).total_seconds()),
     gpu=modal.gpu.T4(),
@@ -139,7 +139,7 @@ def train_gpu(
         model_registry_root=config.MODEL_STORE_DIR,
         git_commit_hash=git_commit_hash,
     )
-    stub.volume.commit() # Persist changes
+    volume.commit() # Persist changes
     logger.info(f"saved model to model store. {model_id=}")
 
 
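
Note the import change at the top of this file: once the Volume no longer hangs off the Stub, a multi-module app has to export it from the module that defines it. Roughly, under the package layout these files use (a sketch only; the image setup in app.py is elided):

# spam_detect/app.py -- defines the Stub and the shared Volume at module level
import modal

stub = modal.Stub(name="example-spam-detect-llm")
volume = modal.Volume.persisted("example-spam-detect-vol")

# spam_detect/train.py -- imports both, instead of reaching through stub.volume:
#     from .app import stub, volume
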
8 changes: 3 additions & 5 deletions 06_gpu_and_ml/tensorflow/tensorflow_tutorial.py
@@ -47,7 +47,7 @@
 # We want to run the web server for Tensorboard at the same time as we are training the Tensorflow model.
 # The easiest way to do this is to set up a shared filesystem between the training and the web server.
 
-stub.volume = NetworkFileSystem.new()
+volume = NetworkFileSystem.new()
 logdir = "/tensorboard"
 
 # ## Training function
@@ -61,9 +61,7 @@
 # This makes it a bit easier to run this example even if you don't have Tensorflow installed on you local computer.
 
 
-@stub.function(
-    network_file_systems={logdir: stub.volume}, gpu="any", timeout=600
-)
+@stub.function(network_file_systems={logdir: volume}, gpu="any", timeout=600)
 def train():
     import pathlib
 
@@ -155,7 +153,7 @@ def train():
 # Note that this server will be exposed to the public internet!
 
 
-@stub.function(network_file_systems={logdir: stub.volume})
+@stub.function(network_file_systems={logdir: volume})
 @wsgi_app()
 def tensorboard_app():
     import tensorboard
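
This file differs from the others: it uses an ephemeral NetworkFileSystem rather than a persisted Volume, so the diff contains no commit/reload calls — the web server reads logs while training writes them. A sketch of that sharing pattern under the same assumptions as above (the writer/reader names are illustrative):

import modal
from modal import NetworkFileSystem

stub = modal.Stub(name="nfs-sketch")
volume = NetworkFileSystem.new()  # ephemeral: lives only for this app run
logdir = "/tensorboard"


@stub.function(network_file_systems={logdir: volume})
def writer():
    with open(f"{logdir}/events.txt", "w") as f:
        f.write("step 1")


@stub.function(network_file_systems={logdir: volume})
def reader():
    # Unlike Volume above, no explicit commit/reload is used here.
    with open(f"{logdir}/events.txt") as f:
        return f.read()
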
14 changes: 7 additions & 7 deletions 10_integrations/covid_datasette.py
@@ -42,7 +42,7 @@
 # To separate database creation and maintenance from serving, we'll need the underlying
 # database file to be stored persistently. To achieve this we use a [`Volume`](/docs/guide/volumes).
 
-stub.volume = Volume.persisted("example-covid-datasette-cache-vol")
+volume = Volume.persisted("example-covid-datasette-cache-vol")
 
 VOLUME_DIR = "/cache-vol"
 REPORTS_DIR = pathlib.Path(VOLUME_DIR, "COVID-19")
@@ -59,7 +59,7 @@
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
     retries=2,
 )
 def download_dataset(cache=True):
@@ -84,7 +84,7 @@ def download_dataset(cache=True):
     subprocess.run(f"mv {REPORTS_DIR / prefix}/* {REPORTS_DIR}", shell=True)
 
     print("Committing the volume...")
-    stub.volume.commit()
+    volume.commit()
 
     print("Finished downloading dataset.")
 
@@ … @@
 
 
 def load_daily_reports():
-    stub.volume.reload()
+    volume.reload()
     daily_reports = list(REPORTS_DIR.glob("*.csv"))
     if not daily_reports:
         raise RuntimeError(
@@ -159,7 +159,7 @@ def chunks(it, size):
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
     timeout=900,
 )
 def prep_db():
@@ -185,7 +185,7 @@ def prep_db():
     db.close()
 
     print("Syncing DB with volume.")
-    stub.volume.commit()
+    volume.commit()
 
 
 # ## Keep it fresh
@@ … @@
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
 )
 @asgi_app()
 def app():
