diff --git a/06_gpu_and_ml/dreambooth/dreambooth_app.py b/06_gpu_and_ml/dreambooth/dreambooth_app.py
index f52e248b7..71bec97ef 100644
--- a/06_gpu_and_ml/dreambooth/dreambooth_app.py
+++ b/06_gpu_and_ml/dreambooth/dreambooth_app.py
@@ -78,7 +78,6 @@
 
 volume = Volume.persisted("dreambooth-finetuning-volume")
 MODEL_DIR = Path("/model")
-stub.volume = volume
 
 # ## Config
 #
@@ -270,7 +269,7 @@ def _exec_subprocess(cmd: list[str]):
     # The trained model artefacts have been output to the volume mounted at `MODEL_DIR`.
     # To persist these artefacts for use in future inference function calls, we 'commit' the changes
     # to the volume.
-    stub.volume.commit()
+    volume.commit()
 
 
 # ## The inference function.
@@ -292,7 +291,7 @@ def __enter__(self):
         from diffusers import DDIMScheduler, StableDiffusionPipeline
 
         # Reload the modal.Volume to ensure the latest state is accessible.
-        stub.volume.reload()
+        volume.reload()
 
         # set up a hugging face inference pipeline using our model
         ddim = DDIMScheduler.from_pretrained(MODEL_DIR, subfolder="scheduler")
diff --git a/06_gpu_and_ml/flan_t5/flan_t5_finetune.py b/06_gpu_and_ml/flan_t5/flan_t5_finetune.py
index dcbadaa56..7d1964f30 100644
--- a/06_gpu_and_ml/flan_t5/flan_t5_finetune.py
+++ b/06_gpu_and_ml/flan_t5/flan_t5_finetune.py
@@ -39,7 +39,6 @@
 
 stub = Stub(name="example-news-summarizer", image=image)
 output_vol = Volume.persisted("finetune-volume")
-stub.volume = output_vol
 
 # ### Handling preemption
 #
@@ -181,7 +180,7 @@ def on_save(self, args, state, control, **kwargs):
     trainer = Seq2SeqTrainer(
         model=model,
         args=training_args,
-        callbacks=[CheckpointCallback(stub.volume)],
+        callbacks=[CheckpointCallback(output_vol)],
         data_collator=data_collator,
         train_dataset=tokenized_xsum_train,
         eval_dataset=tokenized_xsum_test,
@@ -198,7 +197,7 @@ def on_save(self, args, state, control, **kwargs):
     # Save the trained model and tokenizer to the mounted volume
     model.save_pretrained(str(VOL_MOUNT_PATH / "model"))
     tokenizer.save_pretrained(str(VOL_MOUNT_PATH / "tokenizer"))
-    stub.volume.commit()
+    output_vol.commit()
 
     print("✅ done")
 
diff --git a/06_gpu_and_ml/spam-detect/spam_detect/app.py b/06_gpu_and_ml/spam-detect/spam_detect/app.py
index 223be2a09..f4c28ba30 100644
--- a/06_gpu_and_ml/spam-detect/spam_detect/app.py
+++ b/06_gpu_and_ml/spam-detect/spam_detect/app.py
@@ -18,4 +18,3 @@
 stub = modal.Stub(name="example-spam-detect-llm", image=image)
 # Used to store datasets, trained models, model metadata, config.
 volume = modal.Volume.persisted("example-spam-detect-vol")
-stub.volume = volume
diff --git a/06_gpu_and_ml/spam-detect/spam_detect/train.py b/06_gpu_and_ml/spam-detect/spam_detect/train.py
index 10b5fd7cf..fb155e43d 100644
--- a/06_gpu_and_ml/spam-detect/spam_detect/train.py
+++ b/06_gpu_and_ml/spam-detect/spam_detect/train.py
@@ -25,7 +25,7 @@
 import modal
 
 from . import config, dataset, models
-from .app import stub
+from .app import stub, volume
 
 
 def fetch_git_commit_hash(allow_dirty: bool) -> str:
@@ -72,26 +72,26 @@ def fetch_git_commit_hash(allow_dirty: bool) -> str:
     return result.stdout.decode().strip()
 
 
-@stub.function(volumes={config.VOLUME_DIR: stub.volume})
+@stub.function(volumes={config.VOLUME_DIR: volume})
 def init_volume():
     config.MODEL_STORE_DIR.mkdir(parents=True, exist_ok=True)
-    stub.volume.commit()  # Persist changes
+    volume.commit()  # Persist changes
 
 
 @stub.function(
     timeout=int(timedelta(minutes=8).total_seconds()),
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
 )
 def prep_dataset():
     logger = config.get_logger()
     datasets_path = config.DATA_DIR
     datasets_path.mkdir(parents=True, exist_ok=True)
     dataset.download(base=datasets_path, logger=logger)
-    stub.volume.commit()  # Persist changes
+    volume.commit()  # Persist changes
 
 
 @stub.function(
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
     secrets=[modal.Secret.from_dict({"PYTHONHASHSEED": "10"})],
     timeout=int(timedelta(minutes=30).total_seconds()),
 )
@@ -108,7 +108,7 @@ def train(
         model_registry_root=config.MODEL_STORE_DIR,
         git_commit_hash=git_commit_hash,
     )
-    stub.volume.commit()  # Persist changes
+    volume.commit()  # Persist changes
     logger.info(f"saved model to model store. {model_id=}")
     # Reload the model
     logger.info("🔁 testing reload of model")
@@ -121,7 +121,7 @@
 @stub.function(
-    volumes={config.VOLUME_DIR: stub.volume},
+    volumes={config.VOLUME_DIR: volume},
     secrets=[modal.Secret.from_dict({"PYTHONHASHSEED": "10"})],
     timeout=int(timedelta(minutes=30).total_seconds()),
     gpu=modal.gpu.T4(),
 )
 def train_gpu(
@@ -139,7 +139,7 @@ def train_gpu(
         model_registry_root=config.MODEL_STORE_DIR,
         git_commit_hash=git_commit_hash,
     )
-    stub.volume.commit()  # Persist changes
+    volume.commit()  # Persist changes
     logger.info(f"saved model to model store. {model_id=}")
 
 
diff --git a/06_gpu_and_ml/tensorflow/tensorflow_tutorial.py b/06_gpu_and_ml/tensorflow/tensorflow_tutorial.py
index 1ea6dd90f..8fd44e05b 100644
--- a/06_gpu_and_ml/tensorflow/tensorflow_tutorial.py
+++ b/06_gpu_and_ml/tensorflow/tensorflow_tutorial.py
@@ -47,7 +47,7 @@
 # We want to run the web server for Tensorboard at the same time as we are training the Tensorflow model.
 # The easiest way to do this is to set up a shared filesystem between the training and the web server.
 
-stub.volume = NetworkFileSystem.new()
+volume = NetworkFileSystem.new()
 logdir = "/tensorboard"
 
 # ## Training function
@@ -61,9 +61,7 @@
 # This makes it a bit easier to run this example even if you don't have Tensorflow installed on you local computer.
 
 
-@stub.function(
-    network_file_systems={logdir: stub.volume}, gpu="any", timeout=600
-)
+@stub.function(network_file_systems={logdir: volume}, gpu="any", timeout=600)
 def train():
     import pathlib
 
@@ -155,7 +153,7 @@ def train():
 # Note that this server will be exposed to the public internet!
 
 
-@stub.function(network_file_systems={logdir: stub.volume})
+@stub.function(network_file_systems={logdir: volume})
 @wsgi_app()
 def tensorboard_app():
     import tensorboard
diff --git a/10_integrations/covid_datasette.py b/10_integrations/covid_datasette.py
index 08e429bb9..2d9697b74 100644
--- a/10_integrations/covid_datasette.py
+++ b/10_integrations/covid_datasette.py
@@ -42,7 +42,7 @@
 # To separate database creation and maintenance from serving, we'll need the underlying
 # database file to be stored persistently. To achieve this we use a [`Volume`](/docs/guide/volumes).
 
-stub.volume = Volume.persisted("example-covid-datasette-cache-vol")
+volume = Volume.persisted("example-covid-datasette-cache-vol")
 
 VOLUME_DIR = "/cache-vol"
 REPORTS_DIR = pathlib.Path(VOLUME_DIR, "COVID-19")
@@ -59,7 +59,7 @@
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
     retries=2,
 )
 def download_dataset(cache=True):
@@ -84,7 +84,7 @@ def download_dataset(cache=True):
     subprocess.run(f"mv {REPORTS_DIR / prefix}/* {REPORTS_DIR}", shell=True)
 
     print("Committing the volume...")
-    stub.volume.commit()
+    volume.commit()
 
     print("Finished downloading dataset.")
 
@@ -97,7 +97,7 @@
 
 
 def load_daily_reports():
-    stub.volume.reload()
+    volume.reload()
     daily_reports = list(REPORTS_DIR.glob("*.csv"))
     if not daily_reports:
         raise RuntimeError(
@@ -159,7 +159,7 @@ def chunks(it, size):
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
     timeout=900,
 )
 def prep_db():
@@ -185,7 +185,7 @@ def prep_db():
     db.close()
 
     print("Syncing DB with volume.")
-    stub.volume.commit()
+    volume.commit()
 
 
 # ## Keep it fresh
@@ -211,7 +211,7 @@ def refresh_db():
 
 @stub.function(
     image=datasette_image,
-    volumes={VOLUME_DIR: stub.volume},
+    volumes={VOLUME_DIR: volume},
 )
 @asgi_app()
 def app():
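
Taken together, these hunks apply one refactor: instead of attaching the Volume (or NetworkFileSystem) to the Stub as an attribute (`stub.volume = volume`) and going through `stub.volume` everywhere, each example defines the volume once at module scope and references that name directly, both in `@stub.function(volumes=...)` and in `.commit()`/`.reload()` calls. The sketch below is a minimal distillation of the resulting pattern, not code from this diff: the app name, volume name, mount path, and both functions are hypothetical, and it assumes the same Stub-era Modal API these examples use.

import pathlib

import modal

stub = modal.Stub(name="example-volume-pattern")  # hypothetical app name

# Define the volume once at module scope and reference it directly below,
# rather than stashing it on the stub (the old `stub.volume = volume` style).
volume = modal.Volume.persisted("example-pattern-vol")  # hypothetical volume name

VOLUME_DIR = pathlib.Path("/vol")  # hypothetical mount point


@stub.function(volumes={str(VOLUME_DIR): volume})
def write_artifact():
    # Write into the mounted volume, then commit so the change is
    # persisted and visible to other functions and future runs.
    (VOLUME_DIR / "artifact.txt").write_text("hello")
    volume.commit()  # Persist changes


@stub.function(volumes={str(VOLUME_DIR): volume})
def read_artifact() -> str:
    # Reload to ensure the latest committed state is accessible.
    volume.reload()
    return (VOLUME_DIR / "artifact.txt").read_text()

Because the same module-level object is both passed to `volumes=` and used for `commit()`/`reload()` inside the function bodies, there is a single source of truth for the volume, which is what makes dropping the stub attribute a pure rename in every file above.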