From f07e532dd036d1ee6b3a9d6e98cc66a20c117954 Mon Sep 17 00:00:00 2001
From: pattonw
Date: Wed, 14 Feb 2024 13:31:40 -0800
Subject: [PATCH] use toml's in file config store

---
Review notes (this area is ignored when the patch is applied):
 * Line structure restored; hunk line counts below were re-checked by hand.
 * TOML files are read and written inside `with` blocks so the file handle is
   closed (and, on write, flushed) deterministically.
 * Config names are listed with glob("*.toml") and Path.stem, so directory
   entries without a .toml suffix are skipped rather than having their last
   five characters sliced off.
 * The post-image blob id on the second `index` line predates these review
   edits and was not recomputed.

 dacapo/store/config_store.py      | 57 ++++++++++++++++++++++++
 dacapo/store/file_config_store.py | 72 +++++++++++++++++++++----------
 2 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/dacapo/store/config_store.py b/dacapo/store/config_store.py
index d962b79f5..8c91fd036 100644
--- a/dacapo/store/config_store.py
+++ b/dacapo/store/config_store.py
@@ -17,6 +17,45 @@ class DuplicateNameError(Exception):
 class ConfigStore(ABC):
     """Base class for configuration stores."""
 
+    @property
+    @abstractmethod
+    def runs(self):
+        pass
+
+    @property
+    @abstractmethod
+    def datasplits(self):
+        pass
+
+    @property
+    @abstractmethod
+    def datasets(self):
+        pass
+
+    @property
+    @abstractmethod
+    def arrays(self):
+        pass
+
+    @property
+    @abstractmethod
+    def tasks(self):
+        pass
+
+    @property
+    @abstractmethod
+    def trainers(self):
+        pass
+
+    @property
+    @abstractmethod
+    def architectures(self):
+        pass
+
+    @abstractmethod
+    def delete_config(self, database, config_name: str) -> None:
+        pass
+
     @abstractmethod
     def store_run_config(self, run_config: "RunConfig") -> None:
         """Store a run config. This should also store the configs that are part
@@ -34,6 +73,9 @@ def retrieve_run_config_names(self) -> List[str]:
         """Retrieve all run config names."""
         pass
 
+    def delete_run_config(self, run_name: str) -> None:
+        self.delete_config(self.runs, run_name)
+
     @abstractmethod
     def store_task_config(self, task_config: "TaskConfig") -> None:
         """Store a task config."""
@@ -49,6 +91,9 @@ def retrieve_task_config_names(self) -> List[str]:
         """Retrieve all task config names."""
         pass
 
+    def delete_task_config(self, task_name: str) -> None:
+        self.delete_config(self.tasks, task_name)
+
     @abstractmethod
     def store_architecture_config(
         self, architecture_config: "ArchitectureConfig"
@@ -68,6 +113,9 @@ def retrieve_architecture_config_names(self) -> List[str]:
         """Retrieve all architecture config names."""
         pass
 
+    def delete_architecture_config(self, architecture_name: str) -> None:
+        self.delete_config(self.architectures, architecture_name)
+
     @abstractmethod
     def store_trainer_config(self, trainer_config: "TrainerConfig") -> None:
         """Store a trainer config."""
@@ -83,6 +131,9 @@ def retrieve_trainer_config_names(self) -> List[str]:
         """Retrieve all trainer config names."""
         pass
 
+    def delete_trainer_config(self, trainer_name: str) -> None:
+        self.delete_config(self.trainers, trainer_name)
+
     @abstractmethod
     def store_datasplit_config(self, datasplit_config: "DataSplitConfig") -> None:
         """Store a datasplit config."""
@@ -98,6 +149,9 @@ def retrieve_datasplit_config_names(self) -> List[str]:
         """Retrieve all datasplit names."""
         pass
 
+    def delete_datasplit_config(self, datasplit_name: str) -> None:
+        self.delete_config(self.datasplits, datasplit_name)
+
     @abstractmethod
     def store_array_config(self, array_config: "ArrayConfig") -> None:
         """Store a array config."""
@@ -112,3 +166,6 @@ def retrieve_array_config(self, array_name: str) -> "ArrayConfig":
     def retrieve_array_config_names(self) -> List[str]:
         """Retrieve all array names."""
         pass
+
+    def delete_array_config(self, array_name: str) -> None:
+        self.delete_config(self.arrays, array_name)
diff --git a/dacapo/store/file_config_store.py b/dacapo/store/file_config_store.py
index 98bfd6562..5fbe1ca5c 100644
--- a/dacapo/store/file_config_store.py
+++ b/dacapo/store/file_config_store.py
@@ -8,7 +8,7 @@
 from dacapo.experiments.trainers import TrainerConfig
 
 import logging
-import pickle
+import toml
 from pathlib import Path
 
 logger = logging.getLogger(__name__)
@@ -36,7 +36,7 @@ def retrieve_run_config(self, run_name):
         return converter.structure(run_doc, RunConfig)
 
     def retrieve_run_config_names(self):
-        return [f.name for f in self.runs.iterdir()]
+        return [f.stem for f in self.runs.glob("*.toml")]
 
     def store_task_config(self, task_config):
         task_doc = converter.unstructure(task_config)
@@ -47,7 +47,7 @@ def retrieve_task_config(self, task_name):
         return converter.structure(task_doc, TaskConfig)
 
     def retrieve_task_config_names(self):
-        return [f.name for f in self.tasks.iterdir()]
+        return [f.stem for f in self.tasks.glob("*.toml")]
 
     def store_architecture_config(self, architecture_config):
         architecture_doc = converter.unstructure(architecture_config)
@@ -58,7 +58,7 @@ def retrieve_architecture_config(self, architecture_name):
         return converter.structure(architecture_doc, ArchitectureConfig)
 
     def retrieve_architecture_config_names(self):
-        return [f.name for f in self.architectures.iterdir()]
+        return [f.stem for f in self.architectures.glob("*.toml")]
 
     def store_trainer_config(self, trainer_config):
         trainer_doc = converter.unstructure(trainer_config)
@@ -69,7 +69,7 @@ def retrieve_trainer_config(self, trainer_name):
         return converter.structure(trainer_doc, TrainerConfig)
 
     def retrieve_trainer_config_names(self):
-        return [f.name for f in self.trainers.iterdir()]
+        return [f.stem for f in self.trainers.glob("*.toml")]
 
     def store_datasplit_config(self, datasplit_config):
         datasplit_doc = converter.unstructure(datasplit_config)
@@ -80,7 +80,7 @@ def retrieve_datasplit_config(self, datasplit_name):
         return converter.structure(datasplit_doc, DataSplitConfig)
 
     def retrieve_datasplit_config_names(self):
-        return [f.name for f in self.datasplits.iterdir()]
+        return [f.stem for f in self.datasplits.glob("*.toml")]
 
     def store_array_config(self, array_config):
         array_doc = converter.unstructure(array_config)
@@ -91,19 +91,19 @@ def retrieve_array_config(self, array_name):
         return converter.structure(array_doc, ArrayConfig)
 
     def retrieve_array_config_names(self):
-        return [f.name for f in self.arrays.iterdir()]
+        return [f.stem for f in self.arrays.glob("*.toml")]
 
     def __save_insert(self, collection, data, ignore=None):
         name = data["name"]
 
-        file_store = collection / name
+        file_store = collection / f"{name}.toml"
         if not file_store.exists():
-            with file_store.open("wb") as fd:
-                pickle.dump(dict(data), fd)
+            with file_store.open("w") as fd:
+                toml.dump(dict(data), fd)
 
         else:
-            with file_store.open("rb") as fd:
-                existing = pickle.load(fd)
+            with file_store.open("r") as fd:
+                existing = toml.load(fd)
 
             if not self.__same_doc(existing, data, ignore):
                 raise DuplicateNameError(
@@ -113,10 +113,10 @@ def __save_insert(self, collection, data, ignore=None):
                 )
 
     def __load(self, collection, name):
-        file_store = collection / name
+        file_store = collection / f"{name}.toml"
         if file_store.exists():
-            with file_store.open("rb") as fd:
-                return pickle.load(fd)
+            with file_store.open("r") as fd:
+                return toml.load(fd)
         else:
             raise ValueError(f"No config with name: {name} in collection: {collection}")
 
@@ -138,17 +138,45 @@ def __init_db(self):
         pass
 
     def __open_collections(self):
-        self.users = self.path / "users"
         self.users.mkdir(exist_ok=True, parents=True)
-        self.runs = self.path / "runs"
         self.runs.mkdir(exist_ok=True, parents=True)
-        self.tasks = self.path / "tasks"
         self.tasks.mkdir(exist_ok=True, parents=True)
-        self.datasplits = self.path / "datasplits"
         self.datasplits.mkdir(exist_ok=True, parents=True)
-        self.arrays = self.path / "arrays"
         self.arrays.mkdir(exist_ok=True, parents=True)
-        self.architectures = self.path / "architectures"
         self.architectures.mkdir(exist_ok=True, parents=True)
-        self.trainers = self.path / "trainers"
         self.trainers.mkdir(exist_ok=True, parents=True)
+
+    @property
+    def users(self) -> Path:
+        return self.path / "users"
+
+    @property
+    def runs(self) -> Path:
+        return self.path / "runs"
+
+    @property
+    def tasks(self) -> Path:
+        return self.path / "tasks"
+
+    @property
+    def datasplits(self) -> Path:
+        return self.path / "datasplits"
+
+    @property
+    def arrays(self) -> Path:
+        return self.path / "arrays"
+
+    @property
+    def architectures(self) -> Path:
+        return self.path / "architectures"
+
+    @property
+    def trainers(self) -> Path:
+        return self.path / "trainers"
+
+    @property
+    def datasets(self) -> Path:
+        return self.path / "datasets"
+
+    def delete_config(self, database: Path, config_name: str) -> None:
+        (database / f"{config_name}.toml").unlink()