Io class (#55)

* Run all read/write operations through the IO class, with Local and Docker implementations.
* Add Docker for tests, but fail silently on macOS and Windows.
AbanteAI · Jul 18, 2024 · 5dc87d6 · 5dc87d6
1 parent 5bc1bbc
commit 5dc87d6
Show file tree

Hide file tree

Showing 30 changed files with 572 additions and 130 deletions.
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -26,7 +26,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install -e .
         pip install -e .[dev]
-
+  
     - name: Format check
       run: ruff format .
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,13 +7,14 @@ packages=["ragdaemon"]
 
 [project]
 name = "ragdaemon"
-version = "0.7.8"
+version = "0.8.0"
 description = "Generate and render a call graph for a Python project."
 readme = "README.md"
 dependencies = [
     "astroid==3.2.2",
     "chromadb==0.4.24",
     "dict2xml==1.7.5",
+    "docker==7.1.0",
     "fastapi==0.109.2",
     "Jinja2==3.1.3",
     "networkx==3.2.1",
@@ -41,10 +42,10 @@ ragdaemon = "ragdaemon.__main__:run"
 [project.optional-dependencies]
 dev = [
     "ruff",
-    "pyright",
+    "pyright==1.1.372",
     "pytest",
     "pytest-asyncio"
 ]
 
 [tool.pyright]
-ignore = ["tests/sample"]
+ignore = ["tests/sample", "venv", ".venv"]
diff --git a/ragdaemon/__init__.py b/ragdaemon/__init__.py
@@ -1 +1 @@
-__version__ = "0.7.8"
+__version__ = "0.8.0"
diff --git a/ragdaemon/annotators/base_annotator.py b/ragdaemon/annotators/base_annotator.py
@@ -6,17 +6,20 @@
 
 from ragdaemon.database import Database
 from ragdaemon.graph import KnowledgeGraph
+from ragdaemon.io import IO
 
 
 class Annotator:
     name: str = "base_annotator"
 
     def __init__(
         self,
+        io: IO,
         verbose: int = 0,
         spice_client: Optional[Spice] = None,
         pipeline: Optional[dict[str, Annotator]] = None,
     ):
+        self.io = io
         self.verbose = verbose
         self.spice_client = spice_client
         pass

diff --git a/ragdaemon/annotators/chunker/__init__.py b/ragdaemon/annotators/chunker/__init__.py
@@ -165,7 +165,7 @@ async def annotate(
             # Load chunks into graph
             for chunk in chunks:
                 id, ref = chunk["id"], chunk["ref"]
-                document = get_document(ref, Path(graph.graph["cwd"]))
+                document = get_document(ref, self.io, type="chunk")
                 checksum = hash_str(document)
                 chunk_data = {
                     "id": id,

diff --git a/ragdaemon/annotators/diff.py b/ragdaemon/annotators/diff.py
@@ -1,11 +1,9 @@
 import json
 import re
 from copy import deepcopy
-from pathlib import Path
 
 from ragdaemon.annotators.base_annotator import Annotator
 from ragdaemon.database import Database, remove_add_to_db_duplicates
-from ragdaemon.get_paths import get_git_root_for_path
 from ragdaemon.graph import KnowledgeGraph
 from ragdaemon.errors import RagdaemonError
 from ragdaemon.utils import (
@@ -74,19 +72,17 @@ def id(self) -> str:
         return "DEFAULT" if not self.diff_args else self.diff_args
 
     def is_complete(self, graph: KnowledgeGraph, db: Database) -> bool:
-        cwd = Path(graph.graph["cwd"])
-        if not get_git_root_for_path(cwd, raise_error=False):
+        if not self.io.is_git_repo():
             return True
 
-        document = get_document(self.diff_args, cwd, type="diff")
+        document = get_document(self.diff_args, self.io, type="diff")
         checksum = hash_str(document)
         return self.id in graph and graph.nodes[self.id]["checksum"] == checksum
 
     async def annotate(
         self, graph: KnowledgeGraph, db: Database, refresh: str | bool = False
     ) -> KnowledgeGraph:
-        cwd = Path(graph.graph["cwd"])
-        if not get_git_root_for_path(cwd, raise_error=False):
+        if not self.io.is_git_repo():
             return graph
 
         graph_nodes = {
@@ -97,7 +93,7 @@ async def annotate(
         graph.remove_nodes_from(graph_nodes)
 
         checksums = dict[str, str]()
-        document = get_document(self.diff_args, cwd, type="diff")
+        document = get_document(self.diff_args, self.io, type="diff")
         checksum = hash_str(document)
         chunks = get_chunks_from_diff(id=self.id, diff=document)
         data = {
@@ -112,7 +108,7 @@ async def annotate(
         checksums[self.id] = checksum
 
         for chunk_id, chunk_ref in chunks.items():
-            document = get_document(chunk_ref, cwd, type="diff")
+            document = get_document(chunk_ref, self.io, type="diff")
             chunk_checksum = hash_str(document)
             data = {
                 "id": chunk_id,

diff --git a/ragdaemon/annotators/hierarchy.py b/ragdaemon/annotators/hierarchy.py
@@ -3,17 +3,17 @@
 
 from ragdaemon.annotators.base_annotator import Annotator
 from ragdaemon.database import Database, remove_add_to_db_duplicates
-from ragdaemon.get_paths import get_paths_for_directory
 from ragdaemon.graph import KnowledgeGraph
 from ragdaemon.errors import RagdaemonError
+from ragdaemon.io import IO
 from ragdaemon.utils import get_document, hash_str, truncate
 
 
-def files_checksum(cwd: Path, ignore_patterns: set[Path] = set()) -> str:
+def files_checksum(io: IO, ignore_patterns: set[Path] = set()) -> str:
     timestamps = ""
-    for path in get_paths_for_directory(cwd, exclude_patterns=ignore_patterns):
+    for path in io.get_paths_for_directory(exclude_patterns=ignore_patterns):
         try:
-            timestamps += str((cwd / path).stat().st_mtime)
+            timestamps += str(io.last_modified(path))
         except FileNotFoundError:
             pass
     return hash_str(timestamps)
@@ -28,9 +28,8 @@ def __init__(self, *args, ignore_patterns: set[Path] = set(), **kwargs):
         super().__init__(*args, **kwargs)
 
     def is_complete(self, graph: KnowledgeGraph, db: Database) -> bool:
-        cwd = Path(graph.graph["cwd"])
         return graph.graph.get("files_checksum") == files_checksum(
-            cwd, self.ignore_patterns
+            self.io, self.ignore_patterns
         )
 
     async def annotate(
@@ -45,12 +44,12 @@ async def annotate(
 
         # Load active files/dirs and checksums
         checksums = dict[Path, str]()
-        paths = get_paths_for_directory(cwd, exclude_patterns=self.ignore_patterns)
+        paths = self.io.get_paths_for_directory(exclude_patterns=self.ignore_patterns)
         directories = set()
         edges = set()
         for path in paths:
             path_str = path.as_posix()
-            document = get_document(path_str, cwd)
+            document = get_document(path_str, self.io)
             checksum = hash_str(document)
             data = {
                 "id": path_str,
@@ -115,5 +114,5 @@ async def annotate(
             add_to_db = remove_add_to_db_duplicates(**add_to_db)
             db.add(**add_to_db)
 
-        graph.graph["files_checksum"] = files_checksum(cwd, self.ignore_patterns)
+        graph.graph["files_checksum"] = files_checksum(self.io, self.ignore_patterns)
         return graph
diff --git a/ragdaemon/annotators/summarizer.py b/ragdaemon/annotators/summarizer.py
@@ -12,6 +12,7 @@
 from ragdaemon.database import Database, remove_update_db_duplicates
 from ragdaemon.graph import KnowledgeGraph
 from ragdaemon.errors import RagdaemonError
+from ragdaemon.io import IO
 from ragdaemon.utils import (
     DEFAULT_COMPLETION_MODEL,
     match_refresh,
@@ -84,6 +85,7 @@ def build_filetree(
 def get_document_and_context(
     node: str,
     graph: KnowledgeGraph,
+    io: IO,
     summary_field_id: str = "summary",
     model: Optional[TextModel] = None,
 ) -> tuple[str, str]:
@@ -98,12 +100,12 @@ def get_document_and_context(
     if data.get("type") == "directory":
         document = f"Directory: {node}"
     else:
-        cb = ContextBuilder(graph)
+        cb = ContextBuilder(graph, io)
         cb.add_id(node)
         document = cb.render()
 
     if data.get("type") == "chunk":
-        cb = ContextBuilder(graph)
+        cb = ContextBuilder(graph, io)
 
         # Parent chunks back to the file
         def get_hierarchical_parents(target: str, cb: ContextBuilder):
@@ -253,7 +255,11 @@ async def generate_summary(
             or summary_checksum != data.get(self.checksum_field_id)
         ):
             document, context = get_document_and_context(
-                node, graph, summary_field_id=self.summary_field_id, model=self.model
+                node,
+                graph,
+                self.io,
+                summary_field_id=self.summary_field_id,
+                model=self.model,
             )
             subprompt = "root" if node == "ROOT" else data.get("type", "")
             previous_summary = "" if _refresh else data.get(self.summary_field_id, "")

diff --git a/ragdaemon/context.py b/ragdaemon/context.py
@@ -8,6 +8,7 @@
 from dict2xml import dict2xml
 from ragdaemon.errors import RagdaemonError
 from ragdaemon.graph import KnowledgeGraph
+from ragdaemon.io import IO
 from ragdaemon.utils import get_document, parse_diff_id, parse_path_ref
 
 NestedStrDict = Union[str, Dict[str, "NestedStrDict"]]
@@ -36,15 +37,16 @@ def render_comments(comments: list[Comment]) -> str:
 class ContextBuilder:
     """Renders items from a graph into an llm-readable string."""
 
-    def __init__(self, graph: KnowledgeGraph, verbose: int = 0):
+    def __init__(self, graph: KnowledgeGraph, io: IO, verbose: int = 0):
         self.graph = graph
+        self.io = io
         self.verbose = verbose
         self.context = dict[
             str, dict[str, Any]
         ]()  # {path: {lines, tags, document, diff}}
 
     def copy(self):
-        duplicate = ContextBuilder(self.graph, self.verbose)
+        duplicate = ContextBuilder(self.graph, self.io, self.verbose)
         duplicate.context = deepcopy(self.context)
         return duplicate
 
@@ -73,8 +75,7 @@ def _add_path(self, path_str: str):
         if document is None:  # Truncated or deleted
             try:
                 # TODO: Add ignored files to the graph/database
-                cwd = Path(self.graph.graph["cwd"])
-                document = get_document(path_str, cwd, type="file")
+                document = get_document(path_str, self.io, type="file")
             except FileNotFoundError:
                 # Or could be deleted but have a diff
                 document = f"{path_str}\n[DELETED]"

diff --git a/ragdaemon/daemon.py b/ragdaemon/daemon.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import Any, Dict, Iterable, Optional
 
+from docker.models.containers import Container
 from networkx.readwrite import json_graph
 from spice import Spice
 from spice.models import Model, TextModel
@@ -14,8 +15,8 @@
 from ragdaemon.context import ContextBuilder
 from ragdaemon.database import DEFAULT_EMBEDDING_MODEL, Database, get_db
 from ragdaemon.errors import RagdaemonError
-from ragdaemon.get_paths import get_paths_for_directory
 from ragdaemon.graph import KnowledgeGraph
+from ragdaemon.io import DockerIO, IO, LocalIO
 from ragdaemon.locate import locate
 from ragdaemon.utils import DEFAULT_COMPLETION_MODEL, match_refresh, mentat_dir_path
 
@@ -39,22 +40,23 @@ def __init__(
         cwd: Path,
         annotators: Optional[dict[str, dict]] = None,
         verbose: bool | int = 0,
-        graph_path: Optional[Path] = None,
         spice_client: Optional[Spice] = None,
         logging_dir: Optional[Path | str] = None,
         model: str = DEFAULT_EMBEDDING_MODEL,
         provider: Optional[str] = None,
+        container: Optional[Container] = None,
     ):
         self.cwd = cwd
+        if container is not None:
+            self.io: IO = DockerIO(cwd, container)
+        else:
+            self.io: IO = LocalIO(cwd)
         if isinstance(verbose, bool):
             verbose = 1 if verbose else 0
         self.verbose = verbose
-        if graph_path is not None:
-            self.graph_path = (cwd / graph_path).resolve()
-        else:
-            self.graph_path = (
-                mentat_dir_path / "ragdaemon" / f"ragdaemon-{self.cwd.name}.json"
-            )
+        self.graph_path = (
+            mentat_dir_path / "ragdaemon" / f"ragdaemon-{self.cwd.name}.json"
+        )
         self.graph_path.parent.mkdir(parents=True, exist_ok=True)
         if spice_client is None:
             spice_client = Spice(
@@ -82,6 +84,7 @@ def set_annotators(self, annotators: Optional[Dict[str, Dict]] = None):
         self.pipeline = {}
         for ann, kwargs in annotators.items():
             self.pipeline[ann] = annotators_map[ann](
+                io=self.io,
                 **kwargs,
                 verbose=self.verbose,
                 spice_client=self.spice_client,
@@ -92,7 +95,6 @@ def set_annotators(self, annotators: Optional[Dict[str, Dict]] = None):
     def db(self) -> Database:
         if not hasattr(self, "_db"):
             self._db = get_db(
-                self.cwd,
                 spice_client=self.spice_client,
                 embedding_model=self.embedding_model,
                 embedding_provider=self.embedding_provider,
@@ -130,13 +132,13 @@ async def update(self, refresh: str | bool = False):
 
     async def watch(self, interval=2, debounce=5):
         """Calls self.update interval debounce seconds after a file is modified."""
-        paths = get_paths_for_directory(self.cwd)
+        paths = self.io.get_paths_for_directory()
         last_updated = 0
         _update_task = None
         while True:
             await asyncio.sleep(interval)
-            paths = get_paths_for_directory(self.cwd)
-            _last_updated = max((self.cwd / path).stat().st_mtime for path in paths)
+            paths = self.io.get_paths_for_directory()
+            _last_updated = max(self.io.last_modified(path) for path in paths)
             if (
                 _last_updated > last_updated
                 and (time.time() - _last_updated) > debounce
@@ -171,7 +173,7 @@ def get_context(
         model: Model | str = DEFAULT_COMPLETION_MODEL,
     ) -> ContextBuilder:
         if context_builder is None:
-            context = ContextBuilder(self.graph, self.verbose)
+            context = ContextBuilder(self.graph, self.io, self.verbose)
         else:
             # TODO: Compare graph hashes, reconcile changes
             context = context_builder

diff --git a/ragdaemon/database/__init__.py b/ragdaemon/database/__init__.py
@@ -1,5 +1,4 @@
 import os  # noqa: F401
-from pathlib import Path
 from typing import Optional
 
 from spice import Spice
@@ -21,7 +20,6 @@
 
 
 def get_db(
-    cwd: Path,
     spice_client: Spice,
     embedding_model: str | None = None,
     embedding_provider: Optional[str] = None,
@@ -32,7 +30,6 @@ def get_db(
     # if embedding_model is not None and "PYTEST_CURRENT_TEST" not in os.environ:
     #     try:
     #         # db = ChromaDB(
-    #         #     cwd=cwd,
     #         #     db_path=db_path,
     #         #     spice_client=spice_client,
     #         #     embedding_model=embedding_model,
@@ -42,12 +39,12 @@ def get_db(
     #         # # In case the api key is wrong, try to embed something to trigger an error.
     #         # _ = db.add(ids="test", documents="test doc")
     #         # db.delete(ids="test")
-    #         db = PGDB(cwd=cwd, db_path=db_path, verbose=verbose)
+    #         db = PGDB(db_path=db_path, verbose=verbose)
     #         return db
     #     except Exception as e:
     #         if verbose > 1:
     #             print(
     #                 f"Failed to initialize Postgres Database: {e}. Falling back to LiteDB."
     #             )
     #         pass
-    return LiteDB(cwd=cwd, db_path=db_path, verbose=verbose)
+    return LiteDB(db_path=db_path, verbose=verbose)