Skip to content

Commit

Permalink
Io class (#55)
Browse files Browse the repository at this point in the history
* run all read/write operations through IO class, with Local and Docker implementations
* add Docker for tests, but fail silently on macOS and Windows
  • Loading branch information
granawkins authored Jul 18, 2024
1 parent 5bc1bbc commit 5dc87d6
Show file tree
Hide file tree
Showing 30 changed files with 572 additions and 130 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
python -m pip install --upgrade pip
pip install -e .
pip install -e .[dev]
- name: Format check
run: ruff format .

Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ packages=["ragdaemon"]

[project]
name = "ragdaemon"
version = "0.7.8"
version = "0.8.0"
description = "Generate and render a call graph for a Python project."
readme = "README.md"
dependencies = [
"astroid==3.2.2",
"chromadb==0.4.24",
"dict2xml==1.7.5",
"docker==7.1.0",
"fastapi==0.109.2",
"Jinja2==3.1.3",
"networkx==3.2.1",
Expand Down Expand Up @@ -41,10 +42,10 @@ ragdaemon = "ragdaemon.__main__:run"
[project.optional-dependencies]
dev = [
"ruff",
"pyright",
"pyright==1.1.372",
"pytest",
"pytest-asyncio"
]

[tool.pyright]
ignore = ["tests/sample"]
ignore = ["tests/sample", "venv", ".venv"]
2 changes: 1 addition & 1 deletion ragdaemon/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.8"
__version__ = "0.8.0"
3 changes: 3 additions & 0 deletions ragdaemon/annotators/base_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@

from ragdaemon.database import Database
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.io import IO


class Annotator:
name: str = "base_annotator"

def __init__(
self,
io: IO,
verbose: int = 0,
spice_client: Optional[Spice] = None,
pipeline: Optional[dict[str, Annotator]] = None,
):
self.io = io
self.verbose = verbose
self.spice_client = spice_client
pass
Expand Down
2 changes: 1 addition & 1 deletion ragdaemon/annotators/chunker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ async def annotate(
# Load chunks into graph
for chunk in chunks:
id, ref = chunk["id"], chunk["ref"]
document = get_document(ref, Path(graph.graph["cwd"]))
document = get_document(ref, self.io, type="chunk")
checksum = hash_str(document)
chunk_data = {
"id": id,
Expand Down
14 changes: 5 additions & 9 deletions ragdaemon/annotators/diff.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import json
import re
from copy import deepcopy
from pathlib import Path

from ragdaemon.annotators.base_annotator import Annotator
from ragdaemon.database import Database, remove_add_to_db_duplicates
from ragdaemon.get_paths import get_git_root_for_path
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.errors import RagdaemonError
from ragdaemon.utils import (
Expand Down Expand Up @@ -74,19 +72,17 @@ def id(self) -> str:
return "DEFAULT" if not self.diff_args else self.diff_args

def is_complete(self, graph: KnowledgeGraph, db: Database) -> bool:
cwd = Path(graph.graph["cwd"])
if not get_git_root_for_path(cwd, raise_error=False):
if not self.io.is_git_repo():
return True

document = get_document(self.diff_args, cwd, type="diff")
document = get_document(self.diff_args, self.io, type="diff")
checksum = hash_str(document)
return self.id in graph and graph.nodes[self.id]["checksum"] == checksum

async def annotate(
self, graph: KnowledgeGraph, db: Database, refresh: str | bool = False
) -> KnowledgeGraph:
cwd = Path(graph.graph["cwd"])
if not get_git_root_for_path(cwd, raise_error=False):
if not self.io.is_git_repo():
return graph

graph_nodes = {
Expand All @@ -97,7 +93,7 @@ async def annotate(
graph.remove_nodes_from(graph_nodes)

checksums = dict[str, str]()
document = get_document(self.diff_args, cwd, type="diff")
document = get_document(self.diff_args, self.io, type="diff")
checksum = hash_str(document)
chunks = get_chunks_from_diff(id=self.id, diff=document)
data = {
Expand All @@ -112,7 +108,7 @@ async def annotate(
checksums[self.id] = checksum

for chunk_id, chunk_ref in chunks.items():
document = get_document(chunk_ref, cwd, type="diff")
document = get_document(chunk_ref, self.io, type="diff")
chunk_checksum = hash_str(document)
data = {
"id": chunk_id,
Expand Down
17 changes: 8 additions & 9 deletions ragdaemon/annotators/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@

from ragdaemon.annotators.base_annotator import Annotator
from ragdaemon.database import Database, remove_add_to_db_duplicates
from ragdaemon.get_paths import get_paths_for_directory
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.errors import RagdaemonError
from ragdaemon.io import IO
from ragdaemon.utils import get_document, hash_str, truncate


def files_checksum(cwd: Path, ignore_patterns: set[Path] = set()) -> str:
def files_checksum(io: IO, ignore_patterns: set[Path] = set()) -> str:
timestamps = ""
for path in get_paths_for_directory(cwd, exclude_patterns=ignore_patterns):
for path in io.get_paths_for_directory(exclude_patterns=ignore_patterns):
try:
timestamps += str((cwd / path).stat().st_mtime)
timestamps += str(io.last_modified(path))
except FileNotFoundError:
pass
return hash_str(timestamps)
Expand All @@ -28,9 +28,8 @@ def __init__(self, *args, ignore_patterns: set[Path] = set(), **kwargs):
super().__init__(*args, **kwargs)

def is_complete(self, graph: KnowledgeGraph, db: Database) -> bool:
cwd = Path(graph.graph["cwd"])
return graph.graph.get("files_checksum") == files_checksum(
cwd, self.ignore_patterns
self.io, self.ignore_patterns
)

async def annotate(
Expand All @@ -45,12 +44,12 @@ async def annotate(

# Load active files/dirs and checksums
checksums = dict[Path, str]()
paths = get_paths_for_directory(cwd, exclude_patterns=self.ignore_patterns)
paths = self.io.get_paths_for_directory(exclude_patterns=self.ignore_patterns)
directories = set()
edges = set()
for path in paths:
path_str = path.as_posix()
document = get_document(path_str, cwd)
document = get_document(path_str, self.io)
checksum = hash_str(document)
data = {
"id": path_str,
Expand Down Expand Up @@ -115,5 +114,5 @@ async def annotate(
add_to_db = remove_add_to_db_duplicates(**add_to_db)
db.add(**add_to_db)

graph.graph["files_checksum"] = files_checksum(cwd, self.ignore_patterns)
graph.graph["files_checksum"] = files_checksum(self.io, self.ignore_patterns)
return graph
12 changes: 9 additions & 3 deletions ragdaemon/annotators/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ragdaemon.database import Database, remove_update_db_duplicates
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.errors import RagdaemonError
from ragdaemon.io import IO
from ragdaemon.utils import (
DEFAULT_COMPLETION_MODEL,
match_refresh,
Expand Down Expand Up @@ -84,6 +85,7 @@ def build_filetree(
def get_document_and_context(
node: str,
graph: KnowledgeGraph,
io: IO,
summary_field_id: str = "summary",
model: Optional[TextModel] = None,
) -> tuple[str, str]:
Expand All @@ -98,12 +100,12 @@ def get_document_and_context(
if data.get("type") == "directory":
document = f"Directory: {node}"
else:
cb = ContextBuilder(graph)
cb = ContextBuilder(graph, io)
cb.add_id(node)
document = cb.render()

if data.get("type") == "chunk":
cb = ContextBuilder(graph)
cb = ContextBuilder(graph, io)

# Parent chunks back to the file
def get_hierarchical_parents(target: str, cb: ContextBuilder):
Expand Down Expand Up @@ -253,7 +255,11 @@ async def generate_summary(
or summary_checksum != data.get(self.checksum_field_id)
):
document, context = get_document_and_context(
node, graph, summary_field_id=self.summary_field_id, model=self.model
node,
graph,
self.io,
summary_field_id=self.summary_field_id,
model=self.model,
)
subprompt = "root" if node == "ROOT" else data.get("type", "")
previous_summary = "" if _refresh else data.get(self.summary_field_id, "")
Expand Down
9 changes: 5 additions & 4 deletions ragdaemon/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dict2xml import dict2xml
from ragdaemon.errors import RagdaemonError
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.io import IO
from ragdaemon.utils import get_document, parse_diff_id, parse_path_ref

NestedStrDict = Union[str, Dict[str, "NestedStrDict"]]
Expand Down Expand Up @@ -36,15 +37,16 @@ def render_comments(comments: list[Comment]) -> str:
class ContextBuilder:
"""Renders items from a graph into an llm-readable string."""

def __init__(self, graph: KnowledgeGraph, verbose: int = 0):
def __init__(self, graph: KnowledgeGraph, io: IO, verbose: int = 0):
self.graph = graph
self.io = io
self.verbose = verbose
self.context = dict[
str, dict[str, Any]
]() # {path: {lines, tags, document, diff}}

def copy(self):
duplicate = ContextBuilder(self.graph, self.verbose)
duplicate = ContextBuilder(self.graph, self.io, self.verbose)
duplicate.context = deepcopy(self.context)
return duplicate

Expand Down Expand Up @@ -73,8 +75,7 @@ def _add_path(self, path_str: str):
if document is None: # Truncated or deleted
try:
# TODO: Add ignored files to the graph/database
cwd = Path(self.graph.graph["cwd"])
document = get_document(path_str, cwd, type="file")
document = get_document(path_str, self.io, type="file")
except FileNotFoundError:
# Or could be deleted but have a diff
document = f"{path_str}\n[DELETED]"
Expand Down
28 changes: 15 additions & 13 deletions ragdaemon/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import Any, Dict, Iterable, Optional

from docker.models.containers import Container
from networkx.readwrite import json_graph
from spice import Spice
from spice.models import Model, TextModel
Expand All @@ -14,8 +15,8 @@
from ragdaemon.context import ContextBuilder
from ragdaemon.database import DEFAULT_EMBEDDING_MODEL, Database, get_db
from ragdaemon.errors import RagdaemonError
from ragdaemon.get_paths import get_paths_for_directory
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.io import DockerIO, IO, LocalIO
from ragdaemon.locate import locate
from ragdaemon.utils import DEFAULT_COMPLETION_MODEL, match_refresh, mentat_dir_path

Expand All @@ -39,22 +40,23 @@ def __init__(
cwd: Path,
annotators: Optional[dict[str, dict]] = None,
verbose: bool | int = 0,
graph_path: Optional[Path] = None,
spice_client: Optional[Spice] = None,
logging_dir: Optional[Path | str] = None,
model: str = DEFAULT_EMBEDDING_MODEL,
provider: Optional[str] = None,
container: Optional[Container] = None,
):
self.cwd = cwd
if container is not None:
self.io: IO = DockerIO(cwd, container)
else:
self.io: IO = LocalIO(cwd)
if isinstance(verbose, bool):
verbose = 1 if verbose else 0
self.verbose = verbose
if graph_path is not None:
self.graph_path = (cwd / graph_path).resolve()
else:
self.graph_path = (
mentat_dir_path / "ragdaemon" / f"ragdaemon-{self.cwd.name}.json"
)
self.graph_path = (
mentat_dir_path / "ragdaemon" / f"ragdaemon-{self.cwd.name}.json"
)
self.graph_path.parent.mkdir(parents=True, exist_ok=True)
if spice_client is None:
spice_client = Spice(
Expand Down Expand Up @@ -82,6 +84,7 @@ def set_annotators(self, annotators: Optional[Dict[str, Dict]] = None):
self.pipeline = {}
for ann, kwargs in annotators.items():
self.pipeline[ann] = annotators_map[ann](
io=self.io,
**kwargs,
verbose=self.verbose,
spice_client=self.spice_client,
Expand All @@ -92,7 +95,6 @@ def set_annotators(self, annotators: Optional[Dict[str, Dict]] = None):
def db(self) -> Database:
if not hasattr(self, "_db"):
self._db = get_db(
self.cwd,
spice_client=self.spice_client,
embedding_model=self.embedding_model,
embedding_provider=self.embedding_provider,
Expand Down Expand Up @@ -130,13 +132,13 @@ async def update(self, refresh: str | bool = False):

async def watch(self, interval=2, debounce=5):
"""Polls every `interval` seconds and calls self.update once `debounce` seconds have passed since the last file modification."""
paths = get_paths_for_directory(self.cwd)
paths = self.io.get_paths_for_directory()
last_updated = 0
_update_task = None
while True:
await asyncio.sleep(interval)
paths = get_paths_for_directory(self.cwd)
_last_updated = max((self.cwd / path).stat().st_mtime for path in paths)
paths = self.io.get_paths_for_directory()
_last_updated = max(self.io.last_modified(path) for path in paths)
if (
_last_updated > last_updated
and (time.time() - _last_updated) > debounce
Expand Down Expand Up @@ -171,7 +173,7 @@ def get_context(
model: Model | str = DEFAULT_COMPLETION_MODEL,
) -> ContextBuilder:
if context_builder is None:
context = ContextBuilder(self.graph, self.verbose)
context = ContextBuilder(self.graph, self.io, self.verbose)
else:
# TODO: Compare graph hashes, reconcile changes
context = context_builder
Expand Down
7 changes: 2 additions & 5 deletions ragdaemon/database/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os # noqa: F401
from pathlib import Path
from typing import Optional

from spice import Spice
Expand All @@ -21,7 +20,6 @@


def get_db(
cwd: Path,
spice_client: Spice,
embedding_model: str | None = None,
embedding_provider: Optional[str] = None,
Expand All @@ -32,7 +30,6 @@ def get_db(
# if embedding_model is not None and "PYTEST_CURRENT_TEST" not in os.environ:
# try:
# # db = ChromaDB(
# # cwd=cwd,
# # db_path=db_path,
# # spice_client=spice_client,
# # embedding_model=embedding_model,
Expand All @@ -42,12 +39,12 @@ def get_db(
# # # In case the api key is wrong, try to embed something to trigger an error.
# # _ = db.add(ids="test", documents="test doc")
# # db.delete(ids="test")
# db = PGDB(cwd=cwd, db_path=db_path, verbose=verbose)
# db = PGDB(db_path=db_path, verbose=verbose)
# return db
# except Exception as e:
# if verbose > 1:
# print(
# f"Failed to initialize Postgres Database: {e}. Falling back to LiteDB."
# )
# pass
return LiteDB(cwd=cwd, db_path=db_path, verbose=verbose)
return LiteDB(db_path=db_path, verbose=verbose)
Loading

0 comments on commit 5dc87d6

Please sign in to comment.