Skip to content

Commit

Permalink
feat: add wikilinks to question output (#135)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBernstorff authored Apr 9, 2024
1 parent c303b31 commit 710d1db
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 3 deletions.
10 changes: 10 additions & 0 deletions memorymarker/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@
AnthropicCompleter,
)
from memorymarker.question_generator.completers.openai_completer import (
OpenAICompleter,
OpenAIModelCompleter,
)
from memorymarker.question_generator.flows.question_flow import QuestionFlow
from memorymarker.question_generator.main import chunk_highlights
from memorymarker.question_generator.qa_responses import QAResponses
from memorymarker.question_generator.steps.qa_extractor import QuestionExtractionStep
from memorymarker.question_generator.steps.qa_generation import QuestionGenerationStep
from memorymarker.question_generator.steps.question_wikilinker import (
QuestionWikilinkerStep,
)
from memorymarker.question_generator.steps.reasoning import ReasoningStep

app = typer.Typer(no_args_is_help=True)
Expand Down Expand Up @@ -150,6 +154,12 @@ def typer_cli(
response_model=QAResponses, # type: ignore
)
),
QuestionWikilinkerStep(
completer=OpenAICompleter(
api_key=os.getenv("OPENAI_API_KEY", "No OPENAI_API"),
model="gpt-4-turbo-preview",
)
),
),
)(chunked_highlights[0:max_n])
)
Expand Down
14 changes: 12 additions & 2 deletions memorymarker/question_generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
AnthropicCompleter,
)
from memorymarker.question_generator.completers.openai_completer import (
OpenAICompleter,
OpenAIModelCompleter,
)
from memorymarker.question_generator.example_repo_airtable import (
Expand All @@ -25,6 +26,9 @@
from memorymarker.question_generator.qa_responses import QAResponses
from memorymarker.question_generator.steps.qa_extractor import QuestionExtractionStep
from memorymarker.question_generator.steps.qa_generation import QuestionGenerationStep
from memorymarker.question_generator.steps.question_wikilinker import (
QuestionWikilinkerStep,
)
from memorymarker.question_generator.steps.reasoning import ReasoningStep

if TYPE_CHECKING:
Expand Down Expand Up @@ -100,7 +104,7 @@ async def main():
# "stack is a data structure that contains a collection of elements where you can add and delete elements from just one end ",
# "A semaphore manages an internal counter",
# }
document_titles = {"Singly Linked List", "Jeg har set mit køns smerte"}
document_titles = {"Singly Linked List"}
input_highlights = _select_highlights_from_omnivore()
selected_highlights = input_highlights.filter(
lambda _: any(title in _.source_document.title for title in document_titles)
Expand All @@ -126,7 +130,7 @@ async def main():
grouped_highlights,
[
QuestionFlow(
_name="chunked_reasoning",
_name="chunked_reasoning_with_wikilinks",
steps=(
ReasoningStep(completer=base_completer),
QuestionGenerationStep(
Expand All @@ -139,6 +143,12 @@ async def main():
response_model=QAResponses, # type: ignore
)
),
QuestionWikilinkerStep(
completer=OpenAICompleter(
api_key=os.getenv("OPENAI_API_KEY", "No OPENAI_API"),
model="gpt-4-turbo-preview",
)
),
),
)
],
Expand Down
6 changes: 5 additions & 1 deletion memorymarker/question_generator/qa_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from memorymarker.question_generator.reasoned_highlight import Highlights


@dataclass(frozen=True)
@dataclass
class QAPrompt:
hydrated_highlight: "Highlights | None"
question: str
Expand All @@ -34,5 +34,9 @@ def to_qaprompt(self, reasoned_highlight: "Highlights") -> QAPrompt:
)


# Response schema carrying a single rewritten question as plain text.
# NOTE(review): `BaseModel` is presumably pydantic's (its import is not visible
# in this hunk) — confirm. Documented with `#` comments rather than a docstring
# because pydantic copies class docstrings into the generated JSON schema.
class QuestionResponseModel(BaseModel):
    # The question text.
    question: str


# Container model wrapping a sequence of question/answer response items.
# Documented with `#` comments rather than a docstring because pydantic copies
# class docstrings into the generated JSON schema.
class QAResponses(pydantic.BaseModel):
    # The individual question/answer responses, in the order they were produced.
    items: Sequence[QAPromptResponseModel]
42 changes: 42 additions & 0 deletions memorymarker/question_generator/steps/question_wikilinker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import asyncio
from dataclasses import dataclass
from typing import TYPE_CHECKING

from memorymarker.question_generator.steps.step import FlowStep

if TYPE_CHECKING:
from memorymarker.question_generator.completers.completer import Completer
from memorymarker.question_generator.qa_responses import QAPrompt
from memorymarker.question_generator.reasoned_highlight import Highlights


@dataclass(frozen=True)
class QuestionWikilinkerStep(FlowStep):
    """Flow step that has a completer wrap key terms of each question in wikilinks.

    Every question attached to a highlight is sent to ``completer`` using the
    ``prompt`` template, and the question text is replaced by the completion.
    """

    completer: "Completer"
    # No type annotation, so ``@dataclass`` leaves this as an ordinary class
    # attribute instead of turning it into an ``__init__`` field.
    prompt = """In the following, identify the important, domain-specific terms. Then, capitalise them, and surround them with wikilinks. There can be more than one important term. Identify terms as you would in a wikipedia article.
E.g.:
When working with version control, why is the git amend command misleading?
Turns into:
When working with [[Version control]], why is the [[Git amend]] command misleading?
Here is the question:
{question}
"""

    def identity(self) -> str:
        """Return this step's class name joined to the completer's identity."""
        step_name = self.__class__.__name__
        completer_id = self.completer.identity()
        return f"{step_name}_{completer_id}"

    async def _wikilink_prompt(self, question: "QAPrompt") -> "QAPrompt":
        """Overwrite ``question.question`` with the completer's output.

        The passed-in object is modified in place and then returned.
        """
        filled_template = self.prompt.format(question=question.question)
        question.question = await self.completer(filled_template)  # type: ignore
        return question

    async def __call__(self, highlight: "Highlights") -> "Highlights":
        """Wikilink every question on ``highlight`` concurrently.

        ``highlight.question_answer_pairs`` is reassigned to the gathered
        results (same order as the input), and the same ``highlight`` object
        is returned.
        """
        pending = [
            self._wikilink_prompt(qa_pair)
            for qa_pair in highlight.question_answer_pairs
        ]
        highlight.question_answer_pairs = await asyncio.gather(*pending)
        return highlight

0 comments on commit 710d1db

Please sign in to comment.