Merge branch 'main' into main

dottxt-ai · Oct 16, 2024 · 3582fc8 · 3582fc8
2 parents 57171c0 + 969887e
commit 3582fc8
Show file tree

Hide file tree

Showing 11 changed files with 430 additions and 31 deletions.
diff --git a/benchmarks/bench_json_schema.py b/benchmarks/bench_json_schema.py
@@ -77,4 +77,4 @@ def time_json_schema_to_regex(self, schema_name):
     @cache_disabled()
     def time_json_schema_to_fsm(self, schema_name):
         regex = build_regex_from_schema(self.schema)
-        RegexGuide(regex, self.tokenizer)
+        RegexGuide.from_regex(regex, self.tokenizer)
diff --git a/benchmarks/bench_regex_guide.py b/benchmarks/bench_regex_guide.py
@@ -25,7 +25,7 @@ def setup(self, pattern_name):
 
     @cache_disabled()
     def time_regex_to_guide(self, pattern_name):
-        RegexGuide(self.pattern, self.tokenizer)
+        RegexGuide.from_regex(self.pattern, self.tokenizer)
 
 
 class MemoryRegexGuideBenchmark:
@@ -37,4 +37,4 @@ def setup(self, pattern_name):
 
     @cache_disabled()
     def peakmem_regex_to_guide(self, pattern_name):
-        RegexGuide(self.pattern, self.tokenizer)
+        RegexGuide.from_regex(self.pattern, self.tokenizer)
diff --git a/docs/reference/serve/lmstudio.md b/docs/reference/serve/lmstudio.md
@@ -0,0 +1,89 @@
+# Serve with LM Studio
+
+!!! tip "Would rather not self-host?"
+
+    If you want to get started quickly with JSON-structured generation, you can instead call [.json](https://h1xbpbfsf0w.typeform.com/to/ZgBCvJHF), a [.txt](http://dottxt.co) API that guarantees valid JSON.
+
+[LM Studio](https://lmstudio.ai/) is an application that runs local LLMs. It flexibly mixes GPU and CPU compute in hardware-constrained environments.
+
+As of [LM Studio 0.3.4](https://lmstudio.ai/blog/lmstudio-v0.3.4), it natively supports Outlines for structured text generation, using an OpenAI-compatible endpoint.
+
+## Setup
+
+1. Install LM Studio by visiting their [downloads page](https://lmstudio.ai/download).
+2. Enable the LM Studio [server functionality](https://lmstudio.ai/docs/basics/server).
+3. Download [a model](https://lmstudio.ai/docs/basics#1-download-an-llm-to-your-computer).
+4. Install Python dependencies.
+```bash
+pip install pydantic openai
+```
+
+## Calling the server
+
+By default, LM Studio serves from `http://localhost:1234`, with the OpenAI-compatible API under the `/v1` path. If you are serving on a different host or port, change the `base_url` argument passed to `openai.OpenAI` accordingly.
+
+```python
+class Testing(BaseModel):
+    """
+    A class representing a testing schema.
+    """
+    name: str
+    age: int
+
+openai_client = openai.OpenAI(
+    base_url="http://0.0.0.0:1234/v1",
+    api_key="dopeness"
+)
+
+# Make a request to the local LM Studio server
+response = openai_client.beta.chat.completions.parse(
+    model="hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
+    messages=[
+        {"role": "system", "content": "You are like so good at whatever you do."},
+        {"role": "user", "content": "My name is Cameron and I am 28 years old. What's my name and age?"}
+    ],
+    response_format=Testing
+)
+```
+
+You should receive a `ParsedChatCompletion[Testing]` object back:
+
+```python
+ParsedChatCompletion[Testing](
+    id='chatcmpl-3hykyf0fxus7jc90k6gwlw',
+    choices=[
+        ParsedChoice[Testing](
+            finish_reason='stop',
+            index=0,
+            logprobs=None,
+            message=ParsedChatCompletionMessage[Testing](
+                content='{ "age": 28, "name": "Cameron" }',
+                refusal=None,
+                role='assistant',
+                function_call=None,
+                tool_calls=[],
+                parsed=Testing(name='Cameron', age=28)
+            )
+        )
+    ],
+    created=1728595622,
+    model='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct-Q4_K_M.gguf',
+    object='chat.completion',
+    service_tier=None,
+    system_fingerprint='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct-Q4_K_M.gguf',
+    usage=CompletionUsage(
+        completion_tokens=17,
+        prompt_tokens=47,
+        total_tokens=64,
+        completion_tokens_details=None,
+        prompt_tokens_details=None
+    )
+)
+```
+
+You can retrieve your `Testing` object with:
+
+```python
+response.choices[0].message.parsed
+```
diff --git a/outlines/fsm/guide.py b/outlines/fsm/guide.py
@@ -74,8 +74,8 @@ def copy(self):
 
 
 @cache()
-def create_states_mapping(regex_string, tokenizer):
-    return uncached_create_states_mapping(regex_string, tokenizer)
+def cached_create_states_mapping(regex_string, tokenizer, *args, **kwargs):
+    return uncached_create_states_mapping(regex_string, tokenizer, *args, **kwargs)
 
 
 class RegexGuide(CoreRegexGuide):
@@ -84,15 +84,19 @@ class RegexGuide(CoreRegexGuide):
     CoreRegexGuide with outlines cache
     """
 
-    def __init__(self, regex_string: str, tokenizer: "Tokenizer"):
-        (
-            self.states_to_token_maps,
-            self.empty_token_ids,
-            fsm_finals,
-        ) = create_states_mapping(regex_string, tokenizer)
-        self.eos_token_id = tokenizer.eos_token_id
-        self.final_states = fsm_finals | {-1}
-        self._cache_state_to_token_tensor()
+    @classmethod
+    def from_regex(
+        cls,
+        regex_string: str,
+        tokenizer,
+        **kwargs,
+    ):
+        return super().from_regex(
+            regex_string,
+            tokenizer,
+            _create_states_mapping=cached_create_states_mapping,
+            **kwargs,
+        )
 
 
 CFGState = collections.namedtuple("CFGState", ["parser_state", "prev_token"])

diff --git a/outlines/fsm/parsing.py b/outlines/fsm/parsing.py
@@ -1,10 +1,22 @@
 from copy import copy, deepcopy
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import Any, Dict, FrozenSet, Iterator, Optional, Set, Tuple, Union
+from typing import (
+    Any,
+    Dict,
+    FrozenSet,
+    Generator,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+)
 
 import interegular
-from interegular.fsm import FSM
+from interegular.fsm import FSM, Alphabet, OblivionError
 from interegular.patterns import Unsupported
 from lark import Lark, Token
 from lark.common import LexerConf, ParserConf
@@ -35,11 +47,9 @@
 from lark.parsers.lalr_interactive_parser import InteractiveParser
 from lark.parsers.lalr_parser import LALR_Parser, ParseConf, ParserState, _Parser
 from outlines_core.fsm.regex import (
-    fsm_union,
-    get_sub_fsms_from_seq,
+    BetterFSM,
     get_token_transition_keys,
     make_deterministic_fsm,
-    walk_fsm,
 )
 
 PartialParseState = Tuple[str, int]
@@ -920,3 +930,198 @@ def terminals_to_fsms(lp: PartialLark) -> Dict[str, FSM]:
         symbol_names_and_fsms[terminal.name] = fsm
 
     return symbol_names_and_fsms
+
+
+def fsm_union(
+    fsms: Sequence[FSM],
+) -> Tuple[FSM, Dict[int, Tuple[Set[Tuple[int, int]], Set[int], Dict[int, Set[int]]]]]:
+    """Construct an FSM representing the union of the FSMs in `fsms`.
+
+    This is an updated version of `interegular.fsm.FSM.union` made to return an
+    extra map of component FSMs to the sets of state transitions that
+    correspond to them in the new FSM.
+
+    """
+
+    alphabet, new_to_old = Alphabet.union(*[fsm.alphabet for fsm in fsms])
+
+    indexed_fsms = tuple(enumerate(fsms))
+
+    initial = {i: fsm.initial for (i, fsm) in indexed_fsms}
+
+    # Dedicated function accepting a "superset" and returning the next
+    # "superset" obtained by following this transition in the new FSM
+    def follow(current_state, new_transition: int):
+        next = {}
+        for i, f in indexed_fsms:
+            old_transition = new_to_old[i][new_transition]
+            if (
+                i in current_state
+                and current_state[i] in f.map
+                and old_transition in f.map[current_state[i]]
+            ):
+                next[i] = f.map[current_state[i]][old_transition]
+        if not next:
+            raise OblivionError
+        return next
+
+    states = [initial]
+    finals: Set[int] = set()
+    map: Dict[int, Dict[int, int]] = {}
+
+    # Map component FSMs to their new state-to-state transitions, finals, and a
+    # map translating component FSM states to aggregate FSM states
+    fsms_to_trans_finals: Dict[
+        int, Tuple[Set[Tuple[int, int]], Set[int], Dict[int, Set[int]]]
+    ] = {}
+
+    i = 0
+    while i < len(states):
+        state = states[i]
+
+        # Add to the finals of the aggregate FSM whenever we hit a final in a
+        # component FSM
+        if any(state.get(j, -1) in fsm.finals for (j, fsm) in indexed_fsms):
+            finals.add(i)
+
+        # Compute the map for this state
+        map[i] = {}
+        for transition in alphabet.by_transition:
+            try:
+                next = follow(state, transition)
+            except OblivionError:
+                # Reached an oblivion state; don't list it
+                continue
+            else:
+                try:
+                    # TODO: Seems like this could--and should--be avoided
+                    j = states.index(next)
+                except ValueError:
+                    j = len(states)
+                    states.append(next)
+
+                map[i][transition] = j
+
+                for fsm_id, fsm_state in next.items():
+                    (
+                        fsm_transitions,
+                        fsm_finals,
+                        fsm_old_to_new,
+                    ) = fsms_to_trans_finals.setdefault(fsm_id, (set(), set(), {}))
+                    old_from = state[fsm_id]
+                    old_to = fsm_state
+                    fsm_old_to_new.setdefault(old_from, set()).add(i)
+                    fsm_old_to_new.setdefault(old_to, set()).add(j)
+                    fsm_transitions.add((i, j))
+                    if fsm_state in fsms[fsm_id].finals:
+                        fsm_finals.add(j)
+
+        i += 1
+
+    fsm = FSM(
+        alphabet=alphabet,
+        states=range(len(states)),
+        initial=0,
+        finals=finals,
+        map=map,
+        __no_validation__=True,
+    )
+
+    fsm, old_to_new_states = make_deterministic_fsm(fsm)
+    _fsms_to_trans_finals = {
+        fsm_id: (
+            {(old_to_new_states[s1], old_to_new_states[s2]) for s1, s2 in transitions},
+            {old_to_new_states[s] for s in finals},
+            {
+                old_state: {old_to_new_states[new_state] for new_state in new_states}
+                for old_state, new_states in old_to_new.items()
+            },
+        )
+        for fsm_id, (transitions, finals, old_to_new) in sorted(
+            fsms_to_trans_finals.items(), key=lambda x: x[0]
+        )
+    }
+
+    return (
+        fsm,
+        _fsms_to_trans_finals,
+    )
+
+
+def get_sub_fsms_from_seq(
+    state_seq: Sequence[int],
+    fsms_to_trans_finals: Dict[
+        int, Tuple[Set[Tuple[int, int]], Set[int], Dict[int, Set[int]]]
+    ],
+) -> Generator[Tuple[int, bool, bool], None, None]:
+    """Get the indices of the sub-FSMs in `fsm` that could have matched the state sequence `state_seq`.
+
+    Parameters
+    ----------
+    state_seq
+        A state sequence.
+    fsms_to_trans_finals
+        A map from FSM indices to tuples containing sets of their state transitions
+        and sets of the final/accept states.
+
+    Returns
+    -------
+    A generator returning tuples containing each sub-FSM index (in the order
+    they were union-ed to construct `fsm`) and booleans indicating whether or
+    not there is another valid transition from the last state in the sequence
+    for the associated sub-FSM (i.e. if the FSM can continue
+    accepting/matching) and whether or not the sequence ends in a final state
+    of the sub-FSM.
+    """
+    state_seq_transitions = set(zip(state_seq[:-1], state_seq[1:]))
+    last_fsm_state = state_seq[-1]
+    yield from (
+        (
+            # The sub-FSM index
+            fsm_idx,
+            # Is there another possible transition in this sub-FSM?
+            any(last_fsm_state == from_s for (from_s, to_s) in transitions),
+            # Is this sub-FSM in a final state?
+            state_seq[-1] in finals,
+        )
+        for fsm_idx, (transitions, finals, _) in fsms_to_trans_finals.items()
+        if state_seq_transitions.issubset(transitions)
+    )
+
+
+def walk_fsm(
+    fsm: BetterFSM,
+    token_transition_keys: Sequence[int],
+    start_state: int,
+    full_match: bool = True,
+) -> List[int]:
+    fsm_finals = fsm.finals
+
+    state = start_state
+    accepted_states: List[int] = []
+    last_final_idx: int = 0
+
+    fsm_transitions = fsm.flat_transition_map
+
+    # Iterate over the token's transition key sequence. The transition key
+    # sequence represents the FSM traversal rules of the token's symbols.
+    for i, trans_key in enumerate(token_transition_keys):
+        new_state = fsm_transitions.get((state, trans_key))
+
+        if new_state is None:
+            if not full_match and last_final_idx > 0:
+                return accepted_states[:last_final_idx]
+
+            return []
+
+        state = new_state
+
+        if state in fsm_finals:
+            last_final_idx = i + 1
+
+        accepted_states.append(state)
+
+    if full_match and last_final_idx - 1 != i:
+        return []
+
+    return accepted_states
diff --git a/outlines/processors/structured.py b/outlines/processors/structured.py
@@ -149,7 +149,7 @@ def __init__(self, regex_string: str, tokenizer: "Tokenizer"):
         tokenizer
             An Outlines tokenizer
         """
-        guide = RegexGuide(regex_string, tokenizer)
+        guide = RegexGuide.from_regex(regex_string, tokenizer)
         super().__init__(tokenizer=tokenizer, guide=guide)