diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..f02f789
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,42 @@
+name: documentation
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+
+permissions:
+  contents: write
+
+jobs:
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install poetry
+        uses: abatilo/actions-poetry@v3
+
+      - name: Install dependencies
+        run: |
+          poetry install --without test,dev
+
+      - name: Sphinx build
+        run: |
+          cd docs && poetry run make html
+
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        with:
+          publish_branch: gh-pages
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/_build/html
+          force_orphan: true
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index e6862a5..97c5350 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,4 +160,7 @@ cython_debug/
 #.idea/
 
 # Ruff cache
-.ruff_cache/
\ No newline at end of file
+.ruff_cache/
+
+# Textmate cache
+.textmate_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ba231b0..d68c161 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.2.2
+  rev: v0.3.0
   hooks:
     # Run the linter.
     - id: ruff
@@ -19,5 +19,6 @@ repos:
       exclude: |
         (?x)^(
             example.py|
-            test\/.*.py
+            test\/.*.py|
+            docs\/.*.py
         )$
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..9102e55
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        }
+    ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 462293d..47af605 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 
 # textmate-grammar-python
 
-An interpreter for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format.
+A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format.
 
 ## Usage
 
@@ -65,8 +65,3 @@ Alternatively, with the keyword argument `flatten` the element is displayed as a
 
 ## TODO
 - Implement Begin/While pattern, required for other grammars.
-## Sources
-- [Textmate guide](https://www.apeth.com/nonblog/stories/textmatebundle.html)
-- [VSCode Syntax Highlighting guide](https://code.visualstudio.com/api/language-extensions/syntax-highlight-guide)
-- [vscode-textmate](https://github.com/microsoft/vscode-textmate)
-- [Macromates texmate](https://macromates.com/textmate/manual/)
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..cb3b4e7
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+_build/
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/apidocs/index.rst b/docs/apidocs/index.rst
new file mode 100644
index 0000000..2ec03ef
--- /dev/null
+++ b/docs/apidocs/index.rst
@@ -0,0 +1,11 @@
+API Reference
+=============
+
+This page contains auto-generated API reference documentation [#f1]_.
+
+.. toctree::
+   :titlesonly:
+
+   textmate_grammar/textmate_grammar
+
+.. [#f1] Created with `sphinx-autodoc2 `_
diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst b/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst
new file mode 100644
index 0000000..997c238
--- /dev/null
+++ b/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst
@@ -0,0 +1,147 @@
+:py:mod:`textmate_grammar.cache`
+================================
+
+.. py:module:: textmate_grammar.cache
+
+.. autodoc2-docstring:: textmate_grammar.cache
+   :allowtitles:
+
+Module Contents
+---------------
+
+Classes
+~~~~~~~
+
+.. list-table::
+   :class: autosummary longtable
+   :align: left
+
+   * - :py:obj:`TextmateCache `
+     - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache
+          :summary:
+   * - :py:obj:`SimpleCache `
+     - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache
+          :summary:
+   * - :py:obj:`ShelveCache `
+     - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache
+          :summary:
+
+Functions
+~~~~~~~~~
+
+.. list-table::
+   :class: autosummary longtable
+   :align: left
+
+   * - :py:obj:`init_cache `
+     - .. autodoc2-docstring:: textmate_grammar.cache.init_cache
+          :summary:
+
+Data
+~~~~
+
+.. list-table::
+   :class: autosummary longtable
+   :align: left
+
+   * - :py:obj:`CACHE_DIR `
+     - .. autodoc2-docstring:: textmate_grammar.cache.CACHE_DIR
+          :summary:
+   * - :py:obj:`CACHE `
+     - .. autodoc2-docstring:: textmate_grammar.cache.CACHE
+          :summary:
+
+API
+~~~
+
+.. py:data:: CACHE_DIR
+   :canonical: textmate_grammar.cache.CACHE_DIR
+   :value: 'resolve(...)'
+
+   .. autodoc2-docstring:: textmate_grammar.cache.CACHE_DIR
+
+.. py:class:: TextmateCache
+   :canonical: textmate_grammar.cache.TextmateCache
+
+   Bases: :py:obj:`typing.Protocol`
+
+   .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache
+
+   .. py:method:: cache_valid(filepath: pathlib.Path) -> bool
+      :canonical: textmate_grammar.cache.TextmateCache.cache_valid
+
+      .. 
autodoc2-docstring:: textmate_grammar.cache.TextmateCache.cache_valid + + .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement + :canonical: textmate_grammar.cache.TextmateCache.load + + .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache.load + + .. py:method:: save(filePath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None + :canonical: textmate_grammar.cache.TextmateCache.save + + .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache.save + +.. py:class:: SimpleCache() + :canonical: textmate_grammar.cache.SimpleCache + + Bases: :py:obj:`textmate_grammar.cache.TextmateCache` + + .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.__init__ + + .. py:method:: cache_valid(filepath: pathlib.Path) -> bool + :canonical: textmate_grammar.cache.SimpleCache.cache_valid + + .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.cache_valid + + .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement + :canonical: textmate_grammar.cache.SimpleCache.load + + .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.load + + .. py:method:: save(filepath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None + :canonical: textmate_grammar.cache.SimpleCache.save + + .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.save + +.. py:class:: ShelveCache() + :canonical: textmate_grammar.cache.ShelveCache + + Bases: :py:obj:`textmate_grammar.cache.TextmateCache` + + .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.__init__ + + .. py:method:: cache_valid(filepath: pathlib.Path) -> bool + :canonical: textmate_grammar.cache.ShelveCache.cache_valid + + .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.cache_valid + + .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement + :canonical: textmate_grammar.cache.ShelveCache.load + + .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.load + + .. py:method:: save(filepath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None + :canonical: textmate_grammar.cache.ShelveCache.save + + .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.save + +.. py:data:: CACHE + :canonical: textmate_grammar.cache.CACHE + :type: textmate_grammar.cache.TextmateCache + :value: 'SimpleCache(...)' + + .. autodoc2-docstring:: textmate_grammar.cache.CACHE + +.. py:function:: init_cache(type: str = 'simple') -> textmate_grammar.cache.TextmateCache + :canonical: textmate_grammar.cache.init_cache + + .. autodoc2-docstring:: textmate_grammar.cache.init_cache diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst b/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst new file mode 100644 index 0000000..70d248f --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst @@ -0,0 +1,129 @@ +:py:mod:`textmate_grammar.elements` +=================================== + +.. py:module:: textmate_grammar.elements + +.. autodoc2-docstring:: textmate_grammar.elements + :allowtitles: + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`Capture ` + - .. autodoc2-docstring:: textmate_grammar.elements.Capture + :summary: + * - :py:obj:`ContentElement ` + - .. 
autodoc2-docstring:: textmate_grammar.elements.ContentElement + :summary: + * - :py:obj:`ContentBlockElement ` + - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement + :summary: + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`TOKEN_DICT ` + - .. autodoc2-docstring:: textmate_grammar.elements.TOKEN_DICT + :summary: + +API +~~~ + +.. py:data:: TOKEN_DICT + :canonical: textmate_grammar.elements.TOKEN_DICT + :value: None + + .. autodoc2-docstring:: textmate_grammar.elements.TOKEN_DICT + +.. py:class:: Capture(handler: textmate_grammar.handler.ContentHandler, pattern: textmate_grammar.handler.Pattern, matching: textmate_grammar.handler.Match, parsers: dict[int, textmate_grammar.parser.GrammarParser], starting: tuple[int, int], boundary: tuple[int, int], key: str = '', **kwargs) + :canonical: textmate_grammar.elements.Capture + + .. autodoc2-docstring:: textmate_grammar.elements.Capture + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.elements.Capture.__init__ + + .. py:method:: dispatch() -> list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] + :canonical: textmate_grammar.elements.Capture.dispatch + + .. autodoc2-docstring:: textmate_grammar.elements.Capture.dispatch + +.. py:class:: ContentElement(token: str, grammar: dict, content: str, characters: dict[textmate_grammar.handler.POS, str], children: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None) + :canonical: textmate_grammar.elements.ContentElement + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.__init__ + + .. py:property:: children + :canonical: textmate_grammar.elements.ContentElement.children + :type: list[textmate_grammar.elements.ContentElement] + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.children + + .. py:method:: find(tokens: str | list[str], start_tokens: str | list[str] = '', hide_tokens: str | list[str] = '', stop_tokens: str | list[str] = '', depth: int = -1, attribute: str = '_subelements', stack: list[str] | None = None) -> typing.Generator[tuple[textmate_grammar.elements.ContentElement, list[str]], None, None] + :canonical: textmate_grammar.elements.ContentElement.find + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.find + + .. py:method:: findall(tokens: str | list[str], start_tokens: str | list[str] = '', hide_tokens: str | list[str] = '', stop_tokens: str | list[str] = '', depth: int = -1, attribute: str = '_subelements') -> list[tuple[textmate_grammar.elements.ContentElement, list[str]]] + :canonical: textmate_grammar.elements.ContentElement.findall + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.findall + + .. py:method:: to_dict(depth: int = -1, all_content: bool = False, **kwargs) -> dict + :canonical: textmate_grammar.elements.ContentElement.to_dict + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.to_dict + + .. py:method:: flatten() -> list[tuple[tuple[int, int], str, list[str]]] + :canonical: textmate_grammar.elements.ContentElement.flatten + + .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.flatten + + .. py:method:: print(flatten: bool = False, depth: int = -1, all_content: bool = False, **kwargs) -> None + :canonical: textmate_grammar.elements.ContentElement.print + + .. 
autodoc2-docstring:: textmate_grammar.elements.ContentElement.print + +.. py:class:: ContentBlockElement(begin: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None, end: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None, **kwargs) + :canonical: textmate_grammar.elements.ContentBlockElement + + Bases: :py:obj:`textmate_grammar.elements.ContentElement` + + .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.__init__ + + .. py:property:: begin + :canonical: textmate_grammar.elements.ContentBlockElement.begin + :type: list[textmate_grammar.elements.ContentElement] + + .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.begin + + .. py:property:: end + :canonical: textmate_grammar.elements.ContentBlockElement.end + :type: list[textmate_grammar.elements.ContentElement] + + .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.end + + .. py:method:: to_dict(depth: int = -1, all_content: bool = False, **kwargs) -> dict + :canonical: textmate_grammar.elements.ContentBlockElement.to_dict + + .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.to_dict diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst b/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst new file mode 100644 index 0000000..b1e7b4c --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst @@ -0,0 +1,68 @@ +:py:mod:`textmate_grammar.exceptions` +===================================== + +.. py:module:: textmate_grammar.exceptions + +.. autodoc2-docstring:: textmate_grammar.exceptions + :allowtitles: + +Module Contents +--------------- + +API +~~~ + +.. py:exception:: IncludedParserNotFound(key: str = 'UNKNOWN', **kwargs) + :canonical: textmate_grammar.exceptions.IncludedParserNotFound + + Bases: :py:obj:`Exception` + + .. autodoc2-docstring:: textmate_grammar.exceptions.IncludedParserNotFound + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.exceptions.IncludedParserNotFound.__init__ + +.. py:exception:: IncompatibleFileType(extensions: list[str], **kwargs) + :canonical: textmate_grammar.exceptions.IncompatibleFileType + + Bases: :py:obj:`Exception` + + .. autodoc2-docstring:: textmate_grammar.exceptions.IncompatibleFileType + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.exceptions.IncompatibleFileType.__init__ + +.. py:exception:: FileNotFound(file: str, **kwargs) + :canonical: textmate_grammar.exceptions.FileNotFound + + Bases: :py:obj:`Exception` + + .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotFound + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotFound.__init__ + +.. py:exception:: FileNotParsed(file: str, **kwargs) + :canonical: textmate_grammar.exceptions.FileNotParsed + + Bases: :py:obj:`Exception` + + .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotParsed + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotParsed.__init__ + +.. py:exception:: ImpossibleSpan(**kwargs) + :canonical: textmate_grammar.exceptions.ImpossibleSpan + + Bases: :py:obj:`Exception` + + .. autodoc2-docstring:: textmate_grammar.exceptions.ImpossibleSpan + + .. rubric:: Initialization + + .. 
autodoc2-docstring:: textmate_grammar.exceptions.ImpossibleSpan.__init__ diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst new file mode 100644 index 0000000..58fc6d6 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst @@ -0,0 +1,39 @@ +:py:mod:`textmate_grammar.grammars.markdown` +============================================ + +.. py:module:: textmate_grammar.grammars.markdown + +.. autodoc2-docstring:: textmate_grammar.grammars.markdown + :allowtitles: + +Package Contents +---------------- + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`tmLanguageFile ` + - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageFile + :summary: + * - :py:obj:`tmLanguageYAML ` + - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageYAML + :summary: + +API +~~~ + +.. py:data:: tmLanguageFile + :canonical: textmate_grammar.grammars.markdown.tmLanguageFile + :value: None + + .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageFile + +.. py:data:: tmLanguageYAML + :canonical: textmate_grammar.grammars.markdown.tmLanguageYAML + :value: None + + .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageYAML diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst new file mode 100644 index 0000000..95bfd56 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst @@ -0,0 +1,39 @@ +:py:mod:`textmate_grammar.grammars.matlab` +========================================== + +.. py:module:: textmate_grammar.grammars.matlab + +.. autodoc2-docstring:: textmate_grammar.grammars.matlab + :allowtitles: + +Package Contents +---------------- + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`tmLanguageFile ` + - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageFile + :summary: + * - :py:obj:`tmLanguageYAML ` + - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageYAML + :summary: + +API +~~~ + +.. py:data:: tmLanguageFile + :canonical: textmate_grammar.grammars.matlab.tmLanguageFile + :value: None + + .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageFile + +.. py:data:: tmLanguageYAML + :canonical: textmate_grammar.grammars.matlab.tmLanguageYAML + :value: None + + .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageYAML diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst new file mode 100644 index 0000000..81e1433 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst @@ -0,0 +1,17 @@ +:py:mod:`textmate_grammar.grammars` +=================================== + +.. py:module:: textmate_grammar.grammars + +.. autodoc2-docstring:: textmate_grammar.grammars + :allowtitles: + +Subpackages +----------- + +.. 
toctree:: + :titlesonly: + :maxdepth: 3 + + textmate_grammar.grammars.markdown + textmate_grammar.grammars.matlab diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst b/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst new file mode 100644 index 0000000..6543ea1 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst @@ -0,0 +1,102 @@ +:py:mod:`textmate_grammar.handler` +================================== + +.. py:module:: textmate_grammar.handler + +.. autodoc2-docstring:: textmate_grammar.handler + :allowtitles: + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`ContentHandler ` + - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler + :summary: + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`POS ` + - .. autodoc2-docstring:: textmate_grammar.handler.POS + :summary: + +API +~~~ + +.. py:data:: POS + :canonical: textmate_grammar.handler.POS + :value: None + + .. autodoc2-docstring:: textmate_grammar.handler.POS + +.. py:class:: ContentHandler(source: str) + :canonical: textmate_grammar.handler.ContentHandler + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.__init__ + + .. py:attribute:: notLookForwardEOL + :canonical: textmate_grammar.handler.ContentHandler.notLookForwardEOL + :value: 'compile(...)' + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.notLookForwardEOL + + .. py:method:: from_path(file_path: pathlib.Path) + :canonical: textmate_grammar.handler.ContentHandler.from_path + :classmethod: + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.from_path + + .. py:method:: next(pos: textmate_grammar.handler.POS, step: int = 1) -> textmate_grammar.handler.POS + :canonical: textmate_grammar.handler.ContentHandler.next + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.next + + .. py:method:: prev(pos: textmate_grammar.handler.POS, step: int = 1) -> textmate_grammar.handler.POS + :canonical: textmate_grammar.handler.ContentHandler.prev + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.prev + + .. py:method:: range(start: textmate_grammar.handler.POS, close: textmate_grammar.handler.POS) -> list[textmate_grammar.handler.POS] + :canonical: textmate_grammar.handler.ContentHandler.range + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.range + + .. py:method:: chars(start: textmate_grammar.handler.POS, close: textmate_grammar.handler.POS) -> dict[textmate_grammar.handler.POS, str] + :canonical: textmate_grammar.handler.ContentHandler.chars + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.chars + + .. py:method:: read_pos(start_pos: textmate_grammar.handler.POS, close_pos: textmate_grammar.handler.POS, skip_newline: bool = True) -> str + :canonical: textmate_grammar.handler.ContentHandler.read_pos + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read_pos + + .. py:method:: read_line(pos: textmate_grammar.handler.POS) -> str + :canonical: textmate_grammar.handler.ContentHandler.read_line + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read_line + + .. py:method:: read(start_pos: textmate_grammar.handler.POS, length: int = 1, skip_newline: bool = True) -> str + :canonical: textmate_grammar.handler.ContentHandler.read + + .. 
autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read + + .. py:method:: search(pattern: onigurumacffi._Pattern, starting: textmate_grammar.handler.POS, boundary: textmate_grammar.handler.POS | None = None, greedy: bool = False, **kwargs) -> tuple[onigurumacffi._Match | None, tuple[textmate_grammar.handler.POS, textmate_grammar.handler.POS] | None] + :canonical: textmate_grammar.handler.ContentHandler.search + + .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.search diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.language.rst b/docs/apidocs/textmate_grammar/textmate_grammar.language.rst new file mode 100644 index 0000000..f644977 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.language.rst @@ -0,0 +1,76 @@ +:py:mod:`textmate_grammar.language` +=================================== + +.. py:module:: textmate_grammar.language + +.. autodoc2-docstring:: textmate_grammar.language + :allowtitles: + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`DummyParser ` + - .. autodoc2-docstring:: textmate_grammar.language.DummyParser + :summary: + * - :py:obj:`LanguageParser ` + - .. autodoc2-docstring:: textmate_grammar.language.LanguageParser + :summary: + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`LANGUAGE_PARSERS ` + - .. autodoc2-docstring:: textmate_grammar.language.LANGUAGE_PARSERS + :summary: + +API +~~~ + +.. py:data:: LANGUAGE_PARSERS + :canonical: textmate_grammar.language.LANGUAGE_PARSERS + :value: None + + .. autodoc2-docstring:: textmate_grammar.language.LANGUAGE_PARSERS + +.. py:class:: DummyParser() + :canonical: textmate_grammar.language.DummyParser + + Bases: :py:obj:`textmate_grammar.parser.GrammarParser` + + .. autodoc2-docstring:: textmate_grammar.language.DummyParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.language.DummyParser.__init__ + +.. py:class:: LanguageParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.language.LanguageParser + + Bases: :py:obj:`textmate_grammar.parser.PatternsParser` + + .. autodoc2-docstring:: textmate_grammar.language.LanguageParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.language.LanguageParser.__init__ + + .. py:method:: parse_file(filePath: str | pathlib.Path, **kwargs) -> textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement | None + :canonical: textmate_grammar.language.LanguageParser.parse_file + + .. autodoc2-docstring:: textmate_grammar.language.LanguageParser.parse_file + + .. py:method:: parse_string(input: str, **kwargs) + :canonical: textmate_grammar.language.LanguageParser.parse_string + + .. autodoc2-docstring:: textmate_grammar.language.LanguageParser.parse_string diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst b/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst new file mode 100644 index 0000000..d2b25d4 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst @@ -0,0 +1,183 @@ +:py:mod:`textmate_grammar.logger` +================================= + +.. py:module:: textmate_grammar.logger + +.. autodoc2-docstring:: textmate_grammar.logger + :allowtitles: + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`LogFormatter ` + - .. 
autodoc2-docstring:: textmate_grammar.logger.LogFormatter + :summary: + * - :py:obj:`Logger ` + - .. autodoc2-docstring:: textmate_grammar.logger.Logger + :summary: + +Functions +~~~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`track_depth ` + - .. autodoc2-docstring:: textmate_grammar.logger.track_depth + :summary: + +Data +~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`MAX_LENGTH ` + - .. autodoc2-docstring:: textmate_grammar.logger.MAX_LENGTH + :summary: + * - :py:obj:`LOGGER ` + - .. autodoc2-docstring:: textmate_grammar.logger.LOGGER + :summary: + +API +~~~ + +.. py:data:: MAX_LENGTH + :canonical: textmate_grammar.logger.MAX_LENGTH + :value: 79 + + .. autodoc2-docstring:: textmate_grammar.logger.MAX_LENGTH + +.. py:function:: track_depth(func) + :canonical: textmate_grammar.logger.track_depth + + .. autodoc2-docstring:: textmate_grammar.logger.track_depth + +.. py:class:: LogFormatter(fmt=None, datefmt=None, style='%', validate=True, *, defaults=None) + :canonical: textmate_grammar.logger.LogFormatter + + Bases: :py:obj:`logging.Formatter` + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.__init__ + + .. py:attribute:: green + :canonical: textmate_grammar.logger.LogFormatter.green + :value: '\x1b[32;32m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.green + + .. py:attribute:: grey + :canonical: textmate_grammar.logger.LogFormatter.grey + :value: '\x1b[38;20m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.grey + + .. py:attribute:: yellow + :canonical: textmate_grammar.logger.LogFormatter.yellow + :value: '\x1b[33;20m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.yellow + + .. py:attribute:: red + :canonical: textmate_grammar.logger.LogFormatter.red + :value: '\x1b[31;20m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.red + + .. py:attribute:: bold_red + :canonical: textmate_grammar.logger.LogFormatter.bold_red + :value: '\x1b[31;1m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.bold_red + + .. py:attribute:: reset + :canonical: textmate_grammar.logger.LogFormatter.reset + :value: '\x1b[0m' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.reset + + .. py:attribute:: format_string + :canonical: textmate_grammar.logger.LogFormatter.format_string + :value: '%(name)s:%(message)s' + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.format_string + + .. py:attribute:: FORMATS + :canonical: textmate_grammar.logger.LogFormatter.FORMATS + :value: None + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.FORMATS + + .. py:method:: format(record) + :canonical: textmate_grammar.logger.LogFormatter.format + + .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.format + +.. py:class:: Logger(**kwargs) + :canonical: textmate_grammar.logger.Logger + + .. autodoc2-docstring:: textmate_grammar.logger.Logger + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.__init__ + + .. py:attribute:: long_msg_div + :canonical: textmate_grammar.logger.Logger.long_msg_div + :value: '\x1b[1;32m ... \x1b[0m' + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.long_msg_div + + .. 
py:method:: configure(parser: textmate_grammar.parser.GrammarParser, height: int, width: int, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.configure + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.configure + + .. py:method:: format_message(message: str, parser: typing.Optional[textmate_grammar.parser.GrammarParser] = None, position: tuple[int, int] | None = None, depth: int = 0) -> str + :canonical: textmate_grammar.logger.Logger.format_message + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.format_message + + .. py:method:: debug(*args, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.debug + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.debug + + .. py:method:: info(*args, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.info + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.info + + .. py:method:: warning(*args, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.warning + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.warning + + .. py:method:: error(*args, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.error + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.error + + .. py:method:: critical(*args, **kwargs) -> None + :canonical: textmate_grammar.logger.Logger.critical + + .. autodoc2-docstring:: textmate_grammar.logger.Logger.critical + +.. py:data:: LOGGER + :canonical: textmate_grammar.logger.LOGGER + :value: 'Logger(...)' + + .. autodoc2-docstring:: textmate_grammar.logger.LOGGER diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst b/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst new file mode 100644 index 0000000..cd9e265 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst @@ -0,0 +1,128 @@ +:py:mod:`textmate_grammar.parser` +================================= + +.. py:module:: textmate_grammar.parser + +.. autodoc2-docstring:: textmate_grammar.parser + :allowtitles: + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. list-table:: + :class: autosummary longtable + :align: left + + * - :py:obj:`GrammarParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser + :summary: + * - :py:obj:`TokenParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.TokenParser + :summary: + * - :py:obj:`MatchParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.MatchParser + :summary: + * - :py:obj:`ParserHasPatterns ` + - + * - :py:obj:`PatternsParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser + :summary: + * - :py:obj:`BeginEndParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser + :summary: + * - :py:obj:`BeginWhileParser ` + - .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser + :summary: + +API +~~~ + +.. py:class:: GrammarParser(grammar: dict, language: textmate_grammar.language.LanguageParser | None = None, key: str = '', is_capture: bool = False, **kwargs) + :canonical: textmate_grammar.parser.GrammarParser + + Bases: :py:obj:`abc.ABC` + + .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.__init__ + + .. py:method:: initialize(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.GrammarParser.initialize + :staticmethod: + + .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.initialize + + .. 
py:method:: parse(handler: textmate_grammar.handler.ContentHandler, starting: textmate_grammar.handler.POS = (0, 0), boundary: textmate_grammar.handler.POS | None = None, **kwargs) -> tuple[bool, list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement], tuple[int, int] | None] + :canonical: textmate_grammar.parser.GrammarParser.parse + + .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.parse + + .. py:method:: match_and_capture(handler: textmate_grammar.handler.ContentHandler, pattern: textmate_grammar.handler.Pattern, starting: textmate_grammar.handler.POS, boundary: textmate_grammar.handler.POS, parsers: dict[int, textmate_grammar.parser.GrammarParser] | None = None, parent_capture: textmate_grammar.elements.Capture | None = None, **kwargs) -> tuple[tuple[textmate_grammar.handler.POS, textmate_grammar.handler.POS] | None, str, list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement]] + :canonical: textmate_grammar.parser.GrammarParser.match_and_capture + + .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.match_and_capture + +.. py:class:: TokenParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.TokenParser + + Bases: :py:obj:`textmate_grammar.parser.GrammarParser` + + .. autodoc2-docstring:: textmate_grammar.parser.TokenParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.TokenParser.__init__ + +.. py:class:: MatchParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.MatchParser + + Bases: :py:obj:`textmate_grammar.parser.GrammarParser` + + .. autodoc2-docstring:: textmate_grammar.parser.MatchParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.MatchParser.__init__ + +.. py:class:: ParserHasPatterns(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.ParserHasPatterns + + Bases: :py:obj:`textmate_grammar.parser.GrammarParser`, :py:obj:`abc.ABC` + +.. py:class:: PatternsParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.PatternsParser + + Bases: :py:obj:`textmate_grammar.parser.ParserHasPatterns` + + .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser.__init__ + +.. py:class:: BeginEndParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.BeginEndParser + + Bases: :py:obj:`textmate_grammar.parser.ParserHasPatterns` + + .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser.__init__ + +.. py:class:: BeginWhileParser(grammar: dict, **kwargs) + :canonical: textmate_grammar.parser.BeginWhileParser + + Bases: :py:obj:`textmate_grammar.parser.PatternsParser` + + .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser + + .. rubric:: Initialization + + .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser.__init__ diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.rst b/docs/apidocs/textmate_grammar/textmate_grammar.rst new file mode 100644 index 0000000..76025e0 --- /dev/null +++ b/docs/apidocs/textmate_grammar/textmate_grammar.rst @@ -0,0 +1,31 @@ +:py:mod:`textmate_grammar` +========================== + +.. py:module:: textmate_grammar + +.. autodoc2-docstring:: textmate_grammar + :allowtitles: + +Subpackages +----------- + +.. 
toctree::
+   :titlesonly:
+   :maxdepth: 3
+
+   textmate_grammar.grammars
+
+Submodules
+----------
+
+.. toctree::
+   :titlesonly:
+   :maxdepth: 1
+
+   textmate_grammar.logger
+   textmate_grammar.exceptions
+   textmate_grammar.elements
+   textmate_grammar.handler
+   textmate_grammar.language
+   textmate_grammar.cache
+   textmate_grammar.parser
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..3ed45f4
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,56 @@
+from datetime import date
+
+from textmate_grammar import __version__
+
+# -- Project information -----------------------------------------------------
+
+project = "Textmate Grammar Python"
+version = __version__
+copyright = f"{date.today().year}, Mark Shui Hu"
+author = "Mark Shui Hu"
+
+# -- General configuration ---------------------------------------------------
+
+extensions = [
+    "myst_parser",
+    "autodoc2",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.todo",
+]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3/", None),
+}
+myst_enable_extensions = ["fieldlist", "deflist"]
+
+# -- HTML output -------------------------------------------------
+
+html_theme = "furo"
+html_title = "Textmate Grammar Python"
+html_theme_options = {
+    "top_of_page_button": "edit",
+    "source_repository": "https://github.com/watermarkhu/textmate-grammar-python/",
+    "source_branch": "main",
+    "source_directory": "docs/",
+    "footer_icons": [
+        {
+            "name": "GitHub",
+            "url": "https://github.com/watermarkhu/textmate-grammar-python",
+            "html": """
+
+
+            """,
+            "class": "",
+        },
+    ],
+}
+
+# --- Autodoc configuration ------
+
+autodoc2_packages = ["../src/textmate_grammar"]
+autodoc2_hidden_objects = ["dunder", "private", "inherited"]
+
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..f363f1e
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,25 @@
+[![PyPI - Version](https://img.shields.io/pypi/v/textmate-grammar-python.svg)](https://pypi.python.org/pypi/textmate-grammar-python)
+[![PyPI - License](https://img.shields.io/pypi/l/textmate-grammar-python.svg)](https://github.com/watermarkhu/textmate-grammar-python/tree/main?tab=MIT-1-ov-file)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+[![Checked with mypy](https://img.shields.io/badge/mypy-checked-blue)](http://mypy-lang.org/)
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
+[![Python versions](https://img.shields.io/pypi/pyversions/textmate-grammar-python.svg)](https://pypi.python.org/pypi/textmate-grammar-python)
+[![CI/CD](https://github.com/watermarkhu/textmate-grammar-python/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/watermarkhu/textmate-grammar-python/blob/main/.github/workflows/ci.yml)
+
+
+# textmate-grammar-python
+
+A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format.
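+
+A minimal usage sketch (mirroring `example.py` in this repository; the bundled
+MATLAB grammar is used here, and the file path is illustrative):
+
+```python
+from pathlib import Path
+
+from textmate_grammar.grammars import matlab
+from textmate_grammar.language import LanguageParser
+
+parser = LanguageParser(matlab.GRAMMAR)
+element = parser.parse_file(Path("AnEnum.m"))
+if element is not None:
+    element.print()
+```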
+
+## Sources
+- [Textmate guide](https://www.apeth.com/nonblog/stories/textmatebundle.html)
+- [VSCode Syntax Highlighting guide](https://code.visualstudio.com/api/language-extensions/syntax-highlight-guide)
+- [vscode-textmate](https://github.com/microsoft/vscode-textmate)
+- [Macromates TextMate](https://macromates.com/textmate/manual/)
+
+
+```{toctree}
+:maxdepth: 2
+
+apidocs/index
+```
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..32bb245
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/example.py b/example.py
index 4204619..ca03a37 100644
--- a/example.py
+++ b/example.py
@@ -1,13 +1,21 @@
 import logging
 from pathlib import Path
-from textmate_grammar.language import LanguageParser
+
+from textmate_grammar.cache import init_cache
 from textmate_grammar.grammars import matlab
+from textmate_grammar.language import LanguageParser
 
+# Initialize shelved cache
+init_cache("shelve")
+
+# Initialize language parser
 logging.getLogger().setLevel(logging.DEBUG)
 logging.getLogger("textmate_grammar").setLevel(logging.INFO)
 parser = LanguageParser(matlab.GRAMMAR)
 
+# Parse file
 filePath = Path(__file__).parent / "syntaxes" / "matlab" / "AnEnum.m"
-
 element = parser.parse_file(filePath)
+
+# Print element
 element.print()
diff --git a/pyproject.toml b/pyproject.toml
index d02c73c..1beeac7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,11 @@
-##################################### poetry #####################################
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "textmate-grammar-python"
 version = "0.2.0"
-description = "An interpreter for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format."
+description = "A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python."
authors = ["Mark Shui Hu "] license = "MIT" readme = "README.md" @@ -23,12 +26,18 @@ tox = "^4.11.4" [tool.poetry.group.dev.dependencies] pre-commit = "^3.6.0" mypy = "^1.8.0" +ruff = "^0.3.0" types-pyyaml = "^6.0.12.12" +poetry-bumpversion = "^0.3.2" +[tool.poetry.group.doc.dependencies] +sphinx = "^7.2.6" +sphinx-autodoc2 = "^0.5.0" +myst-parser = "^2.0.0" +furo = "^2024.1.29" +[tool.poetry_bumpversion.file."src/textmate_grammar/__init__.py"] -##################################### ruff ##################################### -ruff = "^0.2.1" [tool.ruff] include = ["pyproject.toml", "src/textmate_grammar/**/*.py"] exclude = ["src/textmate_grammar/grammars/"] @@ -47,12 +56,34 @@ line-ending = "auto" docstring-code-format = true docstring-code-line-length = "dynamic" -##################################### mypy ##################################### [tool.mypy] ignore_missing_imports = true -exclude = ["test/*", "example.py"] +exclude = ["test/*", "docs/*", "example.py"] -##################################### build ##################################### -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +[tool.tox] +legacy_tox_ini = """ + [tox] + isolated_build = true + + [testenv] + skip_install = true + allowlist_externals = poetry + commands = + poetry run pytest test/unit + + [testenv:mypy] + skip_install = true + allowlist_externals = mypy + commands = + mypy . + + [testenv:regression] + skip_install = true + allowlist_externals = poetry, bash, sudo + platform = linux + change_dir = {tox_root}/test/regression + commands_pre = + bash install.sh + commands = + poetry run pytest . +""" diff --git a/src/textmate_grammar/__init__.py b/src/textmate_grammar/__init__.py index e69de29..d3ec452 100644 --- a/src/textmate_grammar/__init__.py +++ b/src/textmate_grammar/__init__.py @@ -0,0 +1 @@ +__version__ = "0.2.0" diff --git a/src/textmate_grammar/cache.py b/src/textmate_grammar/cache.py new file mode 100644 index 0000000..16fd96e --- /dev/null +++ b/src/textmate_grammar/cache.py @@ -0,0 +1,160 @@ +import atexit +from pathlib import Path +from pickle import UnpicklingError +from typing import Protocol + +from .elements import ContentElement + +CACHE_DIR = (Path() / ".textmate_cache").resolve() +CACHE_DIR.mkdir(parents=True, exist_ok=True) + + +def _path_to_key(path: Path) -> str: + return str(path.resolve()) + + +class TextmateCache(Protocol): + """Interface for a Textmate cache.""" + + def cache_valid(self, filepath: Path) -> bool: + """ + Check if the cache for the given filepath is valid. + + :param filepath: The path to the file. + :return: True if the cache is valid, False otherwise. + """ + ... + + def load(self, filepath: Path) -> ContentElement: + """ + Load the content from the specified filepath. + + :param filepath: The path to the file to load. + :return: The loaded content element. + """ + ... + + def save(self, filePath: Path, element: ContentElement) -> None: + """ + Save the given content element to the specified file path. + + :param filePath: The file path where the content element should be saved. + :param element: The content element to be saved. + :return: None + """ + ... 
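+
+# Note: ``TextmateCache`` is a ``typing.Protocol``, so conformance is structural;
+# any object providing ``cache_valid``, ``load``, and ``save`` with these
+# signatures can stand in for the implementations below and be installed as the
+# module-level ``CACHE``.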
+
+
+class SimpleCache(TextmateCache):
+    """A simple cache implementation for storing content elements."""
+
+    def __init__(self) -> None:
+        """Initialize the SimpleCache."""
+        self._element_cache: dict[str, ContentElement] = dict()
+        self._element_timestamp: dict[str, float] = dict()
+
+    def cache_valid(self, filepath: Path) -> bool:
+        """Check if the cache is valid for the given filepath.
+
+        :param filepath: The filepath to check.
+        :return: True if the cache is valid, False otherwise.
+        """
+        key = _path_to_key(filepath)
+        if key not in self._element_cache:
+            return False
+        timestamp = filepath.resolve().stat().st_mtime
+        return timestamp == self._element_timestamp[key]
+
+    def load(self, filepath: Path) -> ContentElement:
+        """Load the content element from the cache for the given filepath.
+
+        :param filepath: The filepath to load the content element from.
+        :return: The loaded content element.
+        """
+        key = _path_to_key(filepath)
+        return self._element_cache[key]
+
+    def save(self, filepath: Path, element: ContentElement) -> None:
+        """Save the content element to the cache for the given filepath.
+
+        :param filepath: The filepath to save the content element to.
+        :param element: The content element to save.
+        :return: None
+        """
+        key = _path_to_key(filepath)
+        self._element_cache[key] = element
+        self._element_timestamp[key] = filepath.resolve().stat().st_mtime
+
+
+class ShelveCache(TextmateCache):
+    """A cache implementation using the shelve module."""
+
+    def __init__(self) -> None:
+        """Initialize the ShelveCache."""
+        import shelve
+
+        database_path = CACHE_DIR / "textmate.db"
+        self._database = shelve.open(str(database_path))
+
+        def exit():
+            self._database.sync()
+            self._database.close()
+
+        atexit.register(exit)
+
+    def cache_valid(self, filepath: Path) -> bool:
+        """Check if the cache is valid for the given filepath.
+
+        :param filepath: The filepath to check.
+        :return: True if the cache is valid, False otherwise.
+        """
+        key = _path_to_key(filepath)
+        if key not in self._database:
+            return False
+        timestamp = filepath.resolve().stat().st_mtime
+        try:
+            valid = timestamp == self._database[key][0]
+        except UnpicklingError:
+            valid = False
+        return valid
+
+    def load(self, filepath: Path) -> ContentElement:
+        """Load the content element from the cache for the given filepath.
+
+        :param filepath: The path for the cached content element.
+        :return: The loaded content element.
+        """
+        key = _path_to_key(filepath)
+        return self._database[key][1]
+
+    def save(self, filepath: Path, element: ContentElement) -> None:
+        """Save the content element to the cache for the given filepath.
+
+        :param filepath: The filepath to save the content element to.
+        :param element: The content element to save.
+        """
+        element._dispatch(nested=True)
+        key = _path_to_key(filepath)
+        timestamp = filepath.resolve().stat().st_mtime
+        self._database[key] = (timestamp, element)
+
+
+CACHE: "TextmateCache" = SimpleCache()
+
+
+def init_cache(type: str = "simple") -> "TextmateCache":
+    """
+    Initialize the cache based on the given type.
+
+    :param type: The type of cache to initialize. Defaults to "simple".
+    :return: The initialized cache object.
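+
+    A short usage sketch (as in ``example.py``; switches the module-level
+    ``CACHE`` to the shelve-backed implementation)::
+
+        from textmate_grammar.cache import init_cache
+
+        init_cache("shelve")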
+ """ + global CACHE + match type: + case "shelve": + CACHE = ShelveCache() + case "simple": + CACHE = SimpleCache() + return CACHE diff --git a/src/textmate_grammar/elements.py b/src/textmate_grammar/elements.py index 7007642..c4bd5f6 100644 --- a/src/textmate_grammar/elements.py +++ b/src/textmate_grammar/elements.py @@ -33,7 +33,20 @@ def __init__( boundary: tuple[int, int], key: str = "", **kwargs, - ) -> None: + ): + """ + Initialize a new instance of the Element class. + + :param handler: The content handler for the element. + :param pattern: The pattern used for matching. + :param matching: The match object. + :param parsers: A dictionary of grammar parsers. + :param starting: The starting position of the element. + :param boundary: The boundary position of the element. + :param key: The key for the element. Defaults to "". + :param **kwargs: Additional keyword arguments. + :returns: None + """ self.handler = handler self.pattern = pattern self.matching = matching @@ -57,7 +70,13 @@ def __repr__(self) -> str: return f"@capture<{self.key}>" def dispatch(self) -> list[Capture | ContentElement]: - """Dispatches the remaining parse of the capture group.""" + """Dispatches the remaining parse of the capture group. + + This method iterates over the defined parsers for the capture group and dispatches the remaining parse + based on the captured elements. It returns a list of captured elements or captures. + + :return: A list of Capture or ContentElement objects representing the parsed elements. + """ elements = [] for group_id, parser in self.parsers.items(): if group_id > self.pattern.number_of_captures(): @@ -100,22 +119,29 @@ def dispatch(self) -> list[Capture | ContentElement]: return elements -def dispatch_list( +def _dispatch_list( pending_elements: list[Capture | ContentElement], parent: ContentElement | None = None ) -> list[ContentElement]: """Dispatches all captured parsers in the list.""" elements = [] for item in pending_elements: if isinstance(item, Capture): - captured_elements: list[ContentElement] = dispatch_list(item.dispatch()) + captured_elements: list[ContentElement] = _dispatch_list(item.dispatch()) elements.extend(captured_elements) elif item != parent: elements.append(item) return elements +def _str_to_list(input: str | list[str]) -> list[str]: + if isinstance(input, str): + return [input] if input else [] + else: + return input + + class ContentElement: - """The base grammar element object.""" + """The parsed grammar element.""" def __init__( self, @@ -125,15 +151,23 @@ def __init__( characters: dict[POS, str], children: list[Capture | ContentElement] | None = None, ) -> None: + """ + Initialize a new instance of the Element class. + + :param token: The token associated with the element. + :param grammar: The grammar associated with the element. + :param content: The content associated with the element. + :param characters: The characters associated with the element. + :param children: The children associated with the element. Defaults to None. 
+ """ if children is None: children = [] self.token = token self.grammar = grammar self.content = content self.characters = characters - self._children_pending = children - self._children_dispached: list[ContentElement] = [] - self._dispatched_children: bool = False + self._children_captures = children + self._dispatched: bool = False @property def _subelements(self) -> list[ContentElement]: @@ -141,52 +175,72 @@ def _subelements(self) -> list[ContentElement]: @property def children(self) -> list[ContentElement]: - "Children elements" - if self._children_pending: - if not self._dispatched_children: - self._children_dispached = dispatch_list(self._children_pending, parent=self) - self._dispatched_children = True - return self._children_dispached - else: - return [] + """ + Returns a list of children elements. + + If the elements have not been dispatched yet, this method will dispatch them before returning. + + :return: A list of ContentElement objects representing the children elements. + """ + if not self._dispatched: + self._dispatch() + return self._children + + def _dispatch(self, nested: bool = False): + """ + Dispatches the content element and its children. + + :param nested: Indicates whether the dispatch is nested within another dispatch. + :type nested: bool + :return: None + """ + self._dispatched = True + self._children: list[ContentElement] = _dispatch_list(self._children_captures, parent=self) + self._children_captures = [] + if nested: + for child in self._children: + child._dispatch(True) def __eq__(self, other): if not isinstance(other, ContentElement): return False return bool(self.grammar == other.grammar and self.characters == other.characters) - def to_dict(self, verbosity: int = -1, all_content: bool = False, **kwargs) -> dict: - "Converts the object to dictionary." - out_dict = {"token": self.token} - if all_content or not self.children: - out_dict["content"] = self.content - if self.children: - out_dict["children"] = ( - self._list_property_to_dict( - "children", verbosity=verbosity - 1, all_content=all_content - ) - if verbosity - else self.children - ) - return out_dict - def find( self, tokens: str | list[str], + start_tokens: str | list[str] = "", + hide_tokens: str | list[str] = "", stop_tokens: str | list[str] = "", - verbosity: int = -1, - stack: list[str] | None = None, + depth: int = -1, attribute: str = "_subelements", + stack: list[str] | None = None, ) -> Generator[tuple[ContentElement, list[str]], None, None]: - """Find the next subelement that match the input token(s). + """ + Find content elements based on the given criteria. The find method will return a generator that globs though the element-tree, searching for the next subelement that matches the given token. + + :param tokens: The tokens to search for. Can be a single token or a list of tokens. + :param start_tokens: The tokens that mark the start of the search. Can be a single token or a list of tokens. + :param hide_tokens: The tokens to hide from the search results. Can be a single token or a list of tokens. + :param stop_tokens: The tokens that mark the end of the search. Can be a single token or a list of tokens. + :param depth: The maximum depth to search. Defaults to -1 (unlimited depth). + :param attribute: The attribute name to access the subelements. Defaults to "_subelements". + :param stack: The stack of tokens encountered during the search. Defaults to None. + + :yield: A tuple containing the found content element and the stack of tokens encountered. 
+ + :raises ValueError: If the input tokens and stop_tokens are not disjoint. + + :return: None if no matching content elements are found. """ - if isinstance(tokens, str): - tokens = [tokens] - if isinstance(stop_tokens, str): - stop_tokens = [stop_tokens] if stop_tokens else [] + tokens = _str_to_list(tokens) + start_tokens = _str_to_list(start_tokens) + hide_tokens = _str_to_list(hide_tokens) + stop_tokens = _str_to_list(stop_tokens) + if not set(tokens).isdisjoint(set(stop_tokens)): raise ValueError("Input tokens and stop_tokens must be disjoint") @@ -194,8 +248,10 @@ def find( stack = [] stack += [self.token] - if verbosity: - verbosity -= 1 + start_found = not start_tokens + + if depth: + depth -= 1 children: list[ContentElement] = getattr(self, attribute, self._subelements) for child in children: if stop_tokens and ( @@ -204,11 +260,24 @@ def find( ): return None - if child.token in tokens or tokens == ["*"]: + if not start_found and child.token in start_tokens: + start_found = True + start_tokens = [] + + if ( + start_found + and (child.token in tokens or tokens == ["*"]) + and child.token not in hide_tokens + ): yield child, [e for e in stack] - if verbosity: + if depth: nested_generator = child.find( - tokens, verbosity=verbosity - 1, stack=[e for e in stack] + tokens, + start_tokens=start_tokens, + hide_tokens=hide_tokens, + stop_tokens=stop_tokens, + depth=depth - 1, + stack=[e for e in stack], ) yield from nested_generator return None @@ -216,17 +285,64 @@ def find( def findall( self, tokens: str | list[str], + start_tokens: str | list[str] = "", + hide_tokens: str | list[str] = "", stop_tokens: str | list[str] = "", - verbosity: int = -1, + depth: int = -1, attribute: str = "_subelements", ) -> list[tuple[ContentElement, list[str]]]: - """Returns subelements that match the input token(s).""" + """ + Find all occurrences of the specified tokens within the content element. + + :param tokens: The tokens to search for. + :param start_tokens: The tokens that must appear before the found tokens. Defaults to "". + :param hide_tokens: The tokens that should be hidden from the search. Defaults to "". + :param stop_tokens: The tokens that, if found, should stop the search. Defaults to "". + :param depth: The maximum depth to search. Defaults to -1 (unlimited depth). + :param attribute: The attribute to search within. Defaults to "_subelements". + + :return: A list of tuples containing the content element and the found tokens. + """ return list( - self.find(tokens, stop_tokens=stop_tokens, verbosity=verbosity, attribute=attribute) + self.find( + tokens, + start_tokens=start_tokens, + hide_tokens=hide_tokens, + stop_tokens=stop_tokens, + depth=depth, + attribute=attribute, + ) ) + def to_dict(self, depth: int = -1, all_content: bool = False, **kwargs) -> dict: + """ + Converts the object to a dictionary. + + :param depth: The depth of the conversion. Defaults to -1. + :param all_content: Whether to include all content or only the top-level content. Defaults to False. + + :return: The converted dictionary representation of the object. 
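+
+        A short sketch (the token name shown is illustrative)::
+
+            summary = element.to_dict(depth=1)
+            # e.g. {"token": "source.matlab", "children": [...]}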
+ """ + out_dict = {"token": self.token} + if all_content or not self.children: + out_dict["content"] = self.content + if self.children: + out_dict["children"] = ( + self._list_property_to_dict("children", depth=depth - 1, all_content=all_content) + if depth + else self.children + ) + return out_dict + def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]: - """Converts the object to a flattened array of tokens per index.""" + """ + Converts the object to a flattened array of tokens per index, similarly to vscode-textmate. + + :return: A list of tuples representing the flattened tokens. Each tuple contains: + - A tuple representing the starting and ending index of the token. + - The content of the token. + - A list of keys associated with the token. + """ token_dict = self._token_by_index(defaultdict(list)) tokens = [] for (_, key), group in groupby(sorted(token_dict.items()), lambda x: (x[0][0], x[1])): @@ -242,11 +358,20 @@ def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]: def print( self, flatten: bool = False, - verbosity: int = -1, + depth: int = -1, all_content: bool = False, **kwargs, ) -> None: - """Prints the current object recursively by converting to dictionary.""" + """ + Prints the current object recursively by converting it to a dictionary or a flattened array. + + :param flatten: If True, flattens the object before printing. Defaults to False. + :param depth: The maximum depth to print. Defaults to -1 (unlimited depth). + :param all_content: If True, includes all content in the printout. Defaults to False. + :param **kwargs: Additional keyword arguments to be passed to the pprint function. + + :return: None + """ if flatten: pprint( self.flatten(**kwargs), @@ -256,14 +381,23 @@ def print( ) else: pprint( - self.to_dict(verbosity=verbosity, all_content=all_content, **kwargs), + self.to_dict(depth=depth, all_content=all_content, **kwargs), sort_dicts=False, width=kwargs.pop("width", 150), **kwargs, ) def _token_by_index(self, token_dict: TOKEN_DICT | None = None) -> TOKEN_DICT: - """Recursively tokenize every index between start and close.""" + """Recursively tokenize every index between start and close. + + This method recursively tokenizes every index between the start and close positions of the element. + It populates a dictionary, `token_dict`, with the tokens corresponding to each index. + + :param token_dict: A dictionary to store the tokens. If None, a new dictionary is created. + :type token_dict: dict | None + :return: A dictionary containing the tokens for each index. + :rtype: dict + """ if token_dict is None: token_dict = defaultdict(list) for pos in self.characters: @@ -295,17 +429,22 @@ def __init__( end: list[Capture | ContentElement] | None = None, **kwargs, ) -> None: + """ + Initialize a new instance of the Element class. + + :param begin: A list of Capture or ContentElement objects representing the beginning captures of the element. Defaults to None. + :param end: A list of Capture or ContentElement objects representing the ending captures of the element. Defaults to None. + :param **kwargs: Additional keyword arguments to be passed to the parent class constructor. 
@@ -295,17 +429,22 @@ def __init__(
         end: list[Capture | ContentElement] | None = None,
         **kwargs,
     ) -> None:
+        """
+        Initialize a new instance of the ContentBlockElement class.
+
+        :param begin: A list of Capture or ContentElement objects representing the beginning captures of the element. Defaults to None.
+        :param end: A list of Capture or ContentElement objects representing the ending captures of the element. Defaults to None.
+        :param **kwargs: Additional keyword arguments to be passed to the parent class constructor.
+
+        :return: None
+        """
         if end is None:
             end = []
         if begin is None:
             begin = []
         super().__init__(**kwargs)
-        self._begin_pending = begin
-        self._end_pending = end
-        self._begin_dispached: list[ContentElement] = []
-        self._end_dispached: list[ContentElement] = []
-        self._dispatched_begin: bool = False
-        self._dispatched_end: bool = False
+        self._begin_captures = begin
+        self._end_captures = end
 
     @property
     def _subelements(self) -> list[ContentElement]:
@@ -313,39 +452,61 @@ def _subelements(self) -> list[ContentElement]:
 
     @property
     def begin(self) -> list[ContentElement]:
-        "Begin elements"
-        if self._begin_pending:
-            if not self._dispatched_begin:
-                self._begin_dispached = dispatch_list(self._begin_pending, parent=self)
-                self._dispatched_begin = True
-            return self._begin_dispached
-        else:
-            return []
+        """
+        Returns the list of begin elements.
+
+        If the elements have not been dispatched yet, this method will dispatch them before returning.
+
+        :return: The list of begin elements.
+        """
+        if not self._dispatched:
+            self._dispatch()
+        return self._begin
 
     @property
     def end(self) -> list[ContentElement]:
-        "End elements"
-        if self._end_pending:
-            if not self._dispatched_end:
-                self._end_dispached = dispatch_list(self._end_pending, parent=self)
-                self._dispatched_end = True
-            return self._end_dispached
-        else:
-            return []
+        """
+        Returns the end elements.
+
+        If the elements have not been dispatched yet, this method will dispatch them before returning.
 
-    def to_dict(self, verbosity: int = -1, all_content: bool = False, **kwargs) -> dict:
-        out_dict = super().to_dict(verbosity=verbosity, all_content=all_content, **kwargs)
+        :return: A list of end elements.
+        """
+        if not self._dispatched:
+            self._dispatch()
+        return self._end
+
+    def _dispatch(self, nested: bool = False):
+        super()._dispatch(nested)
+        self._begin: list[ContentElement] = _dispatch_list(self._begin_captures, parent=self)
+        self._end: list[ContentElement] = _dispatch_list(self._end_captures, parent=self)
+        self._begin_captures, self._end_captures = [], []
+        if nested:
+            for item in self._begin:
+                item._dispatch(True)
+            for item in self._end:
+                item._dispatch(True)
+
+    def to_dict(self, depth: int = -1, all_content: bool = False, **kwargs) -> dict:
+        """
+        Converts the element to a dictionary representation.
+
+        :param depth: The depth of the conversion. Defaults to -1.
+        :param all_content: Whether to include all content. Defaults to False.
+        :param **kwargs: Additional keyword arguments.
+
+        :return: The dictionary representation of the element.
+        """
+        out_dict = super().to_dict(depth=depth, all_content=all_content, **kwargs)
         if self.begin:
             out_dict["begin"] = (
-                self._list_property_to_dict("begin", verbosity=verbosity - 1, **kwargs)
-                if verbosity
+                self._list_property_to_dict("begin", depth=depth - 1, **kwargs)
+                if depth
                 else self.begin
             )
         if self.end:
             out_dict["end"] = (
-                self._list_property_to_dict("end", verbosity=verbosity - 1, **kwargs)
-                if verbosity
-                else self.end
+                self._list_property_to_dict("end", depth=depth - 1, **kwargs) if depth else self.end
             )
 
         ordered_keys = [
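For block elements the same lazy pattern covers the `begin` and `end` captures; a sketch, with a hypothetical block scope name:

    block, _ = element.findall("meta.function")[0]  # hypothetical block element
    opening = block.begin   # e.g. the keyword elements opening the block
    closing = block.end     # e.g. the matching closing keyword elements
    block.print(depth=1)    # the dict view now includes "begin"/"end" keys
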
diff --git a/src/textmate_grammar/exceptions.py b/src/textmate_grammar/exceptions.py
index 0abbcd3..4f42a08 100644
--- a/src/textmate_grammar/exceptions.py
+++ b/src/textmate_grammar/exceptions.py
@@ -1,29 +1,68 @@
 class IncludedParserNotFound(Exception):
+    """Exception raised when an included parser is not found in the store."""
+
     def __init__(self, key: str = "UNKNOWN", **kwargs) -> None:
+        """
+        Initialize the exception.
+
+        :param key: The key of the included parser.
+        :param kwargs: Additional keyword arguments.
+        """
         message = f"Included parser <{key}> not found in store."
         super().__init__(message, **kwargs)
 
 
 class IncompatibleFileType(Exception):
+    """Exception raised when the input file has an incompatible file type."""
+
     def __init__(self, extensions: list[str], **kwargs) -> None:
+        """
+        Initialize the exception.
+
+        :param extensions: List of compatible file extensions.
+        :param kwargs: Additional keyword arguments.
+        """
         message = f"Input file must have extension {' / '.join(extensions)}"
         super().__init__(message, **kwargs)
 
 
 class FileNotFound(Exception):
+    """Exception raised when a file is not found."""
+
     def __init__(self, file: str, **kwargs) -> None:
+        """
+        Initialize the exception.
+
+        :param file: The path of the file.
+        :param kwargs: Additional keyword arguments.
+        """
         message = f"File not found: {file}"
         super().__init__(message, **kwargs)
 
 
 class FileNotParsed(Exception):
+    """Exception raised when a file is not parsed."""
+
     def __init__(self, file: str, **kwargs) -> None:
+        """
+        Initialize the exception.
+
+        :param file: The path of the file.
+        :param kwargs: Additional keyword arguments.
+        """
         message = f"File not parsed: {file}"
         super().__init__(message, **kwargs)
 
 
 class ImpossibleSpan(Exception):
+    """Exception raised when a span is impossible."""
+
     def __init__(self, **kwargs) -> None:
+        """
+        Initialize the exception.
+
+        :param kwargs: Additional keyword arguments.
+        """
         super().__init__(
             "The closing position cannot be less or equal than the starting position",
             **kwargs,
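These exceptions surface from the parser entry points; catching one looks like this sketch (the `.txt` input and `parser` variable are illustrative):

    from textmate_grammar.exceptions import IncompatibleFileType

    try:
        parser.parse_file("script.txt")
    except IncompatibleFileType as err:
        print(err)  # "Input file must have extension ..."
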
+ """ if step > 1: pos = self.prev(pos, step=step - 1) if pos[1] == 0: @@ -72,7 +92,13 @@ def prev(self, pos: POS, step: int = 1) -> POS: return (pos[0], pos[1] - 1) def range(self, start: POS, close: POS) -> list[POS]: - """Returns the range of positions between start and close""" + """ + Returns a list of positions between the start and close positions. + + :param start: The starting position. + :param close: The closing position. + :return: A list of positions between the start and close positions. + """ indices = [] if start[0] == close[0]: for lp in range(start[1], close[1]): @@ -88,13 +114,25 @@ def range(self, start: POS, close: POS) -> list[POS]: return indices def chars(self, start: POS, close: POS) -> dict[POS, str]: - """Returns the source per position""" + """ + Returns a dictionary mapping each position within the given range to the corresponding source character. + + :param start: The starting position of the range. + :param close: The closing position of the range. + :return: A dictionary mapping each position within the range to the corresponding source character. + """ indices = self.range(start, close) return {pos: self.read(pos) for pos in indices} def read_pos(self, start_pos: POS, close_pos: POS, skip_newline: bool = True) -> str: - """Reads the content between the start and end positions.""" + """Reads the content between the start and end positions. + :param start_pos: The starting position of the content. + :param close_pos: The closing position of the content. + :param skip_newline: Whether to skip the newline character at the end of the content. + :return: The content between the start and end positions. + :raises ImpossibleSpan: If the start position is greater than the close position. + """ self._check_pos(start_pos) self._check_pos(close_pos) if start_pos > close_pos: @@ -118,11 +156,25 @@ def read_pos(self, start_pos: POS, close_pos: POS, skip_newline: bool = True) -> return readout def read_line(self, pos: POS) -> str: + """ + Reads a line from the specified position and returns it. + + :param pos: The position of the line to read. The first element is the line number (0-based), + and the second element is the starting position within the line. + :return: The line starting from the specified position. + """ line = self.lines[pos[0]] return line[pos[1] :] def read(self, start_pos: POS, length: int = 1, skip_newline: bool = True) -> str: - """Reads the content from start for a length""" + """Reads the content from start for a length. + + :param start_pos: The starting position to read from. + :param length: The number of characters to read. Defaults to 1. + :param skip_newline: Whether to skip the newline character at the end of the read content. Defaults to True. + :return: The content read from the specified position. + :raises ImpossibleSpan: If the length is negative. + """ self._check_pos(start_pos) if length < 0: raise ImpossibleSpan @@ -160,15 +212,28 @@ def search( ) -> tuple[Match | None, tuple[POS, POS] | None]: """Matches the stream against a capture group. - The stream is matched against the input pattern. If there are any capture groups, - each is then subsequently parsed by the inputted parsers. The number of parsers therefor - must match the number of capture groups of the expression, or there must be a single parser - and no capture groups. - - leading_chars: - - 0: none allowed - - 1: whitespace characters allowed - - 2: any character allowed + :param pattern: The regular expression pattern to match against the stream. 
@@ -160,15 +212,28 @@ def search(
     ) -> tuple[Match | None, tuple[POS, POS] | None]:
         """Matches the stream against a capture group.
 
-        The stream is matched against the input pattern. If there are any capture groups,
-        each is then subsequently parsed by the inputted parsers. The number of parsers therefor
-        must match the number of capture groups of the expression, or there must be a single parser
-        and no capture groups.
-
-        leading_chars:
-        - 0: none allowed
-        - 1: whitespace characters allowed
-        - 2: any character allowed
+        :param pattern: The regular expression pattern to match against the stream.
+        :param starting: The starting position in the stream.
+        :param boundary: The boundary position in the stream. Defaults to None.
+        :param greedy: Determines if the matching should be greedy or not. Defaults to False.
+        :param kwargs: Additional keyword arguments.
+
+        :return: A tuple containing the matching result and the span of the match.
+
+        .. note::
+            - The stream is matched against the input pattern. If there are any capture groups,
+              each is then subsequently parsed by the inputted parsers. The number of parsers therefore
+              must match the number of capture groups of the expression, or there must be a single parser
+              and no capture groups.
+            - The `greedy` parameter determines if the matching should be greedy or not. If set to True,
+              the matching will try to consume as much of the stream as possible. If set to False,
+              the matching will stop at the first match found.
+            - The `boundary` parameter can be used to specify a boundary position in the stream. If provided,
+              the matching will not go beyond this boundary position.
+            - The `leading_chars` parameter can be used to specify the type of leading characters allowed, with:
+              - `0`: none allowed
+              - `1`: whitespace characters allowed
+              - `2`: any character allowed.
         """
 
         if pattern._pattern in ["\\z", "\\Z"]:
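An illustrative use of `search` under the semantics documented above; the pattern and the `leading_chars=1` value are assumptions for this sketch, with the pattern compiled through the oniguruma bindings the handler already imports:

    from onigurumacffi import compile as _compile

    handler = ContentHandler("  %% section")
    pattern = _compile(r"%%.*$")
    # leading_chars=1 tolerates the whitespace before the match.
    match, span = handler.search(pattern, starting=(0, 0), leading_chars=1)
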
+ """ + if not isinstance(filePath, Path): + filePath = Path(filePath).resolve() if filePath.suffix.split(".")[-1] not in self.file_types: raise IncompatibleFileType(extensions=self.file_types) - handler = ContentHandler.from_path(filePath) - if handler.source == "": - return None + if self._cache.cache_valid(filePath): + element = self._cache.load(filePath) + else: + handler = ContentHandler.from_path(filePath) + if handler.source == "": + return None - # Configure logger - LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths)) + # Configure logger + LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths)) + element = self._parse_language(handler, **kwargs) # type: ignore - return self._parse_language(handler, **kwargs) + if element is not None: + element._dispatch(nested=True) + self._cache.save(filePath, element) + return element def parse_string(self, input: str, **kwargs): - """Parses an input string""" + """ + Parses an input string. + + :param input: The input string to be parsed. + :param kwargs: Additional keyword arguments. + :return: The result of parsing the input string. + """ handler = ContentHandler(input) # Configure logger LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths)) @@ -117,16 +152,16 @@ def _parse( return super()._parse(handler, starting, find_one=False, **kwargs) -def gen_repositories(grammar, key="repository"): +def _gen_repositories(grammar, key="repository"): """Recursively gets all repositories from a grammar dictionary""" if hasattr(grammar, "items"): for k, v in grammar.items(): if k == key: yield v if isinstance(v, dict): - for result in gen_repositories(v, key): + for result in _gen_repositories(v, key): yield result elif isinstance(v, list): for d in v: - for result in gen_repositories(d, key): + for result in _gen_repositories(d, key): yield result diff --git a/src/textmate_grammar/logger.py b/src/textmate_grammar/logger.py index 88cdfb9..3f319e1 100644 --- a/src/textmate_grammar/logger.py +++ b/src/textmate_grammar/logger.py @@ -20,6 +20,10 @@ def wrapper(*args, depth: int = -1, **kwargs): class LogFormatter(logging.Formatter): + """ + A custom log formatter that formats log records with color-coded messages. + """ + green = "\x1b[32;32m" grey = "\x1b[38;20m" yellow = "\x1b[33;20m" @@ -37,13 +41,21 @@ class LogFormatter(logging.Formatter): } def format(self, record): + """ + Formats the log record with the color-coded format based on the log level. + + :param record: The log record to be formatted. + :return: The formatted log message. + """ log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt) return formatter.format(record) class Logger: - """The logger object for the grammar parsers.""" + """ + The logger object for the grammar parsers. + """ long_msg_div = "\x1b[1;32m ... 
\x1b[0m" @@ -51,7 +63,7 @@ def __init__(self, **kwargs) -> None: self.id = None self.max_token_length = 50 self.line_decimals = 3 - self.position_decimials = 3 + self.position_decimals = 3 self.scope = "UNKNOWN" self.logger = logging.getLogger("textmate_grammar") channel = logging.StreamHandler() @@ -61,11 +73,11 @@ def __init__(self, **kwargs) -> None: def configure(self, parser: "GrammarParser", height: int, width: int, **kwargs) -> None: """Configures the logger to a specific grammar and content length""" self.line_decimals = len(str(height)) - self.position_decimials = len(str(width)) + self.position_decimals = len(str(width)) id = parser.token if parser.token else parser.key if self.id != id: self.id = id - tokens = gen_all_tokens(parser.grammar) + tokens = _gen_all_tokens(parser.grammar) self.max_token_length = max(len(token) for token in tokens) self.scope = parser.token @@ -76,13 +88,21 @@ def format_message( position: tuple[int, int] | None = None, depth: int = 0, ) -> str: - "Formats a logging message to the defined format" + """ + Formats a logging message to the defined format. + + :param message: The logging message to be formatted. + :param parser: The GrammarParser object associated with the message. Defaults to None. + :param position: The position tuple (line, column) associated with the message. Defaults to None. + :param depth: The depth of the message in the logging hierarchy. Defaults to 0. + :return: The formatted logging message. + """ if position: msg_pos = "{:{ll}d}-{:{lp}d}".format( - *position, ll=self.line_decimals, lp=self.position_decimials + *position, ll=self.line_decimals, lp=self.position_decimals ).replace(" ", "0") else: - msg_pos = "." * (self.line_decimals + self.position_decimials + 1) + msg_pos = "." * (self.line_decimals + self.position_decimals + 1) if parser: parser_id = parser.token if parser.token else parser.key @@ -131,7 +151,7 @@ def critical(self, *args, **kwargs) -> None: self.logger.critical(message) -def gen_all_tokens(grammar: dict, items: list[str] | None = None) -> list[str]: +def _gen_all_tokens(grammar: dict, items: list[str] | None = None) -> list[str]: if items is None: items = [] for key, value in grammar.items(): @@ -139,9 +159,9 @@ def gen_all_tokens(grammar: dict, items: list[str] | None = None) -> list[str]: items.append(value) elif isinstance(value, list): for nested_grammar in (item for item in value if isinstance(item, dict)): - gen_all_tokens(nested_grammar, items) + _gen_all_tokens(nested_grammar, items) elif isinstance(value, dict): - gen_all_tokens(value, items) + _gen_all_tokens(value, items) return items diff --git a/src/textmate_grammar/parser.py b/src/textmate_grammar/parser.py index 764fe4e..1262495 100644 --- a/src/textmate_grammar/parser.py +++ b/src/textmate_grammar/parser.py @@ -19,7 +19,13 @@ class GrammarParser(ABC): @staticmethod def initialize(grammar: dict, **kwargs): - "Initializes the parser based on the grammar." + """ + Initializes the parser based on the grammar. + + :param grammar: The grammar to initialize the parser with. + :param kwargs: Additional keyword arguments. + :return: The initialized parser. + """ if "include" in grammar: return grammar["include"] elif "match" in grammar: @@ -41,6 +47,17 @@ def __init__( is_capture: bool = False, **kwargs, ) -> None: + """ + Initialize a Parser object. + + :param grammar: The grammar dictionary. + :param language: The language parser object. Defaults to None. + :param key: The key for the parser. Defaults to "". 
diff --git a/src/textmate_grammar/parser.py b/src/textmate_grammar/parser.py
index 764fe4e..1262495 100644
--- a/src/textmate_grammar/parser.py
+++ b/src/textmate_grammar/parser.py
@@ -19,7 +19,13 @@ class GrammarParser(ABC):
 
     @staticmethod
     def initialize(grammar: dict, **kwargs):
-        "Initializes the parser based on the grammar."
+        """
+        Initializes the parser based on the grammar.
+
+        :param grammar: The grammar to initialize the parser with.
+        :param kwargs: Additional keyword arguments.
+        :return: The initialized parser.
+        """
         if "include" in grammar:
             return grammar["include"]
         elif "match" in grammar:
@@ -41,6 +47,17 @@ def __init__(
         is_capture: bool = False,
         **kwargs,
     ) -> None:
+        """
+        Initialize a GrammarParser object.
+
+        :param grammar: The grammar dictionary.
+        :param language: The language parser object. Defaults to None.
+        :param key: The key for the parser. Defaults to "".
+        :param is_capture: Indicates if the parser is a capture. Defaults to False.
+        :param kwargs: Additional keyword arguments.
+
+        :return: None
+        """
         self.grammar = grammar
         self.language = language
         self.key = key
@@ -85,8 +102,13 @@ def _parse(
     ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
         """The abstract method which all parsers must implement
 
-        The _parse method is called by parse, which will additionally parse any nested Capture elements.
-        The _parse method should contain all the rules for the extended parser.
+        The ``_parse`` method is called by ``parse``, which will additionally parse any nested Capture elements.
+        The ``_parse`` method should contain all the rules for the extended parser.
+
+        :param handler: The content handler to handle the parsed elements.
+        :param starting: The starting position of the parsing.
+        :param kwargs: Additional keyword arguments.
+        :return: A tuple containing the parsing result, a list of parsed elements, and the ending position of the parsing.
         """
         pass
 
@@ -105,7 +127,19 @@ def parse(
         boundary: POS | None = None,
         **kwargs,
     ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
-        """The method to parse a handler using the current grammar."""
+        """
+        The method to parse a handler using the current grammar.
+
+        :param handler: The ContentHandler object that will handle the parsed content.
+        :param starting: The starting position for parsing. Defaults to (0, 0).
+        :param boundary: The boundary position for parsing. Defaults to None.
+        :param **kwargs: Additional keyword arguments that can be passed to the parser.
+
+        :return: A tuple containing:
+            - parsed: A boolean indicating whether the parsing was successful.
+            - elements: A list of Capture or ContentElement objects representing the parsed content.
+            - span: A tuple containing the starting and ending positions of the parsed content, or None if parsing failed.
+        """
         if not self.initialized and self.language is not None:
             self.language._initialize_repository()
         parsed, elements, span = self._parse(handler, starting, boundary=boundary, **kwargs)
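A sketch of the parse entry point for a stand-alone match rule; the grammar dict is hypothetical, and `initialize` is assumed to dispatch it to the matching parser subclass as described above:

    from textmate_grammar.handler import ContentHandler
    from textmate_grammar.parser import GrammarParser

    # A grammar with a "match" key yields a match-rule parser.
    parser = GrammarParser.initialize({"match": r"\d+", "name": "constant.numeric"})
    parsed, elements, span = parser.parse(ContentHandler("42"), starting=(0, 0))
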
""" if parsers is None: parsers = {} diff --git a/syntaxes/matlab b/syntaxes/matlab index e367c68..f353382 160000 --- a/syntaxes/matlab +++ b/syntaxes/matlab @@ -1 +1 @@ -Subproject commit e367c686ff0f6949c7286aa7acedcd4311875ec1 +Subproject commit f3533822b2d740fd4128722854c98b9f1b5d07ee diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 3a54e39..0000000 --- a/tox.ini +++ /dev/null @@ -1,24 +0,0 @@ -[tox] -isolated_build = true - -[testenv] -skip_install = true -allowlist_externals = poetry -commands = - poetry run pytest test/unit - -[testenv:mypy] -skip_install = true -allowlist_externals = mypy -commands = - mypy . - -[testenv:regression] -skip_install = true -allowlist_externals = poetry, bash, sudo -platform = linux -change_dir = {tox_root}/test/regression -commands_pre = - bash install.sh -commands = - poetry run pytest .