Skip to content

Commit

Permalink
Significantly improve yaml load times when the C loader is available (h…
Browse files Browse the repository at this point in the history
  • Loading branch information
bdraco authored Jun 13, 2022
1 parent b84e844 commit dca4d3c
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 80 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ env:
PIP_CACHE: /tmp/pip-cache
SQLALCHEMY_WARN_20: 1
PYTHONASYNCIODEBUG: 1
HASS_CI: 1

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ jobs:
wheels-key: ${{ secrets.WHEELS_KEY }}
wheels-user: wheels
env-file: true
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;cargo"
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;yaml-dev;cargo"
pip: "Cython;numpy;scikit-build"
skip-binary: aiohttp,grpcio
constraints: "homeassistant/package_constraints.txt"
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ RUN \
libavfilter-dev \
libpcap-dev \
libturbojpeg0 \
libyaml-dev \
libxml2 \
git \
cmake \
Expand Down
6 changes: 2 additions & 4 deletions homeassistant/scripts/check_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def mock_secrets(ldr, node):

if secrets:
# Ensure !secrets point to the patched function
yaml_loader.SafeLineLoader.add_constructor("!secret", yaml_loader.secret_yaml)
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)

def secrets_proxy(*args):
secrets = Secrets(*args)
Expand Down Expand Up @@ -219,9 +219,7 @@ def secrets_proxy(*args):
pat.stop()
if secrets:
# Ensure !secrets point to the original function
yaml_loader.SafeLineLoader.add_constructor(
"!secret", yaml_loader.secret_yaml
)
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)

return res

Expand Down
176 changes: 124 additions & 52 deletions homeassistant/util/yaml/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,22 @@
from collections import OrderedDict
from collections.abc import Iterator
import fnmatch
from io import StringIO
import logging
import os
from pathlib import Path
from typing import Any, TextIO, TypeVar, Union, overload

import yaml

try:
from yaml import CSafeLoader as FastestAvailableSafeLoader

HAS_C_LOADER = True
except ImportError:
HAS_C_LOADER = False
from yaml import SafeLoader as FastestAvailableSafeLoader # type: ignore[misc]

from homeassistant.exceptions import HomeAssistantError

from .const import SECRET_YAML
Expand Down Expand Up @@ -88,6 +97,30 @@ def _load_secret_yaml(self, secret_dir: Path) -> dict[str, str]:
return secrets


class SafeLoader(FastestAvailableSafeLoader):
    """Safe loader built on the fastest available PyYAML safe loader.

    The base class is ``CSafeLoader`` when it could be imported and the
    pure python ``yaml.SafeLoader`` otherwise.
    """

    def __init__(self, stream: Any, secrets: Secrets | None = None) -> None:
        """Initialize the loader.

        stream is the YAML source handed to the base loader; secrets is the
        optional secrets store made available to tag constructors through
        ``loader.secrets``.
        """
        # Keep the stream and a display name on the instance so that
        # get_name() and get_stream_name() work for this loader.
        # NOTE(review): presumably needed because the C loader does not
        # expose ``name``/``stream`` like the pure python reader - confirm.
        self.stream = stream
        if isinstance(stream, str):
            self.name = "<unicode string>"
        elif isinstance(stream, bytes):
            self.name = "<byte string>"
        else:
            self.name = getattr(stream, "name", "<file>")
        super().__init__(stream)
        self.secrets = secrets

    def get_name(self) -> str:
        """Get the name of the loader."""
        return self.name

    def get_stream_name(self) -> str:
        """Get the name of the stream, or an empty string if it has none."""
        # str, bytes and io.StringIO sources carry no ``name`` attribute, so
        # a plain attribute access would raise AttributeError for them.
        return getattr(self.stream, "name", "") or ""


class SafeLineLoader(yaml.SafeLoader):
"""Loader class that keeps track of line numbers."""

Expand All @@ -103,6 +136,17 @@ def compose_node(self, parent: yaml.nodes.Node, index: int) -> yaml.nodes.Node:
node.__line__ = last_line + 1 # type: ignore[attr-defined]
return node

def get_name(self) -> str:
    """Get the name of the loader.

    Returns the loader's ``name`` attribute unchanged.
    """
    return self.name

def get_stream_name(self) -> str:
    """Get the name of the stream, or an empty string if it has none."""
    # The stream may be None or an object without a ``name`` attribute
    # (for example io.StringIO); a plain ``self.stream.name`` would raise
    # AttributeError in those cases.
    return getattr(self.stream, "name", "") or ""


# Either loader implementation; used to annotate the tag constructors, which
# are registered on both loader classes.
LoaderType = Union[SafeLineLoader, SafeLoader]


def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
"""Load a YAML file."""
Expand All @@ -114,60 +158,90 @@ def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
raise HomeAssistantError(exc) from exc


def parse_yaml(content: str | TextIO, secrets: Secrets | None = None) -> JSON_TYPE:
"""Load a YAML file."""
def parse_yaml(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the fastest available loader.

    content is the YAML text or an open text stream; secrets is the optional
    secrets store passed on to the loader.

    When the C loader is unavailable the pure python line loader is used
    directly. Otherwise the C-backed SafeLoader is tried first and, if it
    raises yaml.YAMLError, the content is parsed again with the pure python
    line loader, which logs the error and raises HomeAssistantError.
    """
    if not HAS_C_LOADER:
        return _parse_yaml_pure_python(content, secrets)
    try:
        return _parse_yaml(SafeLoader, content, secrets)
    except yaml.YAMLError:
        # Loading failed, so we now load with the slow line loader
        # since the C one will not give us line numbers
        if not isinstance(content, str) and content.seekable():
            # Rewind the stream so we can try again. Checking for "not a
            # str" instead of isinstance(..., TextIO) matters: files returned
            # by open() are io.TextIOWrapper objects, which are not instances
            # of typing.TextIO, and would otherwise be re-read from wherever
            # the failed attempt left the position.
            content.seek(0, 0)
        return _parse_yaml_pure_python(content, secrets)


def _parse_yaml_pure_python(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the pure python loader (this is very slow).

    Delegates to _parse_yaml with SafeLineLoader. A yaml.YAMLError is logged
    and re-raised as HomeAssistantError with the original error chained.
    """
    try:
        # Single delegating return: the inline yaml.load(...) expression that
        # used to sit here duplicated _parse_yaml and made this call
        # unreachable.
        return _parse_yaml(SafeLineLoader, content, secrets)
    except yaml.YAMLError as exc:
        _LOGGER.error(str(exc))
        raise HomeAssistantError(exc) from exc


def _parse_yaml(
    loader: type[SafeLoader] | type[SafeLineLoader],
    content: str | TextIO,
    secrets: Secrets | None = None,
) -> JSON_TYPE:
    """Run PyYAML over content using the given loader class.

    The loader class is instantiated with the stream and the secrets store.
    A falsy parse result is replaced by an empty OrderedDict.
    """

    def _make_loader(stream: Any) -> Any:
        """Build the loader for yaml.load with the secrets attached."""
        return loader(stream, secrets)

    parsed = yaml.load(content, Loader=_make_loader)
    if not parsed:
        # If configuration file is empty YAML returns None
        # We convert that to an empty dict
        return OrderedDict()
    return parsed


@overload
def _add_reference(
obj: list | NodeListClass, loader: SafeLineLoader, node: yaml.nodes.Node
obj: list | NodeListClass,
loader: LoaderType,
node: yaml.nodes.Node,
) -> NodeListClass:
...


@overload
def _add_reference(
obj: str | NodeStrClass, loader: SafeLineLoader, node: yaml.nodes.Node
obj: str | NodeStrClass,
loader: LoaderType,
node: yaml.nodes.Node,
) -> NodeStrClass:
...


@overload
def _add_reference(
obj: _DictT, loader: SafeLineLoader, node: yaml.nodes.Node
) -> _DictT:
def _add_reference(obj: _DictT, loader: LoaderType, node: yaml.nodes.Node) -> _DictT:
...


def _add_reference(obj, loader: SafeLineLoader, node: yaml.nodes.Node): # type: ignore[no-untyped-def]
def _add_reference(obj, loader: LoaderType, node: yaml.nodes.Node): # type: ignore[no-untyped-def]
"""Add file reference information to an object."""
if isinstance(obj, list):
obj = NodeListClass(obj)
if isinstance(obj, str):
obj = NodeStrClass(obj)
setattr(obj, "__config_file__", loader.name)
setattr(obj, "__config_file__", loader.get_name())
setattr(obj, "__line__", node.start_mark.line)
return obj


def _include_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def _include_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Load another YAML file and embeds it using the !include tag.
Example:
device_tracker: !include device_tracker.yaml
"""
fname = os.path.join(os.path.dirname(loader.name), node.value)
fname = os.path.join(os.path.dirname(loader.get_name()), node.value)
try:
return _add_reference(load_yaml(fname, loader.secrets), loader, node)
except FileNotFoundError as exc:
Expand All @@ -191,12 +265,10 @@ def _find_files(directory: str, pattern: str) -> Iterator[str]:
yield filename


def _include_dir_named_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
) -> OrderedDict:
def _include_dir_named_yaml(loader: LoaderType, node: yaml.nodes.Node) -> OrderedDict:
"""Load multiple files from directory as a dictionary."""
mapping: OrderedDict = OrderedDict()
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
for fname in _find_files(loc, "*.yaml"):
filename = os.path.splitext(os.path.basename(fname))[0]
if os.path.basename(fname) == SECRET_YAML:
Expand All @@ -206,11 +278,11 @@ def _include_dir_named_yaml(


def _include_dir_merge_named_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> OrderedDict:
"""Load multiple files from directory as a merged dictionary."""
mapping: OrderedDict = OrderedDict()
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
for fname in _find_files(loc, "*.yaml"):
if os.path.basename(fname) == SECRET_YAML:
continue
Expand All @@ -221,10 +293,10 @@ def _include_dir_merge_named_yaml(


def _include_dir_list_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> list[JSON_TYPE]:
"""Load multiple files from directory as a list."""
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
return [
load_yaml(f, loader.secrets)
for f in _find_files(loc, "*.yaml")
Expand All @@ -233,10 +305,10 @@ def _include_dir_list_yaml(


def _include_dir_merge_list_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> JSON_TYPE:
"""Load multiple files from directory as a merged list."""
loc: str = os.path.join(os.path.dirname(loader.name), node.value)
loc: str = os.path.join(os.path.dirname(loader.get_name()), node.value)
merged_list: list[JSON_TYPE] = []
for fname in _find_files(loc, "*.yaml"):
if os.path.basename(fname) == SECRET_YAML:
Expand All @@ -247,7 +319,7 @@ def _include_dir_merge_list_yaml(
return _add_reference(merged_list, loader, node)


def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> OrderedDict:
def _ordered_dict(loader: LoaderType, node: yaml.nodes.MappingNode) -> OrderedDict:
"""Load YAML mappings into an ordered dictionary to preserve key order."""
loader.flatten_mapping(node)
nodes = loader.construct_pairs(node)
Expand All @@ -259,14 +331,14 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
try:
hash(key)
except TypeError as exc:
fname = getattr(loader.stream, "name", "")
fname = loader.get_stream_name()
raise yaml.MarkedYAMLError(
context=f'invalid key: "{key}"',
context_mark=yaml.Mark(fname, 0, line, -1, None, None), # type: ignore[arg-type]
) from exc

if key in seen:
fname = getattr(loader.stream, "name", "")
fname = loader.get_stream_name()
_LOGGER.warning(
'YAML file %s contains duplicate key "%s". Check lines %d and %d',
fname,
Expand All @@ -279,13 +351,13 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
return _add_reference(OrderedDict(nodes), loader, node)


def _construct_seq(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def _construct_seq(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Add line number and file name to Load YAML sequence."""
(obj,) = loader.construct_yaml_seq(node)
return _add_reference(obj, loader, node)


def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
def _env_var_yaml(loader: LoaderType, node: yaml.nodes.Node) -> str:
"""Load environment variables and embed it into the configuration YAML."""
args = node.value.split()

Expand All @@ -298,27 +370,27 @@ def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
raise HomeAssistantError(node.value)


def secret_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def secret_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Load secrets and embed it into the configuration YAML."""
if loader.secrets is None:
raise HomeAssistantError("Secrets not supported in this YAML file")

return loader.secrets.get(loader.name, node.value)


SafeLineLoader.add_constructor("!include", _include_yaml)
SafeLineLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _ordered_dict
)
SafeLineLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, _construct_seq
)
SafeLineLoader.add_constructor("!env_var", _env_var_yaml)
SafeLineLoader.add_constructor("!secret", secret_yaml)
SafeLineLoader.add_constructor("!include_dir_list", _include_dir_list_yaml)
SafeLineLoader.add_constructor("!include_dir_merge_list", _include_dir_merge_list_yaml)
SafeLineLoader.add_constructor("!include_dir_named", _include_dir_named_yaml)
SafeLineLoader.add_constructor(
"!include_dir_merge_named", _include_dir_merge_named_yaml
)
SafeLineLoader.add_constructor("!input", Input.from_node)
return loader.secrets.get(loader.get_name(), node.value)


def add_constructor(tag: Any, constructor: Any) -> None:
    """Register a constructor for a tag on every loader class.

    The constructor is added to SafeLoader first and then to SafeLineLoader,
    so both parsing paths resolve the tag the same way.
    """
    SafeLoader.add_constructor(tag, constructor)
    SafeLineLoader.add_constructor(tag, constructor)


# Register the custom tag constructors on both loader classes at import time.
add_constructor("!include", _include_yaml)
# Replace the default mapping and sequence constructors with the versions
# defined in this module (_ordered_dict and _construct_seq).
add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _ordered_dict)
add_constructor(yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, _construct_seq)
add_constructor("!env_var", _env_var_yaml)
add_constructor("!secret", secret_yaml)
# Directory include variants.
add_constructor("!include_dir_list", _include_dir_list_yaml)
add_constructor("!include_dir_merge_list", _include_dir_merge_list_yaml)
add_constructor("!include_dir_named", _include_dir_named_yaml)
add_constructor("!include_dir_merge_named", _include_dir_merge_named_yaml)
add_constructor("!input", Input.from_node)
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
*!* NOT YAML
-*!*- NOT YAML

Loading

0 comments on commit dca4d3c

Please sign in to comment.