Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support config files #169

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
import sys
import warnings

from nbstripout._utils import strip_output, strip_zeppelin_output
from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file
from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_files

try:
# Jupyter >= 4
from nbformat import read, write, NO_CONVERT
Expand Down Expand Up @@ -377,7 +377,7 @@ def main():
help='Space separated list of extra keys to strip '
'from metadata, e.g. metadata.foo cell.metadata.bar')
parser.add_argument('--drop-empty-cells', action='store_true',
help='Remove cells where `source` is empty or contains only whitepace')
help='Remove cells where `source` is empty or contains only whitespace')
parser.add_argument('--drop-tagged-cells', default='',
help='Space separated list of cell-tags that remove an entire cell')
parser.add_argument('--strip-init-cells', action='store_true',
Expand All @@ -402,7 +402,8 @@ def main():
help='Prints stripped files to STDOUT')

parser.add_argument('files', nargs='*', help='Files to strip output from')
args = parser.parse_args()

args = merge_configuration_file(parser.parse_args())

git_config = ['git', 'config']

Expand Down
99 changes: 98 additions & 1 deletion nbstripout/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from collections import defaultdict
from argparse import Namespace
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Optional

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep imports sorted

__all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"]

Expand Down Expand Up @@ -153,3 +156,97 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa
for field in keys['cell']:
pop_recursive(cell, field)
return nb


def process_pyproject_toml(toml_file_path: str) -> Optional[Dict[str, Any]]:
"""Extract config mapping from pyproject.toml file."""
try:
import tomllib # python 3.11+
except ModuleNotFoundError:
import tomli as tomllib

with open(toml_file_path, 'rb') as f:
return tomllib.load(f).get('tool', {}).get('nbstripout', None)


def process_setup_cfg(cfg_file_path) -> Optional[Dict[str, Any]]:
"""Extract config mapping from setup.cfg file."""
import configparser

reader = configparser.ConfigParser()
reader.read(cfg_file_path)
if not reader.has_section('nbstripout'):
return None

return reader['nbstripout']


def merge_configuration_file(args: Namespace) -> Namespace:
"""Merge flags from config files into args."""
CONFIG_FILES = {
'pyproject.toml': process_pyproject_toml,
'setup.cfg': process_setup_cfg,
}
BOOL_TYPES = {
'yes': True,
'true': True,
'on': True,
'no': False,
'false': False,
'off': False
}

# Traverse the file tree common to all files given as argument looking for
# a configuration file
config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.getcwd()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use pathlib wherever possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kynan Thanks for the quick review. I believe I addressed all your comments except this one. I'm not too familiar with the pathlib module. Just did a quick search and couldn't find sth equivalent to os.path.commonpath(). Could you be more specific here?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, unfortunately there is no equivalent to commonpath in pathlib. I won't insist, so you can leave as-is :)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a whole interesting thread on S/O on commonpath -
https://stackoverflow.com/a/43613742

Though not nessacarily helpful - they do end up still using commonpath, and just wrapping it with Path.

config = None
while True:
for config_file, processor in CONFIG_FILES.items():
config_file_path = os.path.join(config_path, config_file)
if os.path.isfile(config_file_path):
config = processor(config_file_path)
if config is not None:
break
if config is not None:
break
config_path, tail = os.path.split(config_path)
if not tail:
break

if config:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't picked up on this in my first round review: The way you implemented this, config file settings take precedence over command line arguments, however it should be the other way around.

I realise this will be a bit harder to implement. You could turn the Namespace returned by argparse into a dict and merge that into the dict you get from reading the config files, where the argparse dict overwrites.

What makes this even more annoying is that you don't know if a value has been set by the caller or is simply the default :( I may need to look around for an idiomatic way to implement this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry about the delay. Have to prioritize something else atm. Hope to come back to this at some point but if anyone sees fit, feel free to take over.

# merge config
for name, value in config.items():
if value is None:
continue
# additive string flags
if name in {'extra_keys'}:
args.extra_keys = f"{getattr(args, 'extra_keys', '')} {value}".strip()
# singular string flags
elif name in {'mode'}:
args.mode = value
# integer flags
elif name in {'max_size'}:
args.max_size = int(value)
# boolean flags
elif name in {
'dry_run',
'keep_count',
'keep_output',
'drop_empty_cells',
'drop_tagged_cells',
'strip_init_cells',
'_global',
'_system',
'force',
'textconv',
}:
if isinstance(value, str):
value = BOOL_TYPES.get(value, value)
if not isinstance(value, bool):
raise ValueError(f"Invalid value for {name}: {value}, expected bool")
if value:
setattr(args, name.replace('-', '_'), value)
else:
raise ValueError(f'{name} in the config file is not a valid option')

return args
212 changes: 212 additions & 0 deletions tests/test_read_config_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
from argparse import Namespace
from typing import Any, Dict
from pathlib import Path

import pytest
from nbstripout._utils import merge_configuration_file


def assert_namespace(namespace: Namespace, expected_props: Dict[str, Any]):
keys = (prop for prop in dir(namespace) if not prop.startswith("_"))
actual_props = {key: getattr(namespace, key) for key in keys}
assert actual_props == expected_props


@pytest.fixture
def test_nb(tmp_path: Path) -> Path:
test_nb = tmp_path / "test_me.ipynb"
test_nb.touch()
return test_nb


@pytest.fixture
def nested_test_nb(tmp_path: Path) -> Path:
(tmp_path / "nested/file").mkdir(parents=True)
test_nb = tmp_path / "nested/file/test_me.ipynb"
test_nb.touch()
return test_nb


def test_no_config_file(test_nb: Path) -> None:

original_args = {"files": [test_nb]}
args = Namespace(**original_args)
assert merge_configuration_file(args)
assert_namespace(args, original_args)


def test_non_nested_pyproject_toml_empty(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text('[tool.other]\nprop="value"\n')
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files})


def test_non_nested_pyproject_toml_non_empty(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells=true\n",
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "drop_empty_cells": True})


def test_non_nested_setup_cfg_non_empty(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "setup.cfg").write_text(
"[other]\ndrop_empty_cells = yes\n",
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files})


def test_non_nested_setup_cfg_empty(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "setup.cfg").write_text(
"[nbstripout]\ndrop_empty_cells = yes\n",
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "drop_empty_cells": True})


def test_nested_file(tmp_path: Path, nested_test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells=true\n",
)
args = Namespace(files=[nested_test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "drop_empty_cells": True})


def test_common_path_nested_file_do_not_load(tmp_path: Path) -> None:
(tmp_path / "nested/file").mkdir(parents=True)
(tmp_path / "nested/other").mkdir()
test_nb1 = tmp_path / "nested/file/test_me.ipynb"
test_nb1.touch()
test_nb2 = tmp_path / "nested/other/test_me.ipynb"
test_nb2.touch()

(tmp_path / "nested/file/pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells=true\n",
)
args = Namespace(files=[test_nb1, test_nb2])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files})


def test_common_path_nested_file_do_load(tmp_path: Path) -> None:
(tmp_path / "nested/file").mkdir(parents=True)
(tmp_path / "nested/other").mkdir()
test_nb1 = tmp_path / "nested/file/test_me.ipynb"
test_nb1.touch()
test_nb2 = tmp_path / "nested/other/test_me.ipynb"
test_nb2.touch()

(tmp_path / "nested/pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells=true\n",
)
args = Namespace(files=[test_nb1, test_nb2])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "drop_empty_cells": True})


def test_continue_search_if_no_config_found(
tmp_path: Path, nested_test_nb: Path
) -> None:
(tmp_path / "nested/pyproject.toml").write_text(
'[tool.other]\nprop = "value"\n',
)
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells = true\n",
)
args = Namespace(files=[nested_test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "drop_empty_cells": True})


def test_stop_search_if_config_found(tmp_path: Path, nested_test_nb: Path) -> None:
(tmp_path / "nested/pyproject.toml").write_text(
"[tool.nbstripout]\n",
)
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\ndrop_empty_cells = true\n",
)
args = Namespace(files=[nested_test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files})


def test_dont_load_false(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "setup.cfg").write_text(
"[nbstripout]\ndrop_empty_cells = no\n",
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files})


def test_list_value_space_sep_string_pyproject_toml(
tmp_path: Path, test_nb: Path
) -> None:
(tmp_path / "pyproject.toml").write_text(
'[tool.nbstripout]\nextra_keys="foo bar"\n',
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"})


def test_list_value_setup_cfg(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "setup.cfg").write_text(
"[nbstripout]\nextra_keys=foo bar\n",
)
args = Namespace(files=[test_nb])
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "extra_keys": "foo bar"})


def test_unknown_property(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\nunknown_prop=true\n",
)
args = Namespace(files=[test_nb])
with pytest.raises(ValueError):
merge_configuration_file(args)


def test_non_bool_value_for_bool_property(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
'[tool.nbstripout]\ndrop_empty_cells="invalid"\n',
)
args = Namespace(files=[test_nb])
with pytest.raises(ValueError):
merge_configuration_file(args)


def test_non_bool_value_for_bool_property_in_setup_cfg(
tmp_path: Path, test_nb: Path
) -> None:
(tmp_path / "setup.cfg").write_text(
"[nbstripout]\ndrop_empty_cells=ok\n",
)
args = Namespace(files=[test_nb])
with pytest.raises(ValueError):
merge_configuration_file(args)


def test_non_list_value_for_list_property(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
"[tool.nbstripout]\nexclude=true\n",
)
args = Namespace(files=[test_nb])
with pytest.raises(ValueError):
merge_configuration_file(args)


def test_merge_with_cli_additive_str_property(tmp_path: Path, test_nb: Path) -> None:
(tmp_path / "pyproject.toml").write_text(
'[tool.nbstripout]\nextra_keys="foo"\n',
)
args = Namespace(files=[test_nb], extra_keys="bar")
assert merge_configuration_file(args)
assert_namespace(args, {"files": args.files, "extra_keys": "bar foo"})