Skip to content

Commit

Permalink
Add 'exclude' field (#35)
Browse files (browse the repository at this point in the history)
* Add 'exclude' arg

* Add exclude field

* Add test for exclude flag

* Update bids2table/__main__.py

Co-authored-by: Connor Lane <[email protected]>

* Update _b2t.py

* Update test_bids2table.py

* Fix typo in test

* Replace glob with crawler

* pass exclude arg to extract_bids_subdir

* Formatting

* Update bids2table/__main__.py

Co-authored-by: Connor Lane <[email protected]>

* Update bids2table/extractors/bids.py

Co-authored-by: Connor Lane <[email protected]>

* Update bids2table/_b2t.py

Co-authored-by: Connor Lane <[email protected]>

* Reformatting

* Remove unused import

---------

Co-authored-by: Connor Lane <[email protected]>
Co-authored-by: Elizabeth Kenneally <[email protected]>
  • Loading branch information
3 people authored Jun 22, 2024
1 parent 7650e8c commit abeff96
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 7 deletions.
7 changes: 7 additions & 0 deletions bids2table/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ def main():
default=0,
help="Increase verbosity level.",
)
parser.add_argument(
"--exclude",
nargs="+",
default=None,
help="List of directory names or glob patterns to exclude from indexing.",
)

args = parser.parse_args()

Expand All @@ -68,6 +74,7 @@ def main():
overwrite=args.overwrite,
workers=args.workers,
worker_id=args.worker_id,
exclude=args.exclude,
return_table=False,
)

Expand Down
11 changes: 8 additions & 3 deletions bids2table/_b2t.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from functools import partial
from pathlib import Path
from typing import Optional
from typing import List, Optional

from elbow.builders import build_parquet, build_table
from elbow.sources.filesystem import Crawler
Expand All @@ -19,6 +19,7 @@ def bids2table(
with_meta: bool = True,
persistent: bool = False,
index_path: Optional[StrOrPath] = None,
exclude: Optional[List[str]] = None,
incremental: bool = False,
overwrite: bool = False,
workers: Optional[int] = None,
Expand All @@ -35,6 +36,7 @@ def bids2table(
persistent: whether to save index to disk as a Parquet dataset
index_path: path to BIDS Parquet index to generate or load. Defaults to `root /
"index.b2t"`. Index generation requires `persistent=True`.
exclude: Optional list of directory names or glob patterns to exclude from indexing.
incremental: update index incrementally with only new or changed files.
overwrite: overwrite previous index.
workers: number of parallel processes. If `None` or 1, run in the main
Expand All @@ -57,14 +59,17 @@ def bids2table(
if not root.is_dir():
raise FileNotFoundError(f"root directory {root} does not exists")

if exclude is None:
exclude = []

source = Crawler(
root=root,
include=["sub-*"], # find subject dirs
skip=["sub-*"], # but don't crawl into subject dirs
skip=["sub-*"] + exclude, # but don't crawl into subject dirs
dirs_only=True,
follow_links=True,
)
extract = partial(extract_bids_subdir, with_meta=with_meta)
extract = partial(extract_bids_subdir, exclude=exclude, with_meta=with_meta)

if index_path is None:
index_path = root / "index.b2t"
Expand Down
8 changes: 4 additions & 4 deletions bids2table/extractors/bids.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
from glob import iglob
from pathlib import Path
from typing import Generator, Optional
from typing import Generator, List, Optional

from elbow.extractors import extract_file_meta
from elbow.record import Record, concat
from elbow.sources.filesystem import Crawler
from elbow.typing import StrOrPath

from bids2table.entities import BIDSEntities
Expand Down Expand Up @@ -42,12 +42,12 @@ def extract_bids_file(path: StrOrPath, with_meta: bool = True) -> Optional[Recor


def extract_bids_subdir(
path: StrOrPath, with_meta: bool = True
path: StrOrPath, exclude: List[str], with_meta: bool = True
) -> Generator[Optional[Record], None, None]:
"""
Extract BIDS records recursively for all files in a sub-directory.
"""
for path in iglob(str(Path(path) / "**"), recursive=True):
for path in Crawler(root=path, skip=exclude, exclude=exclude, follow_links=True):
yield extract_bids_file(path, with_meta=with_meta)


Expand Down
18 changes: 18 additions & 0 deletions tests/test_bids2table.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,23 @@ def test_bids2table_nonexist(tmp_path: Path):
bids2table(root=tmp_path / "nonexistent_dataset")


def test_bids2table_exclude(tmp_path: Path):
    """
    Index the ``ds001`` example dataset while excluding ``anat`` and verify
    that no ``anat`` datatype appears in the resulting table.
    """
    excluded = ["anat"]
    dataset_root = BIDS_EXAMPLES / "ds001"
    # Write the index under the pytest-provided temporary directory.
    target_index = tmp_path / "index_exclude.b2t"

    table = bids2table(
        root=dataset_root,
        with_meta=True,
        persistent=True,
        index_path=target_index,
        exclude=excluded,
    )

    # The datatype column must be present, and it must not contain the
    # excluded name.
    assert "ent__datatype" in table.columns
    datatypes = table["ent__datatype"].values
    assert "anat" not in datatypes


if __name__ == "__main__":
pytest.main([__file__])

0 comments on commit abeff96

Please sign in to comment.