From df03084400c93a9ddad0dee58eb5f360fdb8b187 Mon Sep 17 00:00:00 2001 From: rafelafrance Date: Thu, 3 Oct 2024 15:35:20 -0400 Subject: [PATCH] Revert "Add terms and allow custom trait fields" This reverts commit a3cfa98d28a3adc223442ff7f5d02affe472da35. --- pyproject.toml | 1 - traiter/merge_terms.py | 60 -------------------- traiter/pylib/rules/base.py | 8 +-- traiter/pylib/rules/terms/trs_terms.csv | 2 +- traiter/pylib/rules/terms/unit_tic_terms.csv | 2 +- 5 files changed, 6 insertions(+), 67 deletions(-) delete mode 100755 traiter/merge_terms.py diff --git a/pyproject.toml b/pyproject.toml index 43e41507..8cd4ec33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ authors = [{name="Raphael LaFrance", email="rafelafrance@proton.me"}] requires-python = ">=3.11" dependencies = [ "regex", - "pandas", "python-dateutil", "spacy", ] diff --git a/traiter/merge_terms.py b/traiter/merge_terms.py deleted file mode 100755 index 783b5efd..00000000 --- a/traiter/merge_terms.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import textwrap -from pathlib import Path - -import pandas as pd - - -def main(): - args = parse_args() - - csvs = [ - pd.read_csv(f) - for f in sorted(args.term_dir.glob("*.csv")) - if f.stem not in args.exclude - ] - - merged = pd.concat(csvs) - merged = merged.sort_values(["label", "pattern"]) - merged.to_csv(args.merged_csv, index=False) - - -def parse_args() -> argparse.Namespace: - arg_parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description=textwrap.dedent("""Merge all terms into a single CSV file."""), - ) - - arg_parser.add_argument( - "--term-dir", - metavar="PATH", - type=Path, - required=True, - help="""Directory containing the term CSV files.""", - ) - - arg_parser.add_argument( - "--merged-csv", - metavar="PATH", - type=Path, - required=True, - help="""Output the merged data to this CSV file.""", - ) - - arg_parser.add_argument( - "--exclude", - metavar="STEM", - action="append", - help="""Exclude CSV files with these stems. Repeat for more than one file.""", - ) - - args = arg_parser.parse_args() - - args.exclude = args.exclude if args.exclude else [] - - return args - - -if __name__ == "__main__": - main() diff --git a/traiter/pylib/rules/base.py b/traiter/pylib/rules/base.py index 1230d7c4..38126a66 100644 --- a/traiter/pylib/rules/base.py +++ b/traiter/pylib/rules/base.py @@ -17,10 +17,10 @@ def __eq__(self, other): @classmethod def from_ent(cls, ent, **kwargs): - kwargs["start"] = kwargs["start"] if kwargs.get("start") else ent.start_char - kwargs["end"] = kwargs["end"] if kwargs.get("end") else ent.end_char - kwargs["_trait"] = kwargs["_trait"] if kwargs.get("_trait") else ent.label_ - kwargs["_text"] = kwargs["_text"] if kwargs.get("_text") else ent.text + kwargs["start"] = ent.start_char + kwargs["end"] = ent.end_char + kwargs["_trait"] = ent.label_ + kwargs["_text"] = ent.text return cls(**kwargs) def to_dict(self) -> dict: diff --git a/traiter/pylib/rules/terms/trs_terms.csv b/traiter/pylib/rules/terms/trs_terms.csv index 738b90fc..d7095a11 100644 --- a/traiter/pylib/rules/terms/trs_terms.csv +++ b/traiter/pylib/rules/terms/trs_terms.csv @@ -1,5 +1,5 @@ label,pattern,replace -trs_label,trs, +dtrs_label,trs, trs_label,trs:, sec_label,sec, sec_label,sec., diff --git a/traiter/pylib/rules/terms/unit_tic_terms.csv b/traiter/pylib/rules/terms/unit_tic_terms.csv index b09b822d..106d35f3 100644 --- a/traiter/pylib/rules/terms/unit_tic_terms.csv +++ b/traiter/pylib/rules/terms/unit_tic_terms.csv @@ -2,4 +2,4 @@ label,pattern,replace,factor_cm imperial_length,',ft,30.48 imperial_length,’,ft,30.48 imperial_length,"",ft,2.54 -imperial_inches,in,in,2.54 +imperial_length,in,in,2.54