From c0d6db36cd17eb49754dc6cf5372b5b097572eca Mon Sep 17 00:00:00 2001 From: Mohayemin Date: Wed, 25 Jan 2023 19:03:51 -0700 Subject: [PATCH] simplify data types to dictionary. add example of JSON output --- code/db/DataItem.py | 8 --- code/db/Db.py | 28 +++++------ code/db/LibPair.py | 7 --- code/db/Migration.py | 16 ------ code/format/YAMLFormat.py | 3 +- code/query/Detail.py | 2 +- code/query/Listing.py | 4 +- code/query/Query.py | 3 +- code/query/Result.py | 4 +- code/query/Summary.py | 33 ++++++------ code/tests/misc_test.py | 13 ++++- docs/examples.md | 102 +++++++++++++++++++++++++++++++++++++- 12 files changed, 148 insertions(+), 75 deletions(-) delete mode 100644 code/db/DataItem.py delete mode 100644 code/db/LibPair.py delete mode 100644 code/db/Migration.py diff --git a/code/db/DataItem.py b/code/db/DataItem.py deleted file mode 100644 index 8a49808..0000000 --- a/code/db/DataItem.py +++ /dev/null @@ -1,8 +0,0 @@ -from abc import ABC - - -class DataItem(ABC): - id: str - - def __getitem__(self, property: str): - return getattr(self, property) diff --git a/code/db/Db.py b/code/db/Db.py index 075d3a9..3f6d11c 100644 --- a/code/db/Db.py +++ b/code/db/Db.py @@ -1,26 +1,24 @@ import fnmatch -from typing import Type from pathlib import Path import yaml from core.Constants import MigrationKey, LibPairKey -from db.DataItem import DataItem -from db.LibPair import LibPair -from db.Migration import Migration + +DataItem = dict[str, any] class Db: - migrations: dict[str, Migration] - lib_pairs: dict[str, LibPair] + migrations: dict[str, DataItem] + lib_pairs: dict[str, DataItem] _mapping: dict[str, dict[str, DataItem]] def __init__(self, data_root: str): self.data_root = data_root def load(self): - self.migrations = self.load_items("migration", Migration) - self.lib_pairs = self.load_items("libpair", LibPair) + self.migrations = self.load_items("migration") + self.lib_pairs = self.load_items("libpair") self._mapping = { MigrationKey: self.migrations, LibPairKey: self.lib_pairs, @@ -38,10 +36,10 @@ def filter_list(self, data_type: str, filters: dict[str, str]): def get_item(self, data_type: str, id: str): return self._mapping[data_type][id] - def load_items(self, data_folder, data_type): + def load_items(self, data_folder): paths = Path(self.data_root, data_folder).glob("*.yaml") - items = (self.load_item(p, data_type) for p in paths) - dict = {item.id: item for item in items} + items = (self.load_item(p) for p in paths) + dict = {item["id"]: item for item in items} return dict @staticmethod @@ -56,10 +54,8 @@ def item_satisfies_filter(item: DataItem, filter_key: str, filter_value: str): pass @staticmethod - def load_item(yaml_path: Path, ctor: Type[DataItem]): + def load_item(yaml_path: Path): with open(yaml_path) as f: content = f.read() - obj = ctor() - dict = yaml.safe_load(content) - obj.__dict__.update(dict) - return obj + dict: DataItem = yaml.safe_load(content) + return dict diff --git a/code/db/LibPair.py b/code/db/LibPair.py deleted file mode 100644 index 17ddbdb..0000000 --- a/code/db/LibPair.py +++ /dev/null @@ -1,7 +0,0 @@ -from db.DataItem import DataItem - - -class LibPair(DataItem): - source: str - target: str - domain: str diff --git a/code/db/Migration.py b/code/db/Migration.py deleted file mode 100644 index 0573fa6..0000000 --- a/code/db/Migration.py +++ /dev/null @@ -1,16 +0,0 @@ -from db.DataItem import DataItem - - -class Migration(DataItem): - source: str - target: str - repo: str - commit: str - pair_id: str - commit_message: str - code_changes: list - - -class CodeChange: - filepath: str - lines: list[str] diff --git a/code/format/YAMLFormat.py b/code/format/YAMLFormat.py index 5b0be74..e12e550 100644 --- a/code/format/YAMLFormat.py +++ b/code/format/YAMLFormat.py @@ -1,10 +1,9 @@ import yaml -from core.to_dict import to_dict from format.OutputFormat import OutputFormat from query.Result import Result class YAMLFormat(OutputFormat): def format_impl(self, result: Result): - return yaml.safe_dump(to_dict(result.items), sort_keys=False) + return yaml.safe_dump(result.items, sort_keys=False) diff --git a/code/query/Detail.py b/code/query/Detail.py index a9c4957..6e3c709 100644 --- a/code/query/Detail.py +++ b/code/query/Detail.py @@ -1,4 +1,4 @@ -from db.DataItem import DataItem +from db.Db import DataItem from query.Query import ListQuery diff --git a/code/query/Listing.py b/code/query/Listing.py index 057fea5..eb64562 100644 --- a/code/query/Listing.py +++ b/code/query/Listing.py @@ -1,7 +1,7 @@ -from db.DataItem import DataItem +from db.Db import DataItem from query.Query import ListQuery class Listing(ListQuery): def format_item(self, item: DataItem): - return item.id + return item["id"] diff --git a/code/query/Query.py b/code/query/Query.py index 765ad5e..2ab5665 100644 --- a/code/query/Query.py +++ b/code/query/Query.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod from core.Arguments import Arguments -from db.DataItem import DataItem -from db.Db import Db +from db.Db import Db, DataItem from query.Result import Result, ResultDisplayOption diff --git a/code/query/Result.py b/code/query/Result.py index a1eaac4..0d53ab1 100644 --- a/code/query/Result.py +++ b/code/query/Result.py @@ -1,5 +1,7 @@ from enum import Enum +from db.Db import DataItem + class ResultDisplayOption(Enum): COUNT_ONLY = "count_only" @@ -8,7 +10,7 @@ class ResultDisplayOption(Enum): class Result: - def __init__(self, items: list[object], display_option: ResultDisplayOption): + def __init__(self, items: list[DataItem], display_option: ResultDisplayOption): self.count = len(items) self.items = items self.display_option = display_option diff --git a/code/query/Summary.py b/code/query/Summary.py index 0fa3203..78ad42f 100644 --- a/code/query/Summary.py +++ b/code/query/Summary.py @@ -1,21 +1,20 @@ -from core.Constants import DataTypeKeys, DataTypeName, MigrationKey, LibPairKey -from db.LibPair import LibPair -from db.Migration import Migration +from core.Constants import MigrationKey, LibPairKey +from db.Db import DataItem from query.Query import Query from query.Result import Result, ResultDisplayOption class Summary(Query): def run(self): - migs: list[Migration] = self.db.get_list(MigrationKey) - all_lib_pairs: list[LibPair] = self.db.get_list(LibPairKey) - sources = {lp.source for lp in all_lib_pairs} - targets = {lp.target for lp in all_lib_pairs} + migs: list[DataItem] = self.db.get_list(MigrationKey) + all_lib_pairs: list[DataItem] = self.db.get_list(LibPairKey) + sources = {lp["source"] for lp in all_lib_pairs} + targets = {lp["target"] for lp in all_lib_pairs} libs = sources.union(targets) - domains = {lp.domain for lp in all_lib_pairs} - repos = {mg.repo for mg in migs} - commits = {mg.commit for mg in migs} - lib_pairs_having_migs = {mg.pair_id for mg in migs} + domains = {lp["domain"] for lp in all_lib_pairs} + repos = {mg["repo"] for mg in migs} + commits = {mg["commit"] for mg in migs} + lib_pairs_having_migs = {mg["pair_id"] for mg in migs} migs_having_code_changes = set() lib_pairs_having_code_changes = set() @@ -25,14 +24,14 @@ def run(self): file_count = 0 segments_count = 0 for mg in migs: - cc_in_mig = len(mg.code_changes) + cc_in_mig = len(mg["code_changes"]) if cc_in_mig: - migs_having_code_changes.add(mg.id) - lib_pairs_having_code_changes.add(mg.pair_id) - repos_having_code_changes.add(mg.repo) - commits_having_code_changes.add(mg.commit) + migs_having_code_changes.add(mg["id"]) + lib_pairs_having_code_changes.add(mg["pair_id"]) + repos_having_code_changes.add(mg["repo"]) + commits_having_code_changes.add(mg["commit"]) file_count += cc_in_mig - segments_count += sum(len(cc["lines"]) for cc in mg.code_changes) + segments_count += sum(len(cc["lines"]) for cc in mg["code_changes"]) result = { "analogous library pairs": len(all_lib_pairs), diff --git a/code/tests/misc_test.py b/code/tests/misc_test.py index 9e09557..355a710 100644 --- a/code/tests/misc_test.py +++ b/code/tests/misc_test.py @@ -3,7 +3,18 @@ def test_detail_1(): - args = Arguments(query="detail", data_type="mg", filters=["target=aiohttp"], output_format="json") + args = Arguments(query="detail", data_type="mg", filters=["target=aiohttp"], output_format="yaml") + run_query(args) + + +def test_detail_2(): + args = Arguments(query="detail", data_type="lp", filters=["target=aiohttp"], output_format="yaml") + run_query(args) + + +def test_detail_multiple_filters(): + args = Arguments(query="detail", data_type="mg", filters=["source=pyyaml", "target=ruamel.yaml"], + output_format="yaml") run_query(args) diff --git a/docs/examples.md b/docs/examples.md index dd531dc..8bd5498 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -45,7 +45,7 @@ python pymigbench.py count -dt lp -f domain="File reader/writer" ``` 6 items ``` -## List all _migrations_ +## List IDs of all _migrations_ **Command:** ```bash python pymigbench.py list -dt mg @@ -64,7 +64,7 @@ python pymigbench.py list -dt mg 157 items ``` -## Find all _migrations_ to target library _aiohttp_ +## List IDs of _migrations_ to target library _aiohttp_ **Command:** ```bash python pymigbench.py list -dt mg -f target=aiohttp @@ -86,3 +86,101 @@ python pymigbench.py list -dt mg -f target=aiohttp - d3a9a16_requests,aiohttp 11 items ``` + +## Show details of _migrations_ from _ruamel.yaml_ to _pyyaml_ in JSON format +**Command:** +```bash +python pymigbench.py detail -dt mg -f source=ruamel.yaml target=pyyaml -o json +``` +**Result:** +```json +2 items +[ + { + "id": "12e3e80_ruamel.yaml,pyyaml", + "source": "ruamel.yaml", + "target": "pyyaml", + "repo": "cloud-custodian/cloud-custodian", + "commit": "12e3e8084ddb2e7f5ccbc5ea3c3bd3e4c7e9c207", + "pair_id": "ruamel.yaml,pyyaml", + "commit_message": "tools/c7n_mailer - switch ruamel dependency to pyyaml (#5521)", + "commit_url": "https://github.com/cloud-custodian/cloud-custodian/commit/12e3e808", + "code_changes": [ + { + "filepath": "tools/c7n_mailer/c7n_mailer/replay.py", + "lines": [ + "25:18" + ] + }, + { + "filepath": "tools/c7n_mailer/c7n_mailer/utils.py", + "lines": [ + "28:22" + ] + }, + { + "filepath": "tools/c7n_mailer/c7n_mailer/cli.py", + "lines": [ + "15:10" + ] + } + ] + }, + { + "id": "b955ac9_ruamel.yaml,pyyaml", + "source": "ruamel.yaml", + "target": "pyyaml", + "repo": "microsoft/nni", + "commit": "b955ac99a46094d2d701d447e9df07509767cc32", + "pair_id": "ruamel.yaml,pyyaml", + "commit_message": "Use PyYAML instead of ruamel.yaml (#3702)", + "commit_url": "https://github.com/microsoft/nni/commit/b955ac99", + "code_changes": [ + { + "filepath": "nni/tools/nnictl/common_utils.py", + "lines": [ + "12:12" + ] + }, + { + "filepath": "test/nni_test/nnitest/utils.py", + "lines": [ + "12:12", + "46:46", + "51:51" + ] + }, + { + "filepath": "nni/experiment/config/common.py", + "lines": [ + "8:8", + "121:121" + ] + }, + { + "filepath": "test/nni_test/nnitest/run_tests.py", + "lines": [ + "12:12", + "83:83" + ] + }, + { + "filepath": "nni/experiment/config/base.py", + "lines": [ + "9:9", + "75:75" + ] + }, + { + "filepath": "nni/tools/package_utils/__init__.py", + "lines": [ + "9:9", + "218:218", + "229:229" + ] + } + ] + } +] +2 items +``` \ No newline at end of file