Skip to content

Commit

Permalink
simplify data types to dictionary.
Browse files Browse the repository at this point in the history
add example of JSON output
  • Loading branch information
mohayemin committed Jan 26, 2023
1 parent eb3c721 commit c0d6db3
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 75 deletions.
8 changes: 0 additions & 8 deletions code/db/DataItem.py

This file was deleted.

28 changes: 12 additions & 16 deletions code/db/Db.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
import fnmatch
from typing import Type
from pathlib import Path

import yaml

from core.Constants import MigrationKey, LibPairKey
from db.DataItem import DataItem
from db.LibPair import LibPair
from db.Migration import Migration

from typing import Any

# A data item is the plain dictionary parsed from one YAML data file.
# The value type is typing.Any; the lowercase builtin `any` is a function,
# not a type, and must not be used as a type argument.
DataItem = dict[str, Any]


# In-memory store for the YAML data files found under a data root directory.
# NOTE(review): this is a rendered diff hunk, so the changed declarations appear
# twice, apparently in pre-commit form first and post-commit form second.
class Db:
# Pre-commit declarations: values typed as Migration / LibPair objects.
migrations: dict[str, Migration]
lib_pairs: dict[str, LibPair]
# Post-commit declarations: both collections hold DataItem dictionaries.
migrations: dict[str, DataItem]
lib_pairs: dict[str, DataItem]
# Maps a data type key to one of the collections above; assigned in load().
_mapping: dict[str, dict[str, DataItem]]

def __init__(self, data_root: str):
self.data_root = data_root

def load(self):
self.migrations = self.load_items("migration", Migration)
self.lib_pairs = self.load_items("libpair", LibPair)
self.migrations = self.load_items("migration")
self.lib_pairs = self.load_items("libpair")
self._mapping = {
MigrationKey: self.migrations,
LibPairKey: self.lib_pairs,
Expand All @@ -38,10 +36,10 @@ def filter_list(self, data_type: str, filters: dict[str, str]):
def get_item(self, data_type: str, id: str):
    """Return the single item stored under ``id`` for the given data type.

    Raises ``KeyError`` when the data type or the id is not present.
    """
    items_of_type = self._mapping[data_type]
    return items_of_type[id]

# Build an id -> item dictionary from every "*.yaml" file in one folder under
# self.data_root.
# NOTE(review): this is a rendered diff hunk, so each changed line appears twice,
# apparently in pre-commit form first and post-commit form second.
# Pre-commit signature: took the item class to instantiate as data_type.
def load_items(self, data_folder, data_type):
# Post-commit signature: no item class is needed any more.
def load_items(self, data_folder):
paths = Path(self.data_root, data_folder).glob("*.yaml")
# Pre-commit lines: items were objects, keyed by their .id attribute.
items = (self.load_item(p, data_type) for p in paths)
dict = {item.id: item for item in items}
# Post-commit lines: items are dictionaries, keyed by their "id" entry.
items = (self.load_item(p) for p in paths)
# NOTE(review): the local name `dict` shadows the builtin inside this method.
dict = {item["id"]: item for item in items}
return dict

@staticmethod
Expand All @@ -56,10 +54,8 @@ def item_satisfies_filter(item: DataItem, filter_key: str, filter_value: str):
pass

# Read one YAML file and return its parsed content.
# NOTE(review): this is a rendered diff hunk, so changed lines appear twice,
# apparently in pre-commit form first and post-commit form second.
@staticmethod
# Pre-commit signature: ctor was the class instantiated for each file.
def load_item(yaml_path: Path, ctor: Type[DataItem]):
# Post-commit signature: only the file path is needed.
def load_item(yaml_path: Path):
# NOTE(review): open() is called without an explicit encoding, so the
# platform default text encoding is used.
with open(yaml_path) as f:
content = f.read()
# Pre-commit lines: the parsed mapping was copied into the attributes of a
# fresh ctor() instance, and that object was returned.
obj = ctor()
dict = yaml.safe_load(content)
obj.__dict__.update(dict)
return obj
# Post-commit lines: the parsed YAML content is returned directly.
# NOTE(review): the local name `dict` shadows the builtin inside this method.
dict: DataItem = yaml.safe_load(content)
return dict
7 changes: 0 additions & 7 deletions code/db/LibPair.py

This file was deleted.

16 changes: 0 additions & 16 deletions code/db/Migration.py

This file was deleted.

3 changes: 1 addition & 2 deletions code/format/YAMLFormat.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import yaml

from core.to_dict import to_dict
from format.OutputFormat import OutputFormat
from query.Result import Result


# Output format that renders the items of a Result as YAML text.
# NOTE(review): this is a rendered diff hunk; the two return lines are apparently
# the pre-commit and post-commit versions of the same statement.
class YAMLFormat(OutputFormat):
def format_impl(self, result: Result):
# Pre-commit line: items were converted with to_dict() before dumping.
return yaml.safe_dump(to_dict(result.items), sort_keys=False)
# Post-commit line: items are dumped directly; sort_keys=False stops
# safe_dump from sorting mapping keys.
return yaml.safe_dump(result.items, sort_keys=False)
2 changes: 1 addition & 1 deletion code/query/Detail.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from db.DataItem import DataItem
from db.Db import DataItem
from query.Query import ListQuery


Expand Down
4 changes: 2 additions & 2 deletions code/query/Listing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from db.DataItem import DataItem
from db.Db import DataItem
from query.Query import ListQuery


# List query whose per-item output is just the item's id.
# NOTE(review): this is a rendered diff hunk; the two return lines are apparently
# the pre-commit and post-commit versions of the same statement.
class Listing(ListQuery):
def format_item(self, item: DataItem):
# Pre-commit line: the id was read as an attribute of the item object.
return item.id
# Post-commit line: the id is read by key from the item.
return item["id"]
3 changes: 1 addition & 2 deletions code/query/Query.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from abc import ABC, abstractmethod

from core.Arguments import Arguments
from db.DataItem import DataItem
from db.Db import Db
from db.Db import Db, DataItem
from query.Result import Result, ResultDisplayOption


Expand Down
4 changes: 3 additions & 1 deletion code/query/Result.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from enum import Enum

from db.Db import DataItem


class ResultDisplayOption(Enum):
COUNT_ONLY = "count_only"
Expand All @@ -8,7 +10,7 @@ class ResultDisplayOption(Enum):


class Result:
# Store the result items, their count, and the requested display option.
# NOTE(review): this is a rendered diff hunk; the two signatures are apparently
# the pre-commit and post-commit versions and differ only in the annotation
# of `items`.
# Pre-commit signature: items annotated as list[object].
def __init__(self, items: list[object], display_option: ResultDisplayOption):
# Post-commit signature: items annotated as list[DataItem].
def __init__(self, items: list[DataItem], display_option: ResultDisplayOption):
# The count is taken once here from len(items).
self.count = len(items)
self.items = items
self.display_option = display_option
33 changes: 16 additions & 17 deletions code/query/Summary.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
from core.Constants import DataTypeKeys, DataTypeName, MigrationKey, LibPairKey
from db.LibPair import LibPair
from db.Migration import Migration
from core.Constants import MigrationKey, LibPairKey
from db.Db import DataItem
from query.Query import Query
from query.Result import Result, ResultDisplayOption


class Summary(Query):
def run(self):
migs: list[Migration] = self.db.get_list(MigrationKey)
all_lib_pairs: list[LibPair] = self.db.get_list(LibPairKey)
sources = {lp.source for lp in all_lib_pairs}
targets = {lp.target for lp in all_lib_pairs}
migs: list[DataItem] = self.db.get_list(MigrationKey)
all_lib_pairs: list[DataItem] = self.db.get_list(LibPairKey)
sources = {lp["source"] for lp in all_lib_pairs}
targets = {lp["target"] for lp in all_lib_pairs}
libs = sources.union(targets)
domains = {lp.domain for lp in all_lib_pairs}
repos = {mg.repo for mg in migs}
commits = {mg.commit for mg in migs}
lib_pairs_having_migs = {mg.pair_id for mg in migs}
domains = {lp["domain"] for lp in all_lib_pairs}
repos = {mg["repo"] for mg in migs}
commits = {mg["commit"] for mg in migs}
lib_pairs_having_migs = {mg["pair_id"] for mg in migs}

migs_having_code_changes = set()
lib_pairs_having_code_changes = set()
Expand All @@ -25,14 +24,14 @@ def run(self):
file_count = 0
segments_count = 0
for mg in migs:
cc_in_mig = len(mg.code_changes)
cc_in_mig = len(mg["code_changes"])
if cc_in_mig:
migs_having_code_changes.add(mg.id)
lib_pairs_having_code_changes.add(mg.pair_id)
repos_having_code_changes.add(mg.repo)
commits_having_code_changes.add(mg.commit)
migs_having_code_changes.add(mg["id"])
lib_pairs_having_code_changes.add(mg["pair_id"])
repos_having_code_changes.add(mg["repo"])
commits_having_code_changes.add(mg["commit"])
file_count += cc_in_mig
segments_count += sum(len(cc["lines"]) for cc in mg.code_changes)
segments_count += sum(len(cc["lines"]) for cc in mg["code_changes"])

result = {
"analogous library pairs": len(all_lib_pairs),
Expand Down
13 changes: 12 additions & 1 deletion code/tests/misc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@


# Smoke test: run a "detail" query on "mg" data filtered by target=aiohttp.
# There is no assertion; the test passes if run_query() does not raise.
# NOTE(review): this is a rendered diff hunk; the two `args` lines are apparently
# the pre-commit and post-commit versions of the same statement.
def test_detail_1():
# Pre-commit line: requested output_format="json".
args = Arguments(query="detail", data_type="mg", filters=["target=aiohttp"], output_format="json")
# Post-commit line: requests output_format="yaml".
args = Arguments(query="detail", data_type="mg", filters=["target=aiohttp"], output_format="yaml")
run_query(args)


def test_detail_2():
    """Smoke test: "detail" query on "lp" data filtered by target=aiohttp.

    There is no assertion; the test passes if run_query() does not raise.
    """
    run_query(
        Arguments(
            query="detail",
            data_type="lp",
            filters=["target=aiohttp"],
            output_format="yaml",
        )
    )


def test_detail_multiple_filters():
    """Smoke test: "detail" query on "mg" data with two filters at once.

    There is no assertion; the test passes if run_query() does not raise.
    """
    filters = ["source=pyyaml", "target=ruamel.yaml"]
    args = Arguments(query="detail", data_type="mg", filters=filters, output_format="yaml")
    run_query(args)


Expand Down
102 changes: 100 additions & 2 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ python pymigbench.py count -dt lp -f domain="File reader/writer"
```
6 items
```
## List all _migrations_
## List IDs of all _migrations_
**Command:**
```bash
python pymigbench.py list -dt mg
Expand All @@ -64,7 +64,7 @@ python pymigbench.py list -dt mg
157 items
```

## Find all _migrations_ to target library _aiohttp_
## List IDs of _migrations_ to target library _aiohttp_
**Command:**
```bash
python pymigbench.py list -dt mg -f target=aiohttp
Expand All @@ -86,3 +86,101 @@ python pymigbench.py list -dt mg -f target=aiohttp
- d3a9a16_requests,aiohttp
11 items
```

## Show details of _migrations_ from _ruamel.yaml_ to _pyyaml_ in JSON format
**Command:**
```bash
python pymigbench.py detail -dt mg -f source=ruamel.yaml target=pyyaml -o json
```
**Result:**
```json
2 items
[
{
"id": "12e3e80_ruamel.yaml,pyyaml",
"source": "ruamel.yaml",
"target": "pyyaml",
"repo": "cloud-custodian/cloud-custodian",
"commit": "12e3e8084ddb2e7f5ccbc5ea3c3bd3e4c7e9c207",
"pair_id": "ruamel.yaml,pyyaml",
"commit_message": "tools/c7n_mailer - switch ruamel dependency to pyyaml (#5521)",
"commit_url": "https://github.com/cloud-custodian/cloud-custodian/commit/12e3e808",
"code_changes": [
{
"filepath": "tools/c7n_mailer/c7n_mailer/replay.py",
"lines": [
"25:18"
]
},
{
"filepath": "tools/c7n_mailer/c7n_mailer/utils.py",
"lines": [
"28:22"
]
},
{
"filepath": "tools/c7n_mailer/c7n_mailer/cli.py",
"lines": [
"15:10"
]
}
]
},
{
"id": "b955ac9_ruamel.yaml,pyyaml",
"source": "ruamel.yaml",
"target": "pyyaml",
"repo": "microsoft/nni",
"commit": "b955ac99a46094d2d701d447e9df07509767cc32",
"pair_id": "ruamel.yaml,pyyaml",
"commit_message": "Use PyYAML instead of ruamel.yaml (#3702)",
"commit_url": "https://github.com/microsoft/nni/commit/b955ac99",
"code_changes": [
{
"filepath": "nni/tools/nnictl/common_utils.py",
"lines": [
"12:12"
]
},
{
"filepath": "test/nni_test/nnitest/utils.py",
"lines": [
"12:12",
"46:46",
"51:51"
]
},
{
"filepath": "nni/experiment/config/common.py",
"lines": [
"8:8",
"121:121"
]
},
{
"filepath": "test/nni_test/nnitest/run_tests.py",
"lines": [
"12:12",
"83:83"
]
},
{
"filepath": "nni/experiment/config/base.py",
"lines": [
"9:9",
"75:75"
]
},
{
"filepath": "nni/tools/package_utils/__init__.py",
"lines": [
"9:9",
"218:218",
"229:229"
]
}
]
}
]
2 items
```

0 comments on commit c0d6db3

Please sign in to comment.