diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b2ed08 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +secrets.json +dist/ +*.egg-info/ +.venv/ diff --git a/api/.idea/.gitignore b/.idea/.gitignore similarity index 100% rename from api/.idea/.gitignore rename to .idea/.gitignore diff --git a/.idea/PyMigBench.iml b/.idea/PyMigBench.iml new file mode 100644 index 0000000..9cdf078 --- /dev/null +++ b/.idea/PyMigBench.iml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml similarity index 93% rename from api/.idea/inspectionProfiles/Project_Default.xml rename to .idea/inspectionProfiles/Project_Default.xml index 3dba0a7..d89bad0 100644 --- a/api/.idea/inspectionProfiles/Project_Default.xml +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -16,7 +16,7 @@ diff --git a/api/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml similarity index 100% rename from api/.idea/inspectionProfiles/profiles_settings.xml rename to .idea/inspectionProfiles/profiles_settings.xml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..0a6fa8d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/api/.idea/modules.xml b/.idea/modules.xml similarity index 56% rename from api/.idea/modules.xml rename to .idea/modules.xml index 4ce9659..aa70e7b 100644 --- a/api/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/api/.idea/vcs.xml b/.idea/vcs.xml similarity index 69% rename from api/.idea/vcs.xml rename to .idea/vcs.xml index 6c0b863..35eb1dd 100644 --- a/api/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -1,6 +1,6 @@ - + \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4cffa32 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ 
+exclude pymigbench_tests/* +exclude data/* +include version diff --git a/README.md b/README.md index fa5e8dd..98feaf0 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ PyMigBench is a benchmark of Python Library Migrations. This repository contains the data and the code the library that can be used to access the dataset. -## The library -The library documentation can be found [here](/api). - -## PyMigBench v2 +## Dataset +### PyMigBench v2 The current version, PyMigBench-2.0, includes 3,096 migration-related code changes from 335 migrations between 141 analogous library pairs. This includes all migrations from [PyMigBench v1](#pymigbench-v1) and additional migrations borrowed from the [SALM dataset](https://ieeexplore.ieee.org/document/10123560). The data also includes additional information per migration-related code change compared to v1. @@ -18,7 +16,7 @@ Use either of these links to reproduce the paper. We may update this repository to correct any mistakes or add more data and it may not synch with the paper. For, the latest data, use the [latest release](https://github.com/ualberta-smr/PyMigBench/releases/latest) in this repository. -## PyMigBench v1 +### PyMigBench v1 We recommend using PyMigBench v2 for any new research. However, you want to use the v1 dataset, you should look at [Release 1.0.3](https://github.com/ualberta-smr/PyMigBench/releases/v1.0.3). Cite the paper below if you use the v1 dataset. @@ -37,6 +35,50 @@ Cite the paper below if you use the v1 dataset. ``` +## Library + +### Installation +The library and the dataset should be at the same version to be compatible. +To install the library, run: +```bash +pip install pymigbench== +``` + +### Basic usage +To use the library, you need to have the dataset downloaded. +You can download the dataset from the [GitHub repository](https://github.com/ualberta-smr/pymigbench). 
+ +```python +from pymigbench.database import Database +from pathlib import Path + +yaml_root = Path('repo-root/migration/') + +db = Database.load_from_dir(yaml_root) # Load the dataset from the directory +migs = db.migs() # Get all the migrations +``` + +### The constants +There are several enums to help you work with the dataset. +They are all in the `pymigbench.constants` module. Example: +```python +from pymigbench.constants import ProgramElement +``` + +### The migration-related objects +There are three main classes to encapsulate the data: `Migration`, `MigrationFile`, and `CodeChange`. + +`Migration` is the top-level class representing one single migration, i.e., one YAML file. +`Migration` has a list of `MigrationFile` objects, which represent the files that were changed in the migration. +`MigrationFile` has a list of `CodeChange` objects, each of which represents a single migration-related code change. +Each of these model classes has an `id()` method that returns a unique identifier for the object across the full dataset. +A `CodeChange` object additionally has an `index` property and an `id_in_file()` method, both of which are unique within the containing file. +Each of the classes has some additional helper methods. 
+ + + + + ## Contributors - [Mohayeminul Islam](https://mohayemin.github.io/) - [Ajay Kumar Jha](https://hifromajay.github.io/) diff --git a/api/.gitignore b/api/.gitignore deleted file mode 100644 index ebabab0..0000000 --- a/api/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -dist -pymigbench.egg-info -.publish -LICENSE diff --git a/api/.idea/.name b/api/.idea/.name deleted file mode 100644 index 9025bb1..0000000 --- a/api/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -pymigbench.api \ No newline at end of file diff --git a/api/.idea/misc.xml b/api/.idea/misc.xml deleted file mode 100644 index a8e0c92..0000000 --- a/api/.idea/misc.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/api/.idea/pymigbench.api.iml b/api/.idea/pymigbench.api.iml deleted file mode 100644 index 27e30e8..0000000 --- a/api/.idea/pymigbench.api.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/api/MANIFEST.in b/api/MANIFEST.in deleted file mode 100644 index f7bb6da..0000000 --- a/api/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -exclude pymigbench_tests/* -include .publish/README.md -include .publish/version diff --git a/api/README.md b/api/README.md deleted file mode 100644 index de1bc34..0000000 --- a/api/README.md +++ /dev/null @@ -1,44 +0,0 @@ -This is a library to access the PyMigBench dataset. -Visit the [GitHub repository](https://github.com/ualberta-smr/pymigbench) to learn about the dataset. - -## Installation -The library and the dataset should be at the same version to be compatible. -To install the library, run: -```bash -pip install pymigbench== -``` - -## Usage -To use the library, you need to have the dataset downloaded. -You can download the dataset from the [GitHub repository](https://github.com/ualberta-smr/pymigbench). 
- -```python -from pymigbench.database import Database -from pathlib import Path - -yaml_root = Path('repo-root/migration/') - -db = Database.load_from_dir(yaml_root) # Load the dataset from the directory -migs = db.migs() # Get all the migrations -``` - -### The constants -There are several enums to help you work with the dataset: -They are all in the `pymigbench.constants` module. Example: -```python -from pymigbench.constants import ProgramElement -``` - -### The migration-related objects -There are three main classes to encapsulate the data: `Migration`, `MigrationFile`, and `CodeChange`. - -`Migration` is the top level class representing one single migration, ie, one yaml file. -`Migration` has a list of `MigrationFile` objects, which represent the files that were changed in the migration. -`MigrationFile` has a list of `CodeChange` objects, which represent a single migration-related code change. -Each of these model classes has an `id()` method that returns a unique identifier for the object across the full dataset. -`CodeChange` object additionally has an `index` property and a `id_in_file()` method, which are unique within container file. -Each of the classes has some additional helper methods. - - - - diff --git a/api/build.sh b/api/build.sh deleted file mode 100644 index 22c6cf3..0000000 --- a/api/build.sh +++ /dev/null @@ -1,6 +0,0 @@ -cp ../LICENSE . 
-cp ../version ./.publish/ - -rm -r dist -py -m build - diff --git a/api/publish.sh b/api/publish.sh deleted file mode 100644 index 556af48..0000000 --- a/api/publish.sh +++ /dev/null @@ -1 +0,0 @@ -py -m twine upload dist/* -u __token__ -p "$(cat .publish/pypi.token.txt)" \ No newline at end of file diff --git a/api/pymigbench/__init__.py b/pymigbench/__init__.py similarity index 100% rename from api/pymigbench/__init__.py rename to pymigbench/__init__.py diff --git a/api/pymigbench/code_change.py b/pymigbench/code_change.py similarity index 100% rename from api/pymigbench/code_change.py rename to pymigbench/code_change.py diff --git a/api/pymigbench/constants.py b/pymigbench/constants.py similarity index 100% rename from api/pymigbench/constants.py rename to pymigbench/constants.py diff --git a/api/pymigbench/database.py b/pymigbench/database.py similarity index 100% rename from api/pymigbench/database.py rename to pymigbench/database.py diff --git a/api/pymigbench/line_range.py b/pymigbench/line_range.py similarity index 100% rename from api/pymigbench/line_range.py rename to pymigbench/line_range.py diff --git a/api/pymigbench/line_replacement.py b/pymigbench/line_replacement.py similarity index 100% rename from api/pymigbench/line_replacement.py rename to pymigbench/line_replacement.py diff --git a/api/pymigbench/migration.py b/pymigbench/migration.py similarity index 100% rename from api/pymigbench/migration.py rename to pymigbench/migration.py diff --git a/api/pymigbench/migration_file.py b/pymigbench/migration_file.py similarity index 100% rename from api/pymigbench/migration_file.py rename to pymigbench/migration_file.py diff --git a/api/pymigbench/parsers.py b/pymigbench/parsers.py similarity index 100% rename from api/pymigbench/parsers.py rename to pymigbench/parsers.py diff --git a/api/pymigbench/types.py b/pymigbench/types.py similarity index 100% rename from api/pymigbench/types.py rename to pymigbench/types.py diff --git 
a/api/pymigbench_tests/__init__.py b/pymigbench_tests/__init__.py similarity index 100% rename from api/pymigbench_tests/__init__.py rename to pymigbench_tests/__init__.py diff --git a/api/pymigbench_tests/data_models_test.py b/pymigbench_tests/data_models_test.py similarity index 100% rename from api/pymigbench_tests/data_models_test.py rename to pymigbench_tests/data_models_test.py diff --git a/api/pymigbench_tests/line_range_test.py b/pymigbench_tests/line_range_test.py similarity index 100% rename from api/pymigbench_tests/line_range_test.py rename to pymigbench_tests/line_range_test.py diff --git a/api/pymigbench_tests/line_replacement_test.py b/pymigbench_tests/line_replacement_test.py similarity index 100% rename from api/pymigbench_tests/line_replacement_test.py rename to pymigbench_tests/line_replacement_test.py diff --git a/api/pymigbench_tests/parser_test.py b/pymigbench_tests/parser_test.py similarity index 100% rename from api/pymigbench_tests/parser_test.py rename to pymigbench_tests/parser_test.py diff --git a/api/pyproject.toml b/pyproject.toml similarity index 100% rename from api/pyproject.toml rename to pyproject.toml diff --git a/release.py b/release.py new file mode 100644 index 0000000..684ced9 --- /dev/null +++ b/release.py @@ -0,0 +1,84 @@ +""" +This file is not part of the library itself, rather is a script to build the library. +This should have minimal third-party dependencies so that it can be run in any environment. 
import json
import os
import shutil
import subprocess
import sys
from typing import Optional


class Release:
    """Build, validate and publish one release of the ``pymigbench`` package.

    The version string is read from ``./version`` and the credentials from
    ``./secrets.json``; both paths are relative to the current working
    directory. Every step returns ``self`` so that steps can be chained.
    """

    def __init__(self, non_interactive: bool):
        """Load the version and secrets from disk.

        :param non_interactive: when true, no prompt is shown and
            ``publish_pypi`` does nothing.
        :raises OSError: if ``./version`` or ``./secrets.json`` cannot be read.
        :raises json.JSONDecodeError: if ``./secrets.json`` is not valid JSON.
        """
        self.non_interactive = non_interactive
        # Context managers so the file handles are closed deterministically.
        with open("./version") as version_file:
            self.version = version_file.read().strip()
        with open("./secrets.json") as secrets_file:
            self.secrets = json.load(secrets_file)
        self.pypi_release_url = f"https://pypi.org/project/pymigbench/{self.version}/"
        self.github_repo = "ualberta-smr/pymigbench"
        self.github_url = f"https://github.com/{self.github_repo}"

    def validate(self):
        """Check that HEAD is tagged ``v<version>`` and the version is not on PyPI yet.

        :return: ``self``
        :raises ValueError: if HEAD carries no exact tag, the tag differs from
            the expected one, or PyPI already serves this version.
        """
        # Imported here so that steps which do not talk to PyPI (e.g. ``build``)
        # work even when the third-party ``requests`` package is missing.
        import requests

        gh_tag = f"v{self.version}"

        try:
            current_tag = run_command(["git", "describe", "--tags", "--exact-match"]).strip()
        except (OSError, subprocess.CalledProcessError) as e:
            raise ValueError(f"Current head is not tagged with the expected version: {gh_tag}") from e
        # Compared outside the ``try`` so this more specific message is not
        # swallowed and replaced by the generic "not tagged" one.
        if current_tag != gh_tag:
            raise ValueError(f"Current tag {current_tag} does not match the expected tag {gh_tag}.")

        # A timeout keeps the release from hanging forever on a stalled connection.
        response = requests.head(self.pypi_release_url, timeout=30)
        if response.status_code == 200:
            raise ValueError(f"Version {self.version} already released on PyPI.")

        print("Validated")
        print(f"  Version: {self.version}")
        return self

    def build(self):
        """Build the distributions into ``dist/`` with ``python -m build``.

        :return: ``self``
        :raises subprocess.CalledProcessError: if the build exits non-zero.
        """
        print("Building...")
        # Remove artifacts of earlier builds; ``publish_pypi`` uploads
        # everything matching ``dist/*`` and must not pick up stale files.
        shutil.rmtree("dist", ignore_errors=True)
        # ``sys.executable`` runs the build with the interpreter that is
        # running this script instead of whatever ``python`` is on PATH.
        run_command([sys.executable, "-m", "build"])
        print("Built successfully.")
        return self

    def publish_pypi(self):
        """Upload ``dist/*`` to PyPI with twine after an interactive confirmation.

        In non-interactive mode nothing is uploaded.
        NOTE(review): the original code returned early here as well; confirm
        that "never publish without a prompt" is the intended behaviour.

        :return: ``self``
        :raises KeyError: if the secrets have no ``pypi`` entry.
        :raises subprocess.CalledProcessError: if twine exits non-zero.
        """
        if self.non_interactive:
            print("Non-interactive mode: not publishing to PyPI.")
            return self

        confirm = input("Do you want to publish to PyPI? Note that this is an irreversible process. (y/n): ")
        if confirm.lower() != 'y':
            print("Not publishing to PyPI.")
            return self

        print("Publishing to PyPI...")
        # Credentials go through twine's environment variables rather than
        # ``-p`` so the token does not show up in the process list or in the
        # message of a CalledProcessError.
        twine_env = dict(
            os.environ,
            TWINE_USERNAME="__token__",
            TWINE_PASSWORD=self.secrets['pypi'],
        )
        run_command(["twine", "upload", "dist/*"], env=twine_env)
        print("Published to PyPI.")
        return self


def run_command(commands: list[str], env: Optional[dict[str, str]] = None) -> str:
    """Run a command, echo its stdout line by line and return the collected stdout.

    stderr is not captured; it is inherited from this process, so error
    output stays visible and an unread pipe can never block the child.

    :param commands: the program and its arguments (executed without a shell).
    :param env: optional complete environment for the child; ``None`` inherits
        the current environment.
    :return: the stripped stdout lines joined with ``"\\n"``.
    :raises subprocess.CalledProcessError: if the command exits non-zero; the
        stdout collected so far is available as ``output``.
    """
    lines = []
    with subprocess.Popen(commands, stdout=subprocess.PIPE, text=True, env=env) as process:
        for raw_line in process.stdout:
            line = raw_line.strip()
            lines.append(line)
            print(line)
    # Leaving the ``with`` block has waited for the child, so returncode is set.
    output = "\n".join(lines)
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, commands, output=output)
    return output


def main():
    """Entry point: build, validate, then (interactively) publish the release."""
    non_interactive = "--non-interactive" in sys.argv
    Release(non_interactive).build().validate().publish_pypi()


if __name__ == "__main__":
    main()