From 2ffb1ad95e68bcc891264a472e4693ccddf598e7 Mon Sep 17 00:00:00 2001 From: Robert Furth <44911447+Robert-Furth@users.noreply.github.com> Date: Wed, 11 Jan 2023 14:59:49 -0800 Subject: [PATCH] Merge BugSwarm/client into monorepo (#362) * Add bugswarm/client; edit `setup.py`s * Add bugswarm-client to build-artifacts action * Refactor build-artifacts.yml * Publish both common and client to PyPI * Manually update client version pending discussion * Update bugswarm-client alongside bugswarm-common * Use self-hosted to avoid usage limits * Bugfixes for build-artifacts workflow * If not in CI and env var unset, don't pin common * Style: make 'click' install requirement lowercase --- .github/workflows/build-artifacts.yml | 102 ++++++++++++++---- .github/workflows/main.yml | 52 +++++++++- .github/workflows/publish.yml | 11 +- bugswarm/client/README.md | 69 ++++++++++++ bugswarm/client/__init__.py | 0 bugswarm/client/bugswarm.py | 62 +++++++++++ bugswarm/client/command.py | 17 +++ bugswarm/client/docker.py | 144 ++++++++++++++++++++++++++ setup.client.py | 61 +++++++++++ setup.common.py | 2 +- setup.py | 2 + 11 files changed, 492 insertions(+), 30 deletions(-) create mode 100644 bugswarm/client/README.md create mode 100644 bugswarm/client/__init__.py create mode 100644 bugswarm/client/bugswarm.py create mode 100644 bugswarm/client/command.py create mode 100644 bugswarm/client/docker.py create mode 100644 setup.client.py diff --git a/.github/workflows/build-artifacts.yml b/.github/workflows/build-artifacts.yml index bec530d8..6f9bd2a4 100644 --- a/.github/workflows/build-artifacts.yml +++ b/.github/workflows/build-artifacts.yml @@ -1,45 +1,101 @@ -name: Build artifacts +name: Build Python artifacts -on: [workflow_call, workflow_dispatch] +on: + workflow_call: + inputs: + artifact-name: + required: true + type: string + source-path: + required: true + type: string + setup-file: + required: true + type: string + watched-paths: + required: true + type: string -jobs: - 
bugswarm-common: - name: Package bugswarm.common - runs-on: ubuntu-latest + workflow_dispatch: + inputs: + artifact-name: + required: true + type: string + source-path: + required: true + type: string + setup-file: + required: true + type: string +jobs: + check-changed-files: + name: Check for changed files + runs-on: ${{ (github.repository == 'BugSwarm/bugswarm-dev' && 'self-hosted') || 'ubuntu-latest' }} + container: bugswarm/images:ubuntu-20.04 + outputs: + any-changed: ${{ github.event_name == 'workflow_dispatch' || steps.changed-files.outputs.any_changed == 'true'}} steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: actions/setup-python@v4 - with: - python-version: '3.8' - name: Get changed files list uses: tj-actions/changed-files@v34 id: changed-files with: - files: | - bugswarm/common/** - setup.common.py + files: ${{ inputs.watched-paths }} - - name: Prepare repo for packaging - if: steps.changed-files.outputs.any_changed == 'true' + build-artifacts: + name: Package ${{ inputs.artifact-name }} + runs-on: ${{ (github.repository == 'BugSwarm/bugswarm-dev' && 'self-hosted') || 'ubuntu-latest' }} + container: bugswarm/images:ubuntu-20.04 + needs: check-changed-files + if: needs.check-changed-files.outputs.any-changed == 'true' + + steps: + - uses: actions/checkout@v3 + + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.8' + + # Make sure that the python build package works (only needed if we're in the container) + - run: sudo apt update; sudo apt install -y python3.8-venv + + # If we're building bugswarm-common, modify credentials.py + - name: Modify credentials (bugswarm-common only) + if: inputs.artifact-name == 'bugswarm-common-build' + working-directory: bugswarm/common run: | - pushd bugswarm/common mv credentials.sample.py credentials.py sed -i "s/COMMON_HOSTNAME = ''/COMMON_HOSTNAME = 'www.api.bugswarm.org'/g" credentials.py sed -i "s/\(.*\) = \(''\|\[\]\)/\1 = '#'/g" credentials.py - popd - mv bugswarm/common/README.md 
README.md - mv setup.common.py setup.py + + # If we're building bugswarm-client, check for a new version of bugswarm-common. + - name: Check for bugswarm-common artifact (bugswarm-client only) + id: bugswarm-common-artifact + if: inputs.artifact-name == 'bugswarm-client-build' + uses: actions/download-artifact@v3 + with: + name: bugswarm-common-build + path: tmp + continue-on-error: true + + - name: Prepare repo for packaging + env: + SOURCE_PATH: ${{ inputs.source-path }} + SETUP_FILE: ${{ inputs.setup-file }} + run: | + mv "${SOURCE_PATH}/README.md" README.md + mv "$SETUP_FILE" setup.py - name: Build wheel and source - if: steps.changed-files.outputs.any_changed == 'true' + env: + BSC_UPDATED: ${{ steps.bugswarm-common-artifact.outcome == 'success' || '' }} run: | - python3 -m pip install build - python3 -m build --sdist --wheel --outdir dist + python3.8 -m pip install build requests + python3.8 -m build --sdist --wheel --outdir dist - name: Upload dists - if: steps.changed-files.outputs.any_changed == 'true' uses: actions/upload-artifact@v3 with: - name: bugswarm-common-build + name: ${{ inputs.artifact-name }} path: ./dist diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2c9ac96a..f0308de3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -63,11 +63,33 @@ jobs: run: | python3.8 -m unittest discover - bugswarm_dev_build: - name: Build artifacts + bugswarm_dev_build_common: + name: Build bugswarm/common if: github.event_name == 'pull_request' needs: bugswarm_dev uses: ./.github/workflows/build-artifacts.yml + with: + artifact-name: bugswarm-common-build + source-path: bugswarm/common + setup-file: setup.common.py + watched-paths: | + bugswarm/common/** + setup.common.py + + bugswarm_dev_build_client: + name: Build bugswarm/client + if: github.event_name == 'pull_request' + needs: bugswarm_dev_build_common + uses: ./.github/workflows/build-artifacts.yml + with: + artifact-name: bugswarm-client-build + 
source-path: bugswarm/client + setup-file: setup.client.py + watched-paths: | + bugswarm/common/** + bugswarm/client/** + setup.common.py + setup.client.py bugswarm: name: Test in public repo @@ -123,8 +145,30 @@ jobs: run: | python3 -m unittest discover - bugswarm_build: - name: Build artifacts + bugswarm_build_common: + name: Build bugswarm/common if: github.event_name == 'push' && github.ref_name == 'master' needs: bugswarm uses: ./.github/workflows/build-artifacts.yml + with: + artifact-name: bugswarm-common-build + source-path: bugswarm/common + setup-file: setup.common.py + watched-paths: | + bugswarm/common/** + setup.common.py + + bugswarm_build_client: + name: Build bugswarm/client + if: github.event_name == 'push' && github.ref_name == 'master' + needs: bugswarm_build_common + uses: ./.github/workflows/build-artifacts.yml + with: + artifact-name: bugswarm-client-build + source-path: bugswarm/client + setup-file: setup.client.py + watched-paths: | + bugswarm/common/** + bugswarm/client/** + setup.common.py + setup.client.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index ed5fa3b0..15b09a22 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,8 +8,12 @@ on: jobs: bugswarm-common: - name: Publish bugswarm.common to PyPI + name: Publish ${{ matrix.artifact-name }} to PyPI runs-on: ubuntu-latest + matrix: + artifact-name: + - bugswarm-common-build + - bugswarm-client-build steps: # Because actions/download-artifact can only download artifacts from the same workflow run, @@ -20,12 +24,15 @@ jobs: # We don't want to fail the entire job if there aren't any new artifacts. # Make sure that all later steps check that steps.get-download-url.outcome == 'success'. 
continue-on-error: true + env: + ARTIFACT_NAME: ${{ matrix.artifact-name }} with: result-encoding: string script: | + const { ARTIFACT_NAME } = process.env; const mostRecentArtifact = (await github.rest.actions.listArtifactsForRepo({ ...context.repo, - name: "bugswarm-common-build", + name: ARTIFACT_NAME, })).data.artifacts[0]; const creationTime = new Date(mostRecentArtifact.created_at); diff --git a/bugswarm/client/README.md b/bugswarm/client/README.md new file mode 100644 index 00000000..d65ad914 --- /dev/null +++ b/bugswarm/client/README.md @@ -0,0 +1,69 @@ +# The BugSwarm Client + +![The BugSwarm Mascot](https://cloud.githubusercontent.com/assets/8139148/24324903/1101b9a2-114c-11e7-9340-316022ef57d5.png) + +The official command line client for the [BugSwarm](https://bugswarm.org) artifact dataset + +## Installation +> Requires Python 3. +``` +$ pip3 install bugswarm-client +``` + +## Usage +Download a Docker image and enter the Docker container associated with an artifact. +```shell +$ bugswarm run --image-tag <image-tag> +``` +> Depending on how Docker is configured on your machine, you may need to enter an administrator password. + +Download a Docker image and enter the Docker container with a shared folder between the container and the host machine. + +```shell +$ bugswarm run --image-tag <image-tag> --use-sandbox +``` + +Show metadata for an artifact. + +```shell +$ bugswarm show --image-tag <image-tag> --token <token> +``` + +Show usage text for the entire tool or for a specific sub-command. + +```shell +$ bugswarm --help +$ bugswarm <sub-command> --help +``` + +Show the version. + +```shell +$ bugswarm --version +``` + +Please note that artifacts are first attempted to be pulled from `bugswarm/cached-images`, and if not found then they are attempted to be pulled from `bugswarm/images`. 
+ +## Example + +```shell +$ bugswarm run --image-tag nutzam-nutz-140438299 +$ bugswarm show --image-tag nutzam-nutz-140438299 --token <token> +``` + +> [Requires a token](http://www.bugswarm.org/contact/) + +## Development +Execute the following commands to install the tool in ["editable" mode](https://pip.pypa.io/en/stable/cli/pip_install/#editable-installs). +1. Clone this repository. + ``` + $ git clone https://github.com/BugSwarm/client.git + ``` +1. `cd` into the root directory of this repository. + ``` + $ cd client + ``` +1. Install the tool. + ``` + $ pip3 install --upgrade --force-reinstall -e . + ``` diff --git a/bugswarm/client/__init__.py b/bugswarm/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bugswarm/client/bugswarm.py b/bugswarm/client/bugswarm.py new file mode 100644 index 00000000..ecc19dca --- /dev/null +++ b/bugswarm/client/bugswarm.py @@ -0,0 +1,62 @@ +import json +import logging +import os + +import click + +from bugswarm.common import log +from bugswarm.common.rest_api.database_api import DatabaseAPI + +from . import docker +from .command import MyCommand + + +@click.group() +@click.version_option(message='The BugSwarm Client, version %(version)s') +def cli(): + """A command line interface for the BugSwarm dataset.""" + # Configure logging. + log.config_logging(getattr(logging, 'INFO', None)) + + +@cli.command(cls=MyCommand) +@click.option('--image-tag', required=True, + type=str, + help='The artifact image tag.') +@click.option('--use-sandbox/--no-use-sandbox', default=False, + help='Whether to set up a directory that is shared by the host and container.') +@click.option('--pipe-stdin/--no-pipe-stdin', default=False, + help='If enabled, the contents of stdin are executed inside the container. ' + 'This option supports heredocs in shells that support them. ' + 'Disabled by default.') +@click.option('--rm/--no-rm', default=True, + help='If enabled, artifact containers will be cleaned up automatically after use. 
' + 'Disable this behavior if you want to inspect the container filesystem after use. ' + 'Enabled by default.') +def run(image_tag, use_sandbox, pipe_stdin, rm): + """Start an artifact container.""" + # If the script does not already have sudo privileges, then explain to the user why the password prompt will appear. + if os.getuid() != 0: + log.info('Docker requires sudo privileges.') + docker.docker_run(image_tag, use_sandbox, pipe_stdin, rm) + + +@cli.command(cls=MyCommand) +@click.option('--image-tag', required=True, + type=str, + help='The artifact image tag.') +@click.option('--token', required=True, + type=str, + help='An authentication token for the BugSwarm database. ' + 'Please visit www.bugswarm.org/get-full-access for more information.') +def show(image_tag, token): + """Display artifact metadata.""" + token = token or '' + bugswarmapi = DatabaseAPI(token=token) + response = bugswarmapi.find_artifact(image_tag, error_if_not_found=False) + if not response.ok: + log.info('No artifact metadata found for image tag {}.'.format(image_tag)) + else: + artifact = response.json() + # Print without the INFO prefix so the output is easier to parse. + print(json.dumps(artifact, sort_keys=True, indent=4)) diff --git a/bugswarm/client/command.py b/bugswarm/client/command.py new file mode 100644 index 00000000..45f8c27e --- /dev/null +++ b/bugswarm/client/command.py @@ -0,0 +1,17 @@ +from bugswarm.common import outdated +from click import Command + + +class MyCommand(Command): + """ + A subclass of Click's Command class that checks if the client is outdated after invoking the command. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def invoke(self, ctx): + try: + super().invoke(ctx) + finally: + # Ask users to consider updating if a newer version of the client is available. 
+ outdated.check_package_outdated('bugswarm-client') diff --git a/bugswarm/client/docker.py b/bugswarm/client/docker.py new file mode 100644 index 00000000..688364ec --- /dev/null +++ b/bugswarm/client/docker.py @@ -0,0 +1,144 @@ +import os +import subprocess +import sys + +from bugswarm.common import log +from bugswarm.common.shell_wrapper import ShellWrapper +import bugswarm.common.credentials as credentials + +SCRIPT_DEFAULT = '/bin/bash' +HOST_SANDBOX_DEFAULT = '~/bugswarm-sandbox' +CONTAINER_SANDBOX_DEFAULT = '/bugswarm-sandbox' + +if hasattr(credentials, 'DOCKER_HUB_REPO') and credentials.DOCKER_HUB_REPO != '#': + DOCKER_HUB_REPO = credentials.DOCKER_HUB_REPO +else: + DOCKER_HUB_REPO = 'bugswarm/images' +if hasattr(credentials, 'DOCKER_HUB_CACHED_REPO') and credentials.DOCKER_HUB_CACHED_REPO != '#': + DOCKER_HUB_CACHED_REPO = credentials.DOCKER_HUB_CACHED_REPO +else: + DOCKER_HUB_CACHED_REPO = 'bugswarm/cached-images' + + +# By default, this function downloads the image, enters the container, and executes '/bin/bash' in the container. +# The executed script can be changed by passing the script argument. +def docker_run(image_tag, use_sandbox, use_pipe_stdin, use_rm): + assert isinstance(image_tag, str) and not image_tag.isspace() + assert isinstance(use_sandbox, bool) + assert isinstance(use_pipe_stdin, bool) + assert isinstance(use_rm, bool) + + # First, try to pull the image. + ok, image_location = docker_pull(image_tag) + if not ok: + return False + + # Communicate progress to the user. + host_sandbox = _default_host_sandbox() + container_sandbox = CONTAINER_SANDBOX_DEFAULT + if use_sandbox: + if not os.path.exists(host_sandbox): + log.info('Creating', host_sandbox, 'as the host sandbox.') + os.makedirs(host_sandbox, exist_ok=True) + log.info('Binding host sandbox', host_sandbox, 'to container directory', container_sandbox) + + # Communicate progress to the user. 
+ if use_pipe_stdin: + log.info('Entering the container and executing the contents of stdin inside the container.') + else: + log.info('Entering the container.') + + if use_rm: + log.info('The container will be cleaned up after use.') + + # Prepare the arguments for the docker run command. + volume_args = ['-v', '{}:{}'.format(host_sandbox, container_sandbox)] if use_sandbox else [] + # The -t option must not be used in order to use a heredoc. + input_args = ['-i'] if use_pipe_stdin else ['-i', '-t'] + subprocess_input = sys.stdin.read() if use_pipe_stdin else None + subprocess_universal_newlines = use_pipe_stdin + rm_args = ['--rm'] if use_rm else [] + # If we're using a shared directory, we need to modify the start script to change the permissions of the shared + # directory on the container side. However, this will also change the permissions on the host side. + script_args = [SCRIPT_DEFAULT] + if use_sandbox: + start_command = '"sudo chmod -R 777 {} && cd {} && umask 000 && cd .. && {}"'.format( + container_sandbox, container_sandbox, SCRIPT_DEFAULT) + # These arguments represent a command of the following form: + # /bin/bash -c "sudo chmod 777 && cd && umask 000 && /bin/bash" + # So bash will execute chmod and umask and then start a new bash shell. From the user's perspective, the chmod + # and umask commands happen transparently. That is, the user only sees the final new bash shell. + script_args = [SCRIPT_DEFAULT, '-c', start_command] + + # Try to run the image. + # The tail arguments must be at the end of the command. 
+ tail_args = [image_location] + script_args + args = ['sudo', 'docker', 'run', '--privileged'] + rm_args + volume_args + input_args + tail_args + command = ' '.join(args) + print(command) + _, _, returncode = ShellWrapper.run_commands(command, + input=subprocess_input, + universal_newlines=subprocess_universal_newlines, + shell=True) + return returncode == 0 + + +def docker_pull(image_tag): + assert image_tag + assert isinstance(image_tag, str) + + # Exit early if the image already exists locally. + exists, image_location = _image_exists_locally(image_tag) + if exists: + return True, image_location + + image_location = _image_location(image_tag) + command = 'sudo docker pull {}'.format(image_location) + _, _, returncode = ShellWrapper.run_commands(command, shell=True) + if returncode != 0: + # Image is not cached. Attempt to pull from bugswarm/images. + image_location = '{}:{}'.format(DOCKER_HUB_REPO, image_tag) + command = 'sudo docker pull {}'.format(image_location) + _, _, returncode = ShellWrapper.run_commands(command, shell=True) + if returncode != 0: + # Image is not in bugswarm/images + log.error('Could not download the image', image_location) + else: + log.info('Downloaded the image', image_location + '.') + else: + log.info('Downloaded the image', image_location + '.') + return returncode == 0, image_location + + +# Returns True and image_location if the image already exists locally. +def _docker_image_inspect(image_tag): + image_location = _image_location(image_tag) + command = 'sudo docker image inspect {}'.format(image_location) + _, _, returncode = ShellWrapper.run_commands(command, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True) + # For a non-existent image, docker image inspect has a non-zero exit status. 
+ if returncode != 0: + image_location = '{}:{}'.format(DOCKER_HUB_REPO, image_tag) + command = 'sudo docker image inspect {}'.format(image_location) + _, _, returncode = ShellWrapper.run_commands(command, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True) + if returncode == 0: + log.info('The image', image_location, 'already exists locally and is up to date.') + else: + log.info('The image', image_location, 'already exists locally and is up to date.') + return returncode == 0, image_location + + +# Returns True and image_location if the image already exists locally. +def _image_exists_locally(image_tag): + return _docker_image_inspect(image_tag) + + +def _image_location(image_tag): + assert image_tag + assert isinstance(image_tag, str) + return DOCKER_HUB_CACHED_REPO + ':' + image_tag + + +def _default_host_sandbox(): + return os.path.expanduser(HOST_SANDBOX_DEFAULT) diff --git a/setup.client.py b/setup.client.py new file mode 100644 index 00000000..552d099b --- /dev/null +++ b/setup.client.py @@ -0,0 +1,61 @@ +import os +from datetime import datetime + +from setuptools import find_packages, setup + +today = datetime.utcnow() +version = f'{today.year}.{today.month:02}.{today.day:02}' + + +def bs_common_version(): + in_ci = bool(os.environ.get('CI')) + + if in_ci: + if os.environ.get('BSC_UPDATED') == 'true': + # There was a new version of bugswarm-common in this same CI run; use today's date as the version + return f'=={version}' + + # Otherwise, get latest version from PyPI API + import requests + response = requests.get('https://pypi.org/pypi/bugswarm-common/json') + response.raise_for_status() + return '=={}'.format(response.json()['version']) + + # Use the value of the BUGSWARM_COMMON environment variable, if present + if 'BUGSWARM_COMMON' in os.environ: + return '=={}'.format(os.environ['BUGSWARM_COMMON']) + + # Don't pin bugswarm-common + return '' + + +setup( + name='bugswarm-client', + version=version, + 
url='https://github.com/BugSwarm/bugswarm', + author='BugSwarm', + author_email='dev.bugswarm@gmail.com', + + description='The official command line client for the BugSwarm artifact dataset', + long_description='The official command line client for the BugSwarm artifact dataset', + classifiers=[ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: BSD License', + ], + zip_safe=False, + packages=find_packages(include=['bugswarm.client*']), + namespace_packages=[ + 'bugswarm', + ], + install_requires=[ + 'click==6.7', + 'requests>=2.20.0', + 'bugswarm-common' + bs_common_version(), + ], + + entry_points={ + 'console_scripts': [ + 'bugswarm = bugswarm.client.bugswarm:cli', + ], + }, +) diff --git a/setup.common.py b/setup.common.py index 4fddc7a1..ee3805cb 100644 --- a/setup.common.py +++ b/setup.common.py @@ -23,7 +23,7 @@ 'License :: OSI Approved :: BSD License', ], zip_safe=False, - packages=find_packages(exclude=['pair*', 'tests*', 'bugswarm.analyzer*']), + packages=find_packages(include=['bugswarm.common*']), install_requires=[ 'requests>=2.20.0', 'CacheControl==0.12.3', diff --git a/setup.py b/setup.py index 6d77853b..ee8ff1cc 100644 --- a/setup.py +++ b/setup.py @@ -33,5 +33,7 @@ 'lxml==4.6.5', 'packaging==20.7', 'urllib3==1.26.5', + + 'click==6.7', # For bugswarm.client ], )