chore: ported from Technica's zerodrive lib

Technica-Engineering · Oct 30, 2021 · 2c2a4b0 · 2c2a4b0
commit 2c2a4b0
Show file tree

Hide file tree

Showing 22 changed files with 692 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+dist
+*.egg-*
+__pycache__
+*.pyc
+build
+htmlcov
+.nox
+.pytest*
+.coverage
diff --git a/.mkcommit.py b/.mkcommit.py
@@ -0,0 +1,15 @@
+from mkcommit import CommitMessage, to_stdout
+from mkcommit.suites import semantic
+
+
+def commit():
+    """Build the commit message from the semantic suite's defaults.
+
+    Returns:
+        A `CommitMessage` constructed from `semantic.default_short()`
+        and `semantic.default_long()`, evaluated in that order.
+    """
+    short_part = semantic.default_short()
+    long_part = semantic.default_long()
+    return CommitMessage(short_part, long_part)
+
+
+def on_commit(msg: CommitMessage):
+    """Run the semantic suite's checks against the first line of `msg`.
+
+    Args:
+        `msg` (:obj:`CommitMessage`): message whose `first_line` is passed
+            to each check.
+    """
+    checks = (
+        semantic.is_semantic,
+        semantic.has_short_commit_msg_proper_length,
+    )
+    for check in checks:
+        check(msg.first_line)
+
+
+# When executed as a script, pass the message built by `commit()` to `to_stdout`.
+if __name__ == "__main__":
+    to_stdout(commit())
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "python.linting.pylintEnabled": false,
+    "python.linting.flake8Enabled": true,
+    "python.linting.enabled": true
+}
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include README.md
diff --git a/README.md b/README.md
@@ -0,0 +1,61 @@
+# Resourcerer - an automation tool for OneDrive
+
+## Features
+
+Resourcerer is a Python project that gives you an easy way of getting resources from OneDrive and other sources.
+
+## Use cases
+
+What we use Resourcerer for is sharing unit test and integration test resources across projects that need them. Resources like these don't belong in artifact repositories or in GitLab/GitHub, so it makes the most sense to store them on a network drive or in the cloud. Our first choice was OneDrive, since our team was already using it extensively to share all kinds of information.
+
+### How to use this for resource sharing
+
+1. Write a `resources.yaml` file and place it at the root of your project. For example the way we use it to get PCAPs:
+
+    ```yaml
+    test_resources:
+    - "analog.pcap"
+
+    source_folder: 'Software/resources'
+    target_folder: './tests/res'
+    ```
+
+    - `source_folder` is the path within the source driver (e.g. OneDrive) to the folder that contains the test resource files you want to fetch.
+
+    - `target_folder` is the path to a target directory where resources should be downloaded. It's relative with respect to the root of the project folder (for predictable behavior with CI tools).
+
+    - `test_resources` is a list of filenames (with file extensions) that should be fetched for this particular repository. It's important to explicitly specify which files are required, because the application always checks whether a resource exists before downloading it. We don't want to re-download hundreds of megabytes of files if we already have them stored.
+
+2. For OneDrive: in your environment (or in your CI pipeline), specify the following environment variables:
+
+    - `MSGRAPH_API_KEY` -> Secret for the Azure application of choice (see `portal.azure.com`); you can also set it as a credential `MSGraphApiKey`, e.g. using Python's `keyring` package.
+    - `MSGRAPH_CLIENT_ID` -> Client ID for the application of choice
+    - `MSGRAPH_TENANT_ID` -> Azure Tenant ID
+    - `MSGRAPH_SITE_ID` -> Site ID for your OneDrive or SharePoint
+
+3. Run the `get_resources` script in your CI pipeline or anytime you want to download test resources. This Python package installs an executable script that should be available from within an environment in which it was installed.
+
+    So for example in `.gitlab-ci.yml`:
+
+    ```yaml
+    variables:
+      # connection to OneDrive:
+      MSGRAPH_CLIENT_ID: "<client-id>"
+      MSGRAPH_TENANT_ID: "<tenant-id>"
+      MSGRAPH_SITE_ID: "<site-id>"
+
+    before_script:
+      - pip install resourcerer
+      - get_resources
+    ```
+
+    The `get_resources` script will notify you whenever a file was encountered that was already there and it will also give you info on what files were specifically downloaded for easy debugging.
+
+### Importable functions
+
+You may of course decide to use this within your own Python app somewhere. In that case, we export two main functions that can be used:
+
+- `download_file` -> takes in a target path (with filename) and a URL, downloads from a publicly accessible location.
+- `download_from_onedrive` -> takes in a source path to a given file from OneDrive and a target path (without filename) to where the file should be downloaded. This function does not rename the downloaded resource.
+
+There is also a protected function `_download_from_onedrive`, which can be used with a custom OAuth2 token and a custom `site_id` if you need this.
diff --git a/noxfile.py b/noxfile.py
@@ -0,0 +1,63 @@
+import nox
+import os
+import shutil
+
+
+@nox.session()
+def test(session: nox.Session):
+    """Run tests under coverage
+
+    Installs this project together with `coverage`, then runs
+    `unittest` discovery through `coverage run`.
+    """
+    session.install('.', 'coverage')
+    session.run('coverage', 'run', '-m', 'unittest', 'discover')
+
+
+@nox.session()
+def cov(session: nox.Session):
+    """Compile coverage to an XML file
+
+    Installs `coverage` into the session and runs `coverage xml -i`.
+    """
+    xml_report_cmd = ('coverage', 'xml', '-i')
+    session.install(xml_report_cmd[0])
+    session.run(*xml_report_cmd)
+
+
+@nox.session()
+def lint(session: nox.Session):
+    """Run the linter
+
+    Installs `flake8` into the session and runs it without arguments.
+    """
+    linter = 'flake8'
+    session.install(linter)
+    session.run(linter)
+
+
+@nox.session()
+def whl(session: nox.Session):
+    """Build the wheel
+
+    Installs `wheel`, `setuptools` and this project, then runs
+    `python -m setup bdist_wheel`.
+    """
+    build_requirements = ('wheel', 'setuptools', '.')
+    session.install(*build_requirements)
+    session.run('python', '-m', 'setup', 'bdist_wheel')
+
+
+@nox.session()
+def send(session: nox.Session):
+    """Send wheel to PyPi
+
+    Installs `twine` and runs `twine upload` with the `dist/*.whl` pattern.
+    """
+    uploader = 'twine'
+    session.install(uploader)
+    session.run(uploader, 'upload', 'dist/*.whl')
+
+
+@nox.session()
+def clean(session: nox.Session):
+    """Remove files that aren't needed anymore
+
+    Deletes `.coverage`, `build` and `dist` (paths relative to the
+    current working directory); entries that do not exist are skipped.
+    """
+    for leftover in (".coverage", "build", "dist"):
+        if os.path.isdir(leftover):
+            # Directories go away together with everything inside them.
+            shutil.rmtree(leftover)
+        elif os.path.exists(leftover):
+            os.remove(leftover)
diff --git a/resourcerer/__init__.py b/resourcerer/__init__.py
@@ -0,0 +1,2 @@
+from .get_from_onedrive import *  # noqa:F401,F403
+from .send_to_onedrive import *  # noqa:F401,F403
diff --git a/resourcerer/delete_from_onedrive.py b/resourcerer/delete_from_onedrive.py
@@ -0,0 +1,17 @@
+import requests
+
+
+def _delete_from_onedrive(token, site_id, item_path):
+    """Deletes an item from OneDrive through the MS Graph API.
+
+    Args:
+        `token` (:obj:`str`): OAuth2 token, sent as the Bearer credential.
+        `site_id` (:obj:`str`): Site ID placed in the request URL.
+        `item_path` (:obj:`str`): Path of the item, appended after `root:/`.
+
+    Raises:
+        Whatever `raise_for_status` raises when the response reports
+        an error status.
+    """
+    auth_headers = {"Authorization": f"Bearer {token}"}
+    item_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{item_path}"
+
+    response = requests.delete(item_url, headers=auth_headers)
+    # Only report success once the status check has passed.
+    response.raise_for_status()
+    print(f"File {item_path} has been successfully removed from OneDrive")
+
+
+def delete_from_onedrive(token, site_id, item_path):
+    """Public entry point that forwards to `_delete_from_onedrive`.
+
+    Args:
+        `token` (:obj:`str`): OAuth2 token.
+        `site_id` (:obj:`str`): Site ID used in the request URL.
+        `item_path` (:obj:`str`): Path of the item to delete.
+
+    Returns:
+        The result of `_delete_from_onedrive` for the same arguments.
+    """
+    return _delete_from_onedrive(token=token, site_id=site_id, item_path=item_path)
diff --git a/resourcerer/get_from_onedrive.py b/resourcerer/get_from_onedrive.py
@@ -0,0 +1,130 @@
+import requests
+import os
+from .shared import (try_from_response, SITE_ID, CLIENT_ID, TENANT_ID, auth_token,
+                     response_content_to_dict, download_file)
+
+
+def _obtain_download_link(token, site_id, item_path):
+    """Looks up an item's name and direct download link in OneDrive.
+
+    Args:
+        `token` (:obj:`str`): OAuth2 token as returned by `auth_token` function
+        `site_id` (:obj:`str`): Site ID, unique identifier of the Sharepoint site
+            under which a OneDrive instance is hosted. Can be extracted by looking
+            at the source code of the webpage in the browser. Just make sure
+            you don't confuse your Personal SiteId (your OneDrive instance)
+            with the site that serves actual shared resources.
+        `item_path` (:obj:`str`): Path relative to the root of OneDrive.
+
+    Returns:
+        Name and Direct download link (:obj:`tuple` of :obj:`str`, :obj:`str`).
+    """
+    auth_headers = {"Authorization": f"Bearer {token}"}
+
+    # The item's metadata is requested rather than its content: both the
+    # file name and the download URL are read from that single response.
+    metadata_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{item_path}"
+    metadata = response_content_to_dict(requests.get(metadata_url, headers=auth_headers))
+
+    filename = try_from_response(
+        metadata, "name", "Cannot obtain filename from MS Graph API response.")
+    direct_link = try_from_response(
+        metadata, "@microsoft.graph.downloadUrl",
+        "Failed to obtain direct download link from MS Graph API")
+
+    return filename, direct_link
+
+
+def _download_from_onedrive(token, site_id, item_path, target_path=None):
+    """Resolves an item's download link and saves the file locally.
+
+    Args:
+        `token` (:obj:`str`): OAuth2 token as returned by `auth_token` function
+        `site_id` (:obj:`str`): Site ID, unique identifier of the Sharepoint site
+            under which a OneDrive instance is hosted. Can be extracted by looking
+            at the source code of the webpage in the browser. Just make sure
+            you don't confuse your Personal SiteId (your OneDrive instance)
+            with the site that serves actual shared resources.
+        `item_path` (:obj:`str`): Path relative to the root of OneDrive.
+        `target_path` (:obj:`str`): Path to a folder where a specific
+            file you're downloading shall be saved. If `None` (by default),
+            the file will be saved to **current working directory**.
+
+    Returns:
+        Output file path (:obj:`str`), as returned by `download_file`.
+    """
+    name, download_link = _obtain_download_link(token, site_id, item_path)
+
+    # Without a target folder the bare file name is used, i.e. a path
+    # relative to the current working directory.
+    destination = name if target_path is None else os.path.join(target_path, name)
+
+    saved_to = download_file(destination, download_link)
+    print(f"File download succeded, can be found here: {saved_to}")
+    return saved_to
+
+
+def is_cached(target_file_name, target_file_directory):
+    """Tells whether a directory already holds an entry with the
+    given name (i.e. whether the file is cached).
+
+    Args:
+        `target_file_name` (:obj:`str`): filename to search for
+        `target_file_directory` (:obj:`str`): path to the directory
+            where to search for a given file
+
+    Returns:
+        :obj:`bool`, `True` if the directory exists and lists an entry
+        named `target_file_name`, `False` otherwise
+    """
+    if os.path.exists(target_file_directory):
+        # Exact-name match against the directory listing.
+        return target_file_name in os.listdir(target_file_directory)
+    return False
+
+
+def download_from_onedrive(item_path, target_path=None, check_cache_first=True):
+    """Downloads an item from OneDrive to a specified path
+    or current working directory with pre-set connection
+    to OneDrive (where Client ID, Tenant ID and Site ID
+    are set in the Environment variables).
+
+    Args:
+        `item_path` (:obj:`str`): Path relative to the root of OneDrive.
+        `target_path` (:obj:`str`): Path to a folder where a specific
+            file you're downloading shall be saved. If `None` (by default),
+            the file will be saved to **current working directory**.
+        `check_cache_first` (:obj:`bool`): If `True` (default), before
+            downloading anything from OneDrive the target path will
+            be checked whether the file of the same name already exists.
+            If that's the case this function will short circuit, printing
+            out the information that the file is already there and doesn't
+            need to be downloaded again.
+
+    Returns:
+        Output file path (:obj:`str`).
+    """
+
+    if check_cache_first:
+        name = os.path.split(item_path)[-1]
+        # `target_path=None` means "current working directory": look there
+        # for the cached file instead of handing `None` to the `os.path`
+        # helpers, which reject it with a TypeError.
+        cache_dir = os.curdir if target_path is None else target_path
+        if is_cached(name, cache_dir):
+            # Same convention as the download itself: with no target folder
+            # the result is the bare file name.
+            full_target_path = name if target_path is None else os.path.join(target_path, name)
+            print(f"{full_target_path} says: I'm here! No need to download me again!")
+            return full_target_path
+
+    return _download_from_onedrive(
+        auth_token(CLIENT_ID, TENANT_ID), SITE_ID, item_path, target_path)
diff --git a/resourcerer/get_resources.py b/resourcerer/get_resources.py
@@ -0,0 +1,38 @@
+from .get_from_onedrive import download_from_onedrive
+from .parse_yaml import get_yaml_obj
+import os
+import argparse
+
+
+def close_resources(resources):
+    """Binds a resources mapping into a runner function.
+
+    Args:
+        `resources`: mapping that is indexed with the section,
+            source and target keys handed to the returned runner.
+
+    Returns:
+        A function `run(callback, section, source, target)` that calls
+        `callback(source_path, target_folder)` once per filename listed
+        under `resources[section]`.
+    """
+    def run(callback, section, source, target):
+        for filename in resources[section]:
+            remote_path = os.path.join(resources[source], filename)
+            # Backslashes are turned into forward slashes before the
+            # path is handed to the callback.
+            callback(remote_path.replace("\\", "/"), resources[target])
+    return run
+
+
+def main(default_file="resources.yaml"):
+    """Command-line entry point: downloads every resource listed in a YAML file.
+
+    Parses `-f/--file` from the command line, loads that file with
+    `get_yaml_obj` and calls `download_from_onedrive` for each entry of
+    its `test_resources` list, using `source_folder` and `target_folder`.
+
+    Args:
+        `default_file` (:obj:`str`): resources file used when `-f/--file`
+            is not given or is empty.
+    """
+    # The description and help text describe this tool and its resources
+    # file; they previously named an unrelated tool and config file.
+    parser = argparse.ArgumentParser(
+        description='Download the resources listed in a resources YAML file')
+    parser.add_argument(
+        '-f', '--file', type=str, help='Path to the resources YAML file')
+    args = parser.parse_args()
+
+    # An omitted or empty --file falls back to the default file.
+    file = args.file or default_file
+
+    resources = get_yaml_obj(file)
+
+    close_resources(resources)(
+        download_from_onedrive,
+        'test_resources',
+        'source_folder',
+        'target_folder'
+    )
+
+
+# Run `main()` with its default resources file when executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/resourcerer/parse_yaml.py b/resourcerer/parse_yaml.py
@@ -0,0 +1,11 @@
+import yaml
+
+
+def get_yaml_obj(path=None):
+    """Loads a resources YAML file and returns its parsed content.
+
+    Args:
+        `path` (:obj:`str`): Path to the YAML file. When falsy
+            (`None` by default), `resources.yaml` is opened instead.
+
+    Returns:
+        Whatever `yaml.full_load` produces for the file's content.
+    """
+    # NOTE(review): `full_load` accepts more than the basic YAML tags;
+    # confirm whether `yaml.safe_load` would suffice if this file can
+    # come from an untrusted source.
+    with open(path or "resources.yaml", "r") as yaml_file:
+        return yaml.full_load(yaml_file)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .get_from_onedrive import * # noqa:F401,F403
		from .send_to_onedrive import * # noqa:F401,F403