From 1b202bb132b0a031f7866c115f90087c25424834 Mon Sep 17 00:00:00 2001 From: Hassan Syyid Date: Sun, 19 May 2024 17:51:51 -0400 Subject: [PATCH] Init commit --- .gitignore | 136 +++++++++++++++++++++++++++++++++ .secrets/.gitignore | 10 +++ README.md | 131 +++++++++++++++++++++++++++++++ meltano.yml | 30 ++++++++ output/.gitignore | 4 + pyproject.toml | 48 ++++++++++++ tap_canvas_catalog/__init__.py | 1 + tap_canvas_catalog/client.py | 128 +++++++++++++++++++++++++++++++ tap_canvas_catalog/streams.py | 55 +++++++++++++ tap_canvas_catalog/tap.py | 45 +++++++++++ tests/__init__.py | 1 + tests/conftest.py | 3 + tests/test_core.py | 23 ++++++ tox.ini | 51 +++++++++++++ 14 files changed, 666 insertions(+) create mode 100644 .gitignore create mode 100644 .secrets/.gitignore create mode 100644 README.md create mode 100644 meltano.yml create mode 100644 output/.gitignore create mode 100644 pyproject.toml create mode 100644 tap_canvas_catalog/__init__.py create mode 100644 tap_canvas_catalog/client.py create mode 100644 tap_canvas_catalog/streams.py create mode 100644 tap_canvas_catalog/tap.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_core.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ce4d0a8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,136 @@ +# Secrets and internal config files +**/.secrets/* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +poetry.lock +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +docker-compose.yml +.DS_Store diff --git a/.secrets/.gitignore b/.secrets/.gitignore new file mode 100644 index 0000000..33c6acd --- /dev/null +++ b/.secrets/.gitignore @@ -0,0 +1,10 @@ +# IMPORTANT! This folder is hidden from git - if you need to store config files or other secrets, +# make sure those are never staged for commit into your git repo. You can store them here or in another +# secure location. +# +# Note: This may be redundant with the global .gitignore, and is provided +# for redundancy. 
If the `.secrets` folder is not needed, you may delete it +# from the project. + +* +!.gitignore diff --git a/README.md b/README.md new file mode 100644 index 0000000..d5e9f14 --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +# tap-canvas-catalog + +`tap-canvas-catalog` is a Singer tap for CanvasCatalog. + +Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps. + + + +## Configuration + +### Accepted Config Options + + + +A full list of supported settings and capabilities for this +tap is available by running: + +```bash +tap-canvas-catalog --about +``` + +### Configure using environment variables + +This Singer tap will automatically import any environment variables within the working directory's +`.env` if `--config=ENV` is provided, such that config values will be considered if a matching +environment variable is set either in the terminal context or in the `.env` file. + +### Source Authentication and Authorization + + + +## Usage + +You can easily run `tap-canvas-catalog` by itself or in a pipeline using [Meltano](https://meltano.com/). + +### Executing the Tap Directly + +```bash +tap-canvas-catalog --version +tap-canvas-catalog --help +tap-canvas-catalog --config CONFIG --discover > ./catalog.json +``` + +## Developer Resources + +Follow these instructions to contribute to this project. + +### Initialize your Development Environment + +```bash +pipx install poetry +poetry install +``` + +### Create and Run Tests + +Create tests within the top-level `tests` folder and + then run: + +```bash +poetry run pytest +``` + +You can also test the `tap-canvas-catalog` CLI interface directly using `poetry run`: + +```bash +poetry run tap-canvas-catalog --help +``` + +### Testing with [Meltano](https://www.meltano.com) + +_**Note:** This tap will work in any Singer environment and does not require Meltano. 
+Examples here are for convenience and to streamline end-to-end orchestration scenarios._ + + + +Next, install Meltano (if you haven't already) and any needed plugins: + +```bash +# Install meltano +pipx install meltano +# Initialize meltano within this directory +cd tap-canvas-catalog +meltano install +``` + +Now you can test and orchestrate using Meltano: + +```bash +# Test invocation: +meltano invoke tap-canvas-catalog --version +# OR run a test `elt` pipeline: +meltano elt tap-canvas-catalog target-jsonl +``` + +### SDK Dev Guide + +See the [dev guide](https://sdk.meltano.com/en/latest/dev_guide.html) for more instructions on how to use the SDK to +develop your own taps and targets. diff --git a/meltano.yml b/meltano.yml new file mode 100644 index 0000000..8814a20 --- /dev/null +++ b/meltano.yml @@ -0,0 +1,30 @@ +version: 1 +send_anonymous_usage_stats: true +project_id: "tap-canvas-catalog" +default_environment: test +environments: +- name: test +plugins: + extractors: + - name: "tap-canvas-catalog" + namespace: "tap_canvas_catalog" + pip_url: -e . 
+ capabilities: + - state + - catalog + - discover + - about + - stream-maps + config: + start_date: '2010-01-01T00:00:00Z' + settings: + # TODO: To configure using Meltano, declare settings and their types here: + - name: username + - name: password + kind: password + - name: start_date + value: '2010-01-01T00:00:00Z' + loaders: + - name: target-jsonl + variant: andyh1203 + pip_url: target-jsonl diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..80ff9d2 --- /dev/null +++ b/output/.gitignore @@ -0,0 +1,4 @@ +# This directory is used as a target by target-jsonl, so ignore all files + +* +!.gitignore diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8869496 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,48 @@ +[tool.poetry] +name = "tap-canvas-catalog" +version = "0.0.1" +description = "`tap-canvas-catalog` is a Singer tap for CanvasCatalog, built with the Meltano Singer SDK." +readme = "README.md" +authors = ["hotglue"] +keywords = [ + "ELT", + "CanvasCatalog", +] +license = "Apache 2.0" + +[tool.poetry.dependencies] +python = "<3.12,>=3.7.1" +singer-sdk = { version="^0.23.0" } +fs-s3fs = { version = "^1.1.1", optional = true } +requests = "^2.28.2" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.2.1" +flake8 = "^5.0.4" +darglint = "^1.8.1" +black = "^23.1.0" +pyupgrade = "^3.3.1" +mypy = "^1.0.0" +isort = "^5.11.5" +singer-sdk = { version="^0.23.0", extras = ["testing"] } +types-requests = "^2.28.11.12" + +[tool.poetry.extras] +s3 = ["fs-s3fs"] + +[tool.isort] +profile = "black" +multi_line_output = 3 # Vertical Hanging Indent +src_paths = "tap_canvas_catalog" + +[tool.mypy] +python_version = "3.9" +warn_unused_configs = true + +[build-system] +requires = ["poetry-core>=1.0.8"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +# CLI declaration +tap-canvas-catalog = 'tap_canvas_catalog.tap:TapCanvasCatalog.cli' diff --git a/tap_canvas_catalog/__init__.py 
b/tap_canvas_catalog/__init__.py new file mode 100644 index 0000000..61cb26e --- /dev/null +++ b/tap_canvas_catalog/__init__.py @@ -0,0 +1 @@ +"""Tap for CanvasCatalog.""" diff --git a/tap_canvas_catalog/client.py b/tap_canvas_catalog/client.py new file mode 100644 index 0000000..1536dfd --- /dev/null +++ b/tap_canvas_catalog/client.py @@ -0,0 +1,128 @@ +"""REST client handling, including CanvasCatalogStream base class.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Callable, Iterable + +import requests +import dateutil.parser +import backoff + +from singer_sdk.authenticators import APIKeyAuthenticator +from singer_sdk.helpers.jsonpath import extract_jsonpath +from singer_sdk.streams import RESTStream + +_Auth = Callable[[requests.PreparedRequest], requests.PreparedRequest] +SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") + + +class CanvasCatalogStream(RESTStream): + """CanvasCatalog stream class.""" + + # OR use a dynamic url_base: + @property + def url_base(self) -> str: + """Return the API URL root, configurable via tap settings.""" + return self.config.get("api_url") or "https://catalog-dev.inseng.net/api/v1" + + records_jsonpath = "$[*]" # Or override `parse_response`. + + @property + def authenticator(self) -> APIKeyAuthenticator: + """Return a new authenticator object. + + Returns: + An authenticator instance. + """ + api_key = self.config.get("api_key", "") + return APIKeyAuthenticator.create_for_stream( + self, + key="Authorization", + value=f'Token token="{api_key}"', + location="header", + ) + + @property + def http_headers(self) -> dict: + """Return the http headers needed. + + Returns: + A dictionary of HTTP headers. 
+ """ + headers = {} + if "user_agent" in self.config: + headers["User-Agent"] = self.config.get("user_agent") + # If not using an authenticator, you may also provide inline auth headers: + # headers["Private-Token"] = self.config.get("auth_token") + return headers + + def get_next_page_token( + self, + response: requests.Response, + previous_token: Any | None, + ) -> Any | None: + """Return a token for identifying next page or None if no more pages. + + Args: + response: The HTTP ``requests.Response`` object. + previous_token: The previous page token value. + + Returns: + The next pagination token. + """ + next_page_token = previous_token or 1 + + all_matches = list(extract_jsonpath( + self.records_jsonpath, response.json() + )) + + if len(all_matches) > 0: + return next_page_token + 1 + + return None + + def get_url_params( + self, + context: dict | None, + next_page_token: Any | None, + ) -> dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization. + + Args: + context: The stream context. + next_page_token: The next page index or value. + + Returns: + A dictionary of URL query parameters. + """ + params: dict = {} + if next_page_token: + params["page"] = next_page_token + return params + + def backoff_wait_generator(self): + return backoff.expo(base=2, factor=5) + + def backoff_max_tries(self) -> int: + return 7 + + def post_process(self, row: dict, context: dict | None = None) -> dict | None: + """As needed, append or transform raw data to match expected structure. + + Args: + row: An individual record from the stream. + context: The stream context. + + Returns: + The updated record dictionary, or ``None`` to skip the record. 
+ """ + # if self.replication_key: + # replication_date = self.get_starting_timestamp(context) + # if replication_date: + # updated_at = dateutil.parser.parse(row.get(self.replication_key)) + + # if updated_at > replication_date: + # return row + + return row diff --git a/tap_canvas_catalog/streams.py b/tap_canvas_catalog/streams.py new file mode 100644 index 0000000..80bf8fc --- /dev/null +++ b/tap_canvas_catalog/streams.py @@ -0,0 +1,55 @@ +"""Stream type classes for tap-canvas-catalog.""" + +from __future__ import annotations + +from pathlib import Path + +from singer_sdk import typing as th # JSON Schema typing helpers + +from tap_canvas_catalog.client import CanvasCatalogStream + +class UsersStream(CanvasCatalogStream): + name = "users" + path = "/users" + primary_keys = ["id"] + records_jsonpath = "$.users[*]" + replication_key = "updated_at" + + schema = th.PropertiesList( + th.Property( + "id", + th.IntegerType, + ), + th.Property( + "root_account_id", + th.IntegerType, + ), + th.Property( + "canvas_user_id", + th.IntegerType, + ), + th.Property( + "canvas_root_account_uuid", + th.StringType, + ), + th.Property( + "user_name", + th.StringType, + ), + th.Property( + "email_address", + th.StringType, + ), + th.Property( + "custom_fields", + th.CustomType({"type": ["object", "string"]}), + ), + th.Property( + "created_at", + th.DateTimeType, + ), + th.Property( + "updated_at", + th.DateTimeType, + ), + ).to_dict() diff --git a/tap_canvas_catalog/tap.py b/tap_canvas_catalog/tap.py new file mode 100644 index 0000000..6787d19 --- /dev/null +++ b/tap_canvas_catalog/tap.py @@ -0,0 +1,45 @@ +"""CanvasCatalog tap class.""" + +from __future__ import annotations + +from singer_sdk import Tap +from singer_sdk import typing as th # JSON schema typing helpers + +# TODO: Import your custom stream types here: +from tap_canvas_catalog import streams + + +class TapCanvasCatalog(Tap): + """CanvasCatalog tap class.""" + + name = "tap-canvas-catalog" + + # TODO: Update this 
section with the actual config values you expect: + config_jsonschema = th.PropertiesList( + th.Property( + "api_key", + th.StringType, + required=True, + secret=True, # Flag config as protected. + description="The token to authenticate against the API service", + ), + th.Property( + "start_date", + th.DateTimeType, + description="The earliest record date to sync", + ), + ).to_dict() + + def discover_streams(self) -> list[streams.CanvasCatalogStream]: + """Return a list of discovered streams. + + Returns: + A list of discovered streams. + """ + return [ + streams.UsersStream(self), + ] + + +if __name__ == "__main__": + TapCanvasCatalog.cli() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..6f6633a --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite for tap-canvas-catalog.""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6bb3ec2 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +"""Test Configuration.""" + +pytest_plugins = ("singer_sdk.testing.pytest_plugin",) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..d6f0012 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,23 @@ +"""Tests standard tap features using the built-in SDK tests library.""" + +import datetime + +from singer_sdk.testing import get_tap_test_class + +from tap_canvas_catalog.tap import TapCanvasCatalog + + +SAMPLE_CONFIG = { + "start_date": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d") + # TODO: Initialize minimal tap config +} + + +# Run standard built-in tap tests from the SDK: +TestTapCanvasCatalog = get_tap_test_class( + tap_class=TapCanvasCatalog, + config=SAMPLE_CONFIG +) + + +# TODO: Create additional tests as appropriate for your tap. 
diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..0fa1573 --- /dev/null +++ b/tox.ini @@ -0,0 +1,51 @@ +# This file can be used to customize tox tests as well as other test frameworks like flake8 and mypy + +[tox] +envlist = py39 +isolated_build = true + +[testenv] +allowlist_externals = poetry +commands = + poetry install -v + poetry run pytest + poetry run black --check tap_canvas_catalog/ + poetry run flake8 tap_canvas_catalog + poetry run pydocstyle tap_canvas_catalog + poetry run mypy tap_canvas_catalog --exclude='tap_canvas_catalog/tests' + +[testenv:pytest] +# Run the python tests. +# To execute, run `tox -e pytest` +envlist = py37, py38, py39, py310, py311 +commands = + poetry install -v + poetry run pytest + +[testenv:format] +# Attempt to auto-resolve lint errors before they are raised. +# To execute, run `tox -e format` +commands = + poetry install -v + poetry run black tap_canvas_catalog/ + poetry run isort tap_canvas_catalog + +[testenv:lint] +# Raise an error if lint and style standards are not met. +# To execute, run `tox -e lint` +commands = + poetry install -v + poetry run black --check --diff tap_canvas_catalog/ + poetry run isort --check tap_canvas_catalog + poetry run flake8 tap_canvas_catalog + # refer to the [tool.mypy] section of pyproject.toml for specific settings + poetry run mypy tap_canvas_catalog --exclude='tap_canvas_catalog/tests' + +[flake8] +docstring-convention = google +ignore = W503 +max-line-length = 88 +max-complexity = 10 + +[pydocstyle] +ignore = D105,D203,D213