From 1335ce8dce1c84e30bd29028ea3b4aa2ebef008f Mon Sep 17 00:00:00 2001 From: Eric Zhang Date: Wed, 3 Jan 2024 15:27:02 -0500 Subject: [PATCH] Delete 10_integrations/meltano directory Our Meltano integration hasn't been working for some time, and it depends on NFS --- 10_integrations/meltano/meltano_modal.py | 92 ------------------- .../meltano/meltano_project/.gitignore | 3 - .../meltano/meltano_project/analyze/.gitkeep | 0 .../meltano_project/downloads/.gitignore | 2 - .../meltano/meltano_project/extract/.gitkeep | 0 .../meltano/meltano_project/load/.gitkeep | 0 .../meltano/meltano_project/meltano.yml | 49 ---------- .../meltano/meltano_project/notebook/.gitkeep | 0 .../meltano_project/orchestrate/.gitkeep | 0 .../meltano/meltano_project/output/.gitignore | 2 - .../extractors/tap-csv--meltanolabs.lock | 40 -------- .../loaders/target-sqlite--meltanolabs.lock | 39 -------- .../meltano/meltano_project/requirements.txt | 0 .../meltano_project/transform/.gitkeep | 0 .../utils/download_sample_data/main.py | 20 ---- .../meltano/meltano_project/utils/poetry.lock | 84 ----------------- .../meltano_project/utils/pyproject.toml | 18 ---- 17 files changed, 349 deletions(-) delete mode 100644 10_integrations/meltano/meltano_modal.py delete mode 100644 10_integrations/meltano/meltano_project/.gitignore delete mode 100644 10_integrations/meltano/meltano_project/analyze/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/downloads/.gitignore delete mode 100644 10_integrations/meltano/meltano_project/extract/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/load/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/meltano.yml delete mode 100644 10_integrations/meltano/meltano_project/notebook/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/orchestrate/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/output/.gitignore delete mode 100644 10_integrations/meltano/meltano_project/plugins/extractors/tap-csv--meltanolabs.lock delete mode 100644 10_integrations/meltano/meltano_project/plugins/loaders/target-sqlite--meltanolabs.lock delete mode 100644 10_integrations/meltano/meltano_project/requirements.txt delete mode 100644 10_integrations/meltano/meltano_project/transform/.gitkeep delete mode 100644 10_integrations/meltano/meltano_project/utils/download_sample_data/main.py delete mode 100644 10_integrations/meltano/meltano_project/utils/poetry.lock delete mode 100644 10_integrations/meltano/meltano_project/utils/pyproject.toml diff --git a/10_integrations/meltano/meltano_modal.py b/10_integrations/meltano/meltano_modal.py deleted file mode 100644 index 710460b20..000000000 --- a/10_integrations/meltano/meltano_modal.py +++ /dev/null @@ -1,92 +0,0 @@ -# --- -# lambda-test: false -# cmd: ["modal", "run", "10_integrations/meltano/meltano_modal.py::extract_and_load"] -# --- -import os -import shutil -import subprocess -from pathlib import Path - -import modal - -LOCAL_PROJECT_ROOT = Path(__file__).parent / "meltano_project" -REMOTE_PROJECT_ROOT = "/meltano_project" -PERSISTED_VOLUME_PATH = "/persisted" -REMOTE_DB_PATH = Path(f"{PERSISTED_VOLUME_PATH}/meltano.db") -REMOTE_LOGS_PATH = Path(f"{REMOTE_PROJECT_ROOT}/.meltano/logs") -PERSISTED_LOGS_DIR = Path(f"{PERSISTED_VOLUME_PATH}/logs") - -meltano_source_mount = modal.Mount.from_local_dir( - LOCAL_PROJECT_ROOT, - remote_path=REMOTE_PROJECT_ROOT, - condition=lambda path: not any(p.startswith(".") for p in Path(path).parts), -) - -storage = modal.NetworkFileSystem.persisted("meltano_volume") - -meltano_conf = modal.Secret.from_dict( - { - "MELTANO_PROJECT_ROOT": REMOTE_PROJECT_ROOT, - "MELTANO_DATABASE_URI": f"sqlite:///{REMOTE_DB_PATH}", - "SQLITE_WAREHOUSE": f"{PERSISTED_VOLUME_PATH}/jaffle_shop_raw", - "MELTANO_ENVIRONMENT": "modal", - } -) - - -def install_project_deps(): - os.environ[ - "MELTANO_DATABASE_URI" - ] = "sqlite:////.empty_meltano.db" # dummy during installation - subprocess.check_call(["meltano", "install"]) - # delete empty logs dir, so running containers can add a symlink instead - shutil.rmtree(REMOTE_LOGS_PATH, ignore_errors=True) - - -meltano_img = ( - modal.Image.debian_slim() - .apt_install("git") - .pip_install("meltano") - .copy_mount(meltano_source_mount) - .run_function(install_project_deps, secret=meltano_conf) -) - - -stub = modal.Stub( - image=meltano_img, - secrets=[meltano_conf], -) - - -def symlink_logs(): - # symlink logs so that they end up in persisted network file system - # we can get rid of this if meltano gets a way to configure - # the logging directory - if not REMOTE_LOGS_PATH.exists(): - PERSISTED_LOGS_DIR.mkdir(exist_ok=True, parents=True) - REMOTE_LOGS_PATH.symlink_to(PERSISTED_LOGS_DIR) - - -# Run this example using `modal run meltano_modal.py::extract_and_load` -@stub.function( - network_file_systems={PERSISTED_VOLUME_PATH: storage}, - schedule=modal.Period(days=1), -) -def extract_and_load(): - symlink_logs() - subprocess.call( - ["meltano", "run", "download_sample_data", "tap-csv", "target-sqlite"] - ) - - -# Interactive sqlite3 exploration using `modal run meltano_modal.py::explore` -@stub.function( - interactive=True, - network_file_systems={PERSISTED_VOLUME_PATH: storage}, - timeout=86400, - image=modal.Image.debian_slim().apt_install("sqlite3"), - secrets=[meltano_conf], -) -def explore(): - # explore the output database interactively using the sqlite3 shell - os.execlp("sqlite3", "sqlite3", os.environ["SQLITE_WAREHOUSE"] + ".db") diff --git a/10_integrations/meltano/meltano_project/.gitignore b/10_integrations/meltano/meltano_project/.gitignore deleted file mode 100644 index 15e24c3f8..000000000 --- a/10_integrations/meltano/meltano_project/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/venv -/.meltano -.env diff --git a/10_integrations/meltano/meltano_project/analyze/.gitkeep b/10_integrations/meltano/meltano_project/analyze/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/downloads/.gitignore b/10_integrations/meltano/meltano_project/downloads/.gitignore deleted file mode 100644 index c96a04f00..000000000 --- a/10_integrations/meltano/meltano_project/downloads/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/10_integrations/meltano/meltano_project/extract/.gitkeep b/10_integrations/meltano/meltano_project/extract/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/load/.gitkeep b/10_integrations/meltano/meltano_project/load/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/meltano.yml b/10_integrations/meltano/meltano_project/meltano.yml deleted file mode 100644 index f4a49aa14..000000000 --- a/10_integrations/meltano/meltano_project/meltano.yml +++ /dev/null @@ -1,49 +0,0 @@ -version: 1 -default_environment: local -project_id: 3b1408e8-f8ba-4387-8167-95d8619585ac -environments: - - name: local - env: - SQLITE_WAREHOUSE: jaffle_shop_raw - - name: modal -plugins: - extractors: - - name: tap-csv - variant: meltanolabs - pip_url: git+https://github.com/MeltanoLabs/tap-csv.git - config: - files: - - entity: customers - path: downloads/jaffle_shop_customers.csv - keys: - - ID - - FIRST_NAME - - LAST_NAME - - entity: orders - path: downloads/jaffle_shop_orders.csv - keys: - - ID - - USER_ID - - ORDER_DATE - - STATUS - - entity: payments - path: downloads/stripe_payments.csv - keys: - - ID - - ORDERID - - PAYMENTMETHOD - - STATUS - - AMOUNT - - CREATED - loaders: - - name: target-sqlite - variant: meltanolabs - pip_url: git+https://github.com/MeltanoLabs/target-sqlite.git - config: - database: ${SQLITE_WAREHOUSE} - batch_size: 50 - utilities: - - name: download_sample_data - namespace: utils - pip_url: -e ${MELTANO_PROJECT_ROOT}/utils/ - executable: download_sample_data diff --git a/10_integrations/meltano/meltano_project/notebook/.gitkeep b/10_integrations/meltano/meltano_project/notebook/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/orchestrate/.gitkeep b/10_integrations/meltano/meltano_project/orchestrate/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/output/.gitignore b/10_integrations/meltano/meltano_project/output/.gitignore deleted file mode 100644 index d6b7ef32c..000000000 --- a/10_integrations/meltano/meltano_project/output/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/10_integrations/meltano/meltano_project/plugins/extractors/tap-csv--meltanolabs.lock b/10_integrations/meltano/meltano_project/plugins/extractors/tap-csv--meltanolabs.lock deleted file mode 100644 index f60b08cda..000000000 --- a/10_integrations/meltano/meltano_project/plugins/extractors/tap-csv--meltanolabs.lock +++ /dev/null @@ -1,40 +0,0 @@ -{ - "plugin_type": "extractors", - "name": "tap-csv", - "namespace": "tap_csv", - "variant": "meltanolabs", - "label": "Comma Separated Values (CSV)", - "docs": "https://hub.meltano.com/extractors/tap-csv--meltanolabs", - "repo": "https://github.com/MeltanoLabs/tap-csv", - "pip_url": "git+https://github.com/MeltanoLabs/tap-csv.git", - "description": "Generic data extractor of CSV (comma separated value) files", - "logo_url": "https://hub.meltano.com/assets/logos/extractors/csv.png", - "capabilities": [ - "discover", - "catalog", - "state" - ], - "settings_group_validation": [ - [ - "files" - ], - [ - "csv_files_definition" - ] - ], - "settings": [ - { - "name": "files", - "kind": "array", - "label": "Files", - "description": "Array of objects with `entity`, `path`, `keys`, and `encoding` [Optioanl] keys:\n\n* `entity`: The entity name, used as the table name for the data loaded from that CSV.\n* `path`: Local path (relative to the project's root) to the file to be ingested. Note that this may be a directory, in which case all files in that directory and any of its subdirectories will be recursively processed\n* `keys`: The names of the columns that constitute the unique keys for that entity.\n* `encoding`: [Optional] The file encoding to use when reading the file (i.e. \"latin1\", \"UTF-8\"). Use this setting when you get a UnicodeDecodeError error.\n Each input CSV file must be a traditionally-delimited CSV (comma separated columns, newlines indicate new rows, double quoted values).\n\nThe following entries are passed through in an internal CSV dialect that then is used to configure the CSV reader:\n\n* `delimiter`: A one-character string used to separate fields. It defaults to ','.\n* `doublequote`: Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. It defaults to True.\n* `escapechar`: A one-character string used by the reader, where the escapechar removes any special meaning from the following character. It defaults to None, which disables escaping.\n* `quotechar`: A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '\"'.\n* `skipinitialspace`: When True, spaces immediately following the delimiter are ignored. The default is False.\n* `strict`: When True, raise exception Error on bad CSV input. The default is False.\n\nThe first row is the header defining the attribute name for that column and will result to a column of the same name in the database. It must have a valid format with no spaces or special characters (like for example `!` or `@`, etc).\n" - }, - { - "name": "csv_files_definition", - "label": "CSV Files Definition", - "documentation": "https://github.com/MeltanoLabs/tap-csv#settings", - "description": "Project-relative path to JSON file holding array of objects as described under [Files](#files) - with `entity`, `path`, `keys`, and other optional keys:\n\n```json\n[\n {\n \"entity\": \"\",\n \"path\": \"\",\n \"keys\": [\"\"],\n },\n // ...\n]\n```\n", - "placeholder": "Ex. files-def.json" - } - ] -} \ No newline at end of file diff --git a/10_integrations/meltano/meltano_project/plugins/loaders/target-sqlite--meltanolabs.lock b/10_integrations/meltano/meltano_project/plugins/loaders/target-sqlite--meltanolabs.lock deleted file mode 100644 index aae57c101..000000000 --- a/10_integrations/meltano/meltano_project/plugins/loaders/target-sqlite--meltanolabs.lock +++ /dev/null @@ -1,39 +0,0 @@ -{ - "plugin_type": "loaders", - "name": "target-sqlite", - "namespace": "target_sqlite", - "variant": "meltanolabs", - "label": "SQLite", - "docs": "https://hub.meltano.com/loaders/target-sqlite--meltanolabs", - "repo": "https://github.com/MeltanoLabs/target-sqlite", - "pip_url": "git+https://github.com/MeltanoLabs/target-sqlite.git", - "description": "SQLite database loader", - "logo_url": "https://hub.meltano.com/assets/logos/loaders/sqlite.png", - "settings_group_validation": [ - [ - "batch_size" - ] - ], - "settings": [ - { - "name": "database", - "value": "warehouse", - "label": "Database Name", - "description": "Name of the SQLite database file to be used or created, relative to the project root.\n\nThe `.db` extension is optional and will be added automatically when omitted.\n" - }, - { - "name": "batch_size", - "kind": "integer", - "value": 50, - "label": "Batch Size", - "description": "How many records are sent to SQLite at a time." - }, - { - "name": "timestamp_column", - "value": "__loaded_at", - "label": "Timestamp Column", - "description": "Name of the column used for recording the timestamp when data are loaded to SQLite." - } - ], - "dialect": "sqlite" -} \ No newline at end of file diff --git a/10_integrations/meltano/meltano_project/requirements.txt b/10_integrations/meltano/meltano_project/requirements.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/transform/.gitkeep b/10_integrations/meltano/meltano_project/transform/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/10_integrations/meltano/meltano_project/utils/download_sample_data/main.py b/10_integrations/meltano/meltano_project/utils/download_sample_data/main.py deleted file mode 100644 index 6fdca9d50..000000000 --- a/10_integrations/meltano/meltano_project/utils/download_sample_data/main.py +++ /dev/null @@ -1,20 +0,0 @@ -# fetches the tutorial data used by dbt in their tutorials: https://docs.getdbt.com/docs/get-started/getting-started-dbt-core -from pathlib import Path - -import requests - -output_dir = Path("downloads") -dbt_sample_files = [ - "https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/jaffle_shop_customers.csv", - "https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/jaffle_shop_orders.csv", - "https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/stripe_payments.csv", -] - - -def run(): - output_dir.mkdir(parents=True, exist_ok=True) - for url in dbt_sample_files: - content = requests.get(url).content.decode("utf-8-sig") # stupid bom - filename = url.rsplit("/", 1)[1] - print(f"Fetched csv data: {filename}") - (output_dir / filename).write_bytes(content.encode("utf8")) diff --git a/10_integrations/meltano/meltano_project/utils/poetry.lock b/10_integrations/meltano/meltano_project/utils/poetry.lock deleted file mode 100644 index fa0617e72..000000000 --- a/10_integrations/meltano/meltano_project/utils/poetry.lock +++ /dev/null @@ -1,84 +0,0 @@ -[[package]] -name = "certifi" -version = "2022.12.7" -description = "Python package for providing Mozilla's CA Bundle." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" -optional = false -python-versions = ">=3.6.0" - -[package.extras] -unicode_backport = ["unicodedata2"] - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "requests" -version = "2.28.1" -description = "Python HTTP for Humans." -category = "main" -optional = false -python-versions = ">=3.7, <4" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<3" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "urllib3" -version = "1.26.14" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[metadata] -lock-version = "1.1" -python-versions = "^3.9" -content-hash = "57023848b3e1e5f7f7970725bbc3c1669558579faaa10b433df88d1623277dc1" - -[metadata.files] -certifi = [ - {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, - {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, -] -charset-normalizer = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, -] -idna = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] -requests = [ - {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, - {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, -] -urllib3 = [ - {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, - {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, -] diff --git a/10_integrations/meltano/meltano_project/utils/pyproject.toml b/10_integrations/meltano/meltano_project/utils/pyproject.toml deleted file mode 100644 index ef8ea5558..000000000 --- a/10_integrations/meltano/meltano_project/utils/pyproject.toml +++ /dev/null @@ -1,18 +0,0 @@ -[tool.poetry] -name = "download-sample-data" -version = "0.1.0" -description = "" -authors = ["Elias Freider "] -packages = [{include = "download_sample_data"}] - -[tool.poetry.dependencies] -python = "^3.9" -requests = "^2.28.1" - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" - -[tool.poetry.scripts] -download_sample_data = 'download_sample_data.main:run'