From 6af8d25bdab737680b1a46f35bdaf91e9fb05f12 Mon Sep 17 00:00:00 2001 From: Pat Nadolny Date: Fri, 16 Feb 2024 13:11:25 -0600 Subject: [PATCH] Adds loaders target-parquet (automattic) (#1678) --- _data/default_variants.yml | 2 +- .../loaders/target-parquet/automattic.yml | 110 ++++++++++++++++++ schemas/common/loaders_specific.schema.json | 3 +- 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 _data/meltano/loaders/target-parquet/automattic.yml diff --git a/_data/default_variants.yml b/_data/default_variants.yml index 7e358bc69..24fbf1a2d 100644 --- a/_data/default_variants.yml +++ b/_data/default_variants.yml @@ -622,7 +622,7 @@ loaders: target-mysql: thkwag target-oracle: radbrt target-pardot: anelendata - target-parquet: estrategiahq + target-parquet: automattic target-pinecone: meltanolabs target-postgres: meltanolabs target-redshift: transferwise diff --git a/_data/meltano/loaders/target-parquet/automattic.yml b/_data/meltano/loaders/target-parquet/automattic.yml new file mode 100644 index 000000000..5cca73101 --- /dev/null +++ b/_data/meltano/loaders/target-parquet/automattic.yml @@ -0,0 +1,110 @@ +capabilities: +- about +- schema-flattening +- stream-maps +- validate-records +description: Columnar Storage Format +domain_url: https://parquet.apache.org/ +executable: target-parquet +keywords: +- file +- meltano_sdk +label: Parquet +logo_url: /assets/logos/loaders/parquet.png +maintenance_status: active +name: target-parquet +namespace: target_parquet +next_steps: '' +pip_url: git+https://github.com/Automattic/target-parquet.git +quality: silver +repo: https://github.com/Automattic/target-parquet +settings: +- description: Add metadata to records. + kind: boolean + label: Add Record Metadata + name: add_record_metadata +- description: (default - gzip) Compression methods have to be supported by Pyarrow, + and currently the compression modes available are - snappy, zstd, brotli and gzip.
+ kind: string + label: Compression Method + name: compression_method + value: gzip +- description: Destination Path + kind: string + label: Destination Path + name: destination_path +- description: Extra fields to add to the flattened record. (e.g. extra_col1=value1,extra_col2=value2) + kind: string + label: Extra Fields + name: extra_fields +- description: Extra fields types. (e.g. extra_col1=string,extra_col2=integer) + kind: string + label: Extra Fields Types + name: extra_fields_types +- description: 'One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization' + kind: array + label: Faker Config Locale + name: faker_config.locale +- description: 'Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator' + kind: string + label: Faker Config Seed + name: faker_config.seed +- description: "'True' to enable schema flattening and automatically expand nested + properties." + kind: boolean + label: Flattening Enabled + name: flattening_enabled +- description: The max depth to flatten schemas. + kind: integer + label: Flattening Max Depth + name: flattening_max_depth +- description: The method to use when loading data into the destination. `append-only` + will always write all input records whether that records already exists or not. + `upsert` will update existing records and insert new records. `overwrite` will + delete all existing records and insert all input records. + kind: options + label: Load Method + name: load_method + options: + - label: Append Only + value: append-only + - label: Upsert + value: upsert + - label: Overwrite + value: overwrite + value: append-only +- description: Max records to write in one batch. It can control the memory usage + of the target. + kind: integer + label: Max Batch Size + name: max_batch_size + value: 10000 +- description: Max size of pyarrow table in MB (before writing to parquet file).
It + can control the memory usage of the target. + kind: integer + label: Max Pyarrow Table Size + name: max_pyarrow_table_size + value: 800 +- description: Extra fields to add to the flattened record. (e.g. extra_col1,extra_col2) + kind: string + label: Partition Cols + name: partition_cols +- description: User-defined config values to be used within map expressions. + kind: object + label: Stream Map Config + name: stream_map_config +- description: Config object for stream maps capability. For more information check + out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html). + kind: object + label: Stream Maps + name: stream_maps +- description: Whether to validate the schema of the incoming streams. + kind: boolean + label: Validate Records + name: validate_records + value: true +settings_group_validation: +- [] +settings_preamble: '' +usage: '' +variant: automattic diff --git a/schemas/common/loaders_specific.schema.json b/schemas/common/loaders_specific.schema.json index 11cacbe9e..bc742c5ab 100644 --- a/schemas/common/loaders_specific.schema.json +++ b/schemas/common/loaders_specific.schema.json @@ -20,7 +20,8 @@ "datatype-failsafe", "record-flattening", "schema-flattening", - "target-schema" + "target-schema", + "validate-records" ] } },