diff --git a/README.md b/README.md index b5f3964..4360851 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Setting | Required | Type | Description | `sheet_id` | Required | String | Your target google sheet id `stream_name` | Optional | String | Optionailly rename the stream and output file or table from the tap `child_sheet_name` | Optional | String | Optionally choose a different sheet from your Google Sheet file +`key_properties` | Optional | Array of Strings | Optionally choose primary key column(s) from your Google Sheet file. Example: `["column_one", "column_two"]` ### Environment Variable @@ -66,6 +67,7 @@ These settings expand into environment variables of: - `TAP_GOOGLE_SHEETS_SHEET_ID` - `TAP_GOOGLE_SHEETS_STREAM_NAME` - `TAP_GOOGLE_SHEETS_CHILD_SHEET_NAME` +- `TAP_GOOGLE_SHEETS_KEY_PROPERTIES` --- @@ -85,6 +87,8 @@ These settings expand into environment variables of: * The tap will again replace any spaces in column names with underscores. +* When using the `key_properties` setting, you must choose columns with no null values. + ### Loaders Tested - [target-jsonl](https://hub.meltano.com/targets/jsonl) @@ -98,8 +102,6 @@ These settings expand into environment variables of: ## Roadmap - [ ] Add setting to optionally allow the selection of a range of data from a sheet. (Add an optional range setting). -- [ ] Add setting to enable primary key, and select primary key(s) column(s). - - [ ] Improve default behavior of a sheet with multiple columns of the same name and `target-postgres`. @@ -114,7 +116,7 @@ Currently if have duplicate column names, a database will either: Use pip to install a release from GitHub. 
```bash -pip install git+https://github.com/Matatika/tap-shopify@vx.x.x +pip install git+https://github.com/Matatika/tap-google-sheets@vx.x.x ``` ## Usage diff --git a/tap_google_sheets/tap.py b/tap_google_sheets/tap.py index 3f88b14..f87c8ee 100644 --- a/tap_google_sheets/tap.py +++ b/tap_google_sheets/tap.py @@ -45,6 +45,12 @@ class TapGoogleSheets(Tap): + " your Google Sheet", required=False, ), + th.Property( + "key_properties", + th.ArrayType(th.StringType), + description="Optionally choose one or more primary key columns", + required=False, + ), ).to_dict() def discover_streams(self) -> List[Stream]: @@ -53,6 +59,7 @@ def discover_streams(self) -> List[Stream]: stream_name = self.config.get("stream_name") or self.get_sheet_name() stream_name = stream_name.replace(" ", "_") + key_properties = self.config.get("key_properties", []) google_sheet_data = self.get_sheet_data() @@ -68,6 +75,7 @@ def discover_streams(self) -> List[Stream]: ) stream.child_sheet_name = child_sheet_name stream.selected + stream.primary_keys = key_properties streams.append(stream) return streams diff --git a/tap_google_sheets/tests/test_ignoring_unnamed_columns.py b/tap_google_sheets/tests/test_ignoring_unnamed_columns.py index a1b40f1..0ffda7c 100644 --- a/tap_google_sheets/tests/test_ignoring_unnamed_columns.py +++ b/tap_google_sheets/tests/test_ignoring_unnamed_columns.py @@ -22,7 +22,6 @@ def setUp(self): @responses.activate() def test_ignoring_unnamed_columns(self): - self.missing_column_response = { "values": [ ["Column_One", "", "Column_Two"], diff --git a/tap_google_sheets/tests/test_key_properties_setting.py b/tap_google_sheets/tests/test_key_properties_setting.py new file mode 100644 index 0000000..531ebbc --- /dev/null +++ b/tap_google_sheets/tests/test_key_properties_setting.py @@ -0,0 +1,57 @@ +"""Tests tap setting key_properties.""" + +import unittest + +import responses + +from tap_google_sheets.tap import TapGoogleSheets + + +class 
TestKeyPropertiesSetting(unittest.TestCase): + """Test class for tap setting key_properties""" + + def setUp(self): + self.mock_config = { + "oauth_credentials": { + "client_id": "123", + "client_secret": "123", + "refresh_token": "123", + }, + "sheet_id": "12345", + } + self.mock_config["key_properties"] = ["column_one", "column_two"] + + @responses.activate() + def test_key_properties_being_set_in_stream(self): + """Stream key_properties match the key_properties setting.""" + self.column_response = {"values": [["Column One", "Column Two"], ["1", "1"]]} + + responses.add( + responses.POST, + "https://oauth2.googleapis.com/token", + json={"access_token": "new_token"}, + status=200, + ) + responses.add( + responses.GET, + "https://www.googleapis.com/drive/v2/files/12345", + json={"title": "File Name One"}, + status=200, + ) + responses.add( + responses.GET, + "https://sheets.googleapis.com/v4/spreadsheets/12345/values/!1:1", + json={ + "range": "!1:1", + "values": [["Column One", "Column Two"]], + }, + status=200, + ) + + tap = TapGoogleSheets(config=self.mock_config) + + # Every discovered stream must carry the configured key_properties + for stream in tap.catalog_dict.get("streams"): + self.assertEqual( + stream.get("key_properties"), tap.config.get("key_properties") + ) diff --git a/tap_google_sheets/tests/test_underscoring_column_names.py b/tap_google_sheets/tests/test_underscoring_column_names.py index 9bd0010..5673bbf 100644 --- a/tap_google_sheets/tests/test_underscoring_column_names.py +++ b/tap_google_sheets/tests/test_underscoring_column_names.py @@ -22,7 +22,6 @@ def setUp(self): @responses.activate() def test_underscoring_column_names(self): - self.column_response = {"values": [["Column One", "Column Two"], ["1", "1"]]} responses.add(