From b7b0d792ec1e601dd67773ceb585ce53635e88e7 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 31 Jan 2022 18:28:10 +0530
Subject: [PATCH 1/7] added missing test cases

---
 tests/test_all_fields.py        | 98 +++++++++++++++++++++++++++++++++
 tests/test_automatic_fields.py  | 17 +++++-
 tests/test_bookmarks_updated.py | 13 +++++
 tests/test_discovery.py         |  8 +++
 tests/test_start_date.py        | 50 ++++++++++++-----
 5 files changed, 171 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_all_fields.py

diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py
new file mode 100644
index 00000000..cea3fd6a
--- /dev/null
+++ b/tests/test_all_fields.py
@@ -0,0 +1,98 @@
+import os
+
+from tap_tester import runner, menagerie
+from base import BaseTapTest
+
+class AllFieldsTest(BaseTapTest):
+
+    @staticmethod
+    def name():
+        return "tap_tester_shopify_all_fields_test"
+
+    def get_properties(self, original: bool = True):
+        """Configuration properties required for the tap."""
+        return_value = {
+            'start_date': '2021-04-01T00:00:00Z',
+            'shop': 'talenddatawearhouse',
+            'date_window_size': 30
+        }
+
+        return return_value
+
+    @staticmethod
+    def get_credentials(original_credentials: bool = True):
+        """Authentication information for the test account"""
+
+        return {
+            'api_key': os.getenv('TAP_SHOPIFY_API_KEY_TALENDDATAWEARHOUSE')
+        }
+
+    def test_run(self):
+        """
+        Ensure running the tap with all streams and fields selected results in the
+        replication of all fields.
+        - Verify no unexpected streams were replicated
+        - Verify that more than just the automatic fields are replicated for each stream
+        """
+
+        expected_streams = self.expected_streams()
+
+        # instantiate connection
+        conn_id = self.create_connection()
+
+        # run check mode
+        found_catalogs = menagerie.get_catalogs(conn_id)
+
+        # table and field selection
+        test_catalogs_all_fields = [catalog for catalog in found_catalogs
+                                    if catalog.get('stream_name') in expected_streams]
+        self.select_all_streams_and_fields(conn_id, test_catalogs_all_fields, select_all_fields=True)
+
+        # grab metadata after performing table-and-field selection to set expectations
+        stream_to_all_catalog_fields = dict()  # used for asserting all fields are replicated
+        for catalog in test_catalogs_all_fields:
+            stream_id, stream_name = catalog['stream_id'], catalog['stream_name']
+            catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id)
+            fields_from_field_level_md = [md_entry['breadcrumb'][1]
+                                          for md_entry in catalog_entry['metadata']
+                                          if md_entry['breadcrumb'] != []]
+            stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md)
+
+        # run initial sync
+        record_count_by_stream = self.run_sync(conn_id)
+        synced_records = runner.get_records_from_target_output()
+
+        # Verify no unexpected streams were replicated
+        synced_stream_names = set(synced_records.keys())
+        self.assertSetEqual(expected_streams, synced_stream_names)
+
+        for stream in expected_streams:
+            with self.subTest(stream=stream):
+
+                # expected values
+                expected_automatic_keys = self.expected_primary_keys().get(stream, set()) | self.expected_replication_keys().get(stream, set())
+                # get all expected keys
+                expected_all_keys = stream_to_all_catalog_fields[stream]
+
+                # collect actual values
+                messages = synced_records.get(stream)
+
+                actual_all_keys = set()
+                # collect all actual keys from the upsert messages
+                for message in messages['messages']:
+                    if message['action'] == 'upsert':
+                        actual_all_keys.update(message['data'].keys())
+
+                # Verify that you get some records for each stream
+                self.assertGreater(record_count_by_stream.get(stream, -1), 0)
+
+                # verify all fields for a stream were replicated
+                self.assertGreater(len(expected_all_keys), len(expected_automatic_keys))
+                self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"')
+
+                if stream == 'abandoned_checkouts':
+                    expected_all_keys.remove('billing_address')
+                elif stream == 'orders':
+                    expected_all_keys.remove('order_adjustments')
+
+                self.assertSetEqual(expected_all_keys, actual_all_keys)
diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py
index 76158e29..8fa63207 100644
--- a/tests/test_automatic_fields.py
+++ b/tests/test_automatic_fields.py
@@ -31,6 +31,7 @@ def automatic_test(self, conn_id, testable_streams):
         """
         Verify that for each stream you can get multiple pages of data
         when no fields are selected and only the automatic fields are replicated.
+        Verify that all replicated records have unique primary key values.
 
         PREREQUISITE
         For EACH stream add enough data that you surpass the limit of a single
@@ -52,6 +53,7 @@ def automatic_test(self, conn_id, testable_streams):
 
         record_count_by_stream = self.run_sync(conn_id)
         actual_fields_by_stream = runner.examine_target_output_for_fields()
+        synced_records = runner.get_records_from_target_output()
 
         for stream in incremental_streams:
             with self.subTest(stream=stream):
@@ -60,6 +62,11 @@ def automatic_test(self, conn_id, testable_streams):
                 # SKIP THIS ASSERTION FOR STREAMS WHERE YOU CANNOT GET
                 # MORE THAN 1 PAGE OF DATA IN THE TEST ACCOUNT
                 stream_metadata = self.expected_metadata().get(stream, {})
+                expected_primary_keys = self.expected_primary_keys().get(stream, set())
+
+                # collect records
+                messages = synced_records.get(stream)
+
                 minimum_record_count = stream_metadata.get(
                     self.API_LIMIT,
                     self.get_properties().get('result_per_page', self.DEFAULT_RESULTS_PER_PAGE)
@@ -72,7 +79,15 @@ def automatic_test(self, conn_id, testable_streams):
                 # verify that only the automatic fields are sent to the target
                 self.assertEqual(
                     actual_fields_by_stream.get(stream, set()),
-                    self.expected_primary_keys().get(stream, set()) |
+                    expected_primary_keys |
                     self.expected_replication_keys().get(stream, set()),
                     msg="The fields sent to the target are not the automatic fields"
                 )
+
+                # Verify that all replicated records have unique primary key values.
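+                # (assertCountEqual compares element counts, so checking the
+                # de-duplicated set against the raw list below flags any
+                # primary-key tuple that occurs more than once)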
+                records_pks_set = {tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys])
+                                   for message in messages.get('messages')}
+                records_pks_list = [tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys])
+                                    for message in messages.get('messages')]
+                self.assertCountEqual(records_pks_set, records_pks_list,
+                                      msg="We have duplicate records for {}".format(stream))
diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py
index c39ce1a3..87f78f34 100644
--- a/tests/test_bookmarks_updated.py
+++ b/tests/test_bookmarks_updated.py
@@ -15,6 +15,17 @@ class BookmarkTest(BaseTapTest):
     def name():
         return "tap_tester_shopify_bookmark_test"
 
+    # function for verifying the date format
+    def is_expected_date_format(self, date):
+        try:
+            # parse the date against the expected bookmark format
+            dt.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ")
+        except ValueError:
+            # return False if the date is not in the expected format
+            return False
+        # return True in case of no error
+        return True
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.start_date = '2021-04-01T00:00:00Z'
@@ -112,7 +123,9 @@ def bookmarks_test(self, conn_id, testable_streams):
 
                 # verify the syncs sets a bookmark of the expected form
                 self.assertIsNotNone(first_bookmark_value)
+                self.assertTrue(self.is_expected_date_format(first_bookmark_value))
                 self.assertIsNotNone(second_bookmark_value)
+                self.assertTrue(self.is_expected_date_format(second_bookmark_value))
 
                 # verify the 2nd bookmark is equal to 1st sync bookmark
                 #NOT A BUG (IS the expected behaviour for shopify as they are using date windowing : TDL-17096 : 2nd bookmark value is getting assigned from the execution time rather than the actual bookmark time. This is an invalid assertion for shopify
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 5a2377dd..c9246a02 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -74,6 +74,14 @@ def test_run(self):
                 self.assertTrue(len(stream_properties) == 1,
                                 msg="There is more than one top level breadcrumb")
 
+                # collect fields
+                actual_fields = []
+                for md_entry in metadata:
+                    if md_entry['breadcrumb'] != []:
+                        actual_fields.append(md_entry['breadcrumb'][1])
+
+                # Verify there are no duplicate/conflicting metadata entries.
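+                # (len(actual_fields) equals len(set(actual_fields)) only when
+                # every field name appears exactly once in the metadata)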
+ self.assertEqual(len(actual_fields), len(set(actual_fields)), msg="There are duplicate entries in the fields of '{}' stream".format(stream)) + # verify replication key(s) self.assertEqual( set(stream_properties[0].get( diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 52635141..57c8bf42 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -72,6 +72,7 @@ def test_run(self): # Count actual rows synced first_sync_records = runner.get_records_from_target_output() + first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records) # set the start date for a new connection based off bookmarks largest value first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records) @@ -112,27 +113,48 @@ def test_run(self): for stream in incremental_streams: with self.subTest(stream=stream): + # get primary key values for both sync records + expected_primary_keys = self.expected_primary_keys()[stream] + primary_keys_list_1 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) + for message in first_sync_records.get(stream).get('messages') + if message.get('action') == 'upsert'] + primary_keys_list_2 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) + for message in second_sync_records.get(stream).get('messages') + if message.get('action') == 'upsert'] + primary_keys_sync_1 = set(primary_keys_list_1) + primary_keys_sync_2 = set(primary_keys_list_2) + # verify that each stream has less records than the first connection sync self.assertGreaterEqual( first_sync_record_count.get(stream, 0), second_sync_record_count.get(stream, 0), msg="second had more records, start_date usage not verified") - # verify all data from 2nd sync >= start_date - target_mark = second_min_bookmarks.get(stream, {"mark": None}) - target_value = next(iter(target_mark.values())) # there should be only one + # Verify by primary key values, that all records of the 2nd sync are included in the 1st sync since 2nd sync has a later start date. 
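+                # (issubset also passes when the two syncs return identical sets,
+                # as happens when no records fall between the two start dates)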
+ self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1)) + + # verify all data from both syncs >= start_date + first_sync_target_mark = first_min_bookmarks.get(stream, {"mark": None}) + second_sync_target_mark = second_min_bookmarks.get(stream, {"mark": None}) + + # get start dates for both syncs + first_sync_start_date = self.get_properties()["start_date"] + second_sync_start_date = self.start_date + + for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)): + target_value = next(iter(target_mark.values())) # there should be only one - if target_value: + if target_value: - # it's okay if there isn't target data for a stream - try: - target_value = self.local_to_utc(parse(target_value)) + # it's okay if there isn't target data for a stream + try: + target_value = self.local_to_utc(parse(target_value)) - # verify that the minimum bookmark sent to the target for the second sync - # is greater than or equal to the start date - self.assertGreaterEqual(target_value, - self.local_to_utc(parse(self.start_date))) + # verify that the minimum bookmark sent to the target for the second sync + # is greater than or equal to the start date + self.assertGreaterEqual(target_value, + self.local_to_utc(parse(start_date))) - except (OverflowError, ValueError, TypeError): - print("bookmarks cannot be converted to dates, " - "can't test start_date for {}".format(stream)) + except (OverflowError, ValueError, TypeError): + print("bookmarks cannot be converted to dates, " + "can't test start_date for {}".format(stream)) From 7fd70102cba25bbb78337815a07da93fb0a6e40c Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 1 Feb 2022 17:32:26 +0530 Subject: [PATCH 2/7] removed abandoned_checkouts from all test cases except discovery --- tests/base.py | 12 ++++++++++-- tests/test_all_fields.py | 2 ++ tests/test_bookmarks_updated.py | 6 +++++- tests/test_discovery.py | 9 +++++---- tests/test_start_date.py | 8 +++++++- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/base.py b/tests/base.py index 2d45ba7b..b8b8138c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -132,7 +132,11 @@ def expected_metadata(self): def expected_streams(self): """A set of expected stream names""" - return set(self.expected_metadata().keys()) + # removed "abandoned_checkouts", as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} def child_streams(self): """ @@ -301,7 +305,11 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.start_date = self.get_properties().get("start_date") self.store_1_streams = {'custom_collections', 'orders', 'products', 'customers', 'locations', 'inventory_levels', 'inventory_items', 'events'} - self.store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} + # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state def calculated_states_by_stream(self, current_state): diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index cea3fd6a..04d87f5a 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -93,6 +93,8 @@ def test_run(self): if stream == 'abandoned_checkouts': expected_all_keys.remove('billing_address') elif stream == 'orders': + # No field named 'order_adjustments' present in the 'order' object + # Documentation: https://shopify.dev/api/admin-rest/2021-10/resources/order#resource_object expected_all_keys.remove('order_adjustments') self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index 87f78f34..a2506c9d 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -37,7 +37,11 @@ def __init__(self, *args, **kwargs): # creating this global variable for store 2 which is required only for this test, all the other test are referencing from base global store_2_streams - store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} + # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): with self.subTest(store="store_2"): diff --git a/tests/test_discovery.py b/tests/test_discovery.py index c9246a02..b35738c0 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -33,23 +33,24 @@ def test_run(self): • verify that all other fields have inclusion of available (metadata and schema) """ conn_id = self.create_connection() + expected_streams = self.expected_streams() | {"abandoned_checkouts"} # Verify number of actual streams discovered match expected found_catalogs = menagerie.get_catalogs(conn_id) self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) self.assertEqual(len(found_catalogs), - len(self.expected_streams()), + len(expected_streams), msg="Expected {} streams, actual was {} for connection {}," " actual {}".format( - len(self.expected_streams()), + len(expected_streams), len(found_catalogs), found_catalogs, conn_id)) # Verify the stream names discovered were what we expect found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} - self.assertEqual(set(self.expected_streams()), + self.assertEqual(set(expected_streams), set(found_catalog_names), msg="Expected streams don't match actual streams") @@ -58,7 +59,7 @@ def test_run(self): self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), msg="One or more streams don't follow standard naming") - for stream in self.expected_streams(): + for stream in expected_streams: with self.subTest(stream=stream): catalog = next(iter([catalog for catalog in found_catalogs if catalog["stream_name"] == stream])) diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 57c8bf42..810517a1 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -56,7 +56,13 @@ def test_run(self): # Select all streams and all fields within streams found_catalogs = menagerie.get_catalogs(conn_id) - incremental_streams = {key for key, value in self.expected_replication_method().items() + # removed 'abandoned_checkouts', as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
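+        # (popping the stream from the replication-method dict below keeps it out
+        # of incremental_streams, so no assertions in this test run against it)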
+ expected_replication_method = self.expected_replication_method() + expected_replication_method.pop("abandoned_checkouts") + incremental_streams = {key for key, value in expected_replication_method.items() if value == self.INCREMENTAL} # IF THERE ARE STREAMS THAT SHOULD NOT BE TESTED From 3088df84204faf4d20931950a113f92fc4329fb9 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 2 Feb 2022 14:17:55 +0530 Subject: [PATCH 3/7] added comment for abandoned checkouts in test cases --- tests/base.py | 2 ++ tests/test_bookmarks_updated.py | 1 + tests/test_start_date.py | 1 + 3 files changed, 4 insertions(+) diff --git a/tests/base.py b/tests/base.py index b8b8138c..8e453eb4 100644 --- a/tests/base.py +++ b/tests/base.py @@ -136,6 +136,7 @@ def expected_streams(self): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} def child_streams(self): @@ -309,6 +310,7 @@ def __init__(self, *args, **kwargs): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index a2506c9d..258779ec 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -41,6 +41,7 @@ def __init__(self, *args, **kwargs): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 810517a1..c2b13c22 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -60,6 +60,7 @@ def test_run(self): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts expected_replication_method = self.expected_replication_method() expected_replication_method.pop("abandoned_checkouts") incremental_streams = {key for key, value in expected_replication_method.items() From 3daafa725e476af626bb9946dfdfd95f09254523 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 3 Mar 2022 19:00:26 +0530 Subject: [PATCH 4/7] added code comments and function comments --- tests/test_discovery.py | 1 + tests/test_start_date.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index b35738c0..c7f885d7 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -24,6 +24,7 @@ def test_run(self): • Verify stream names follow naming convention streams should only have lowercase alphas and underscores • verify there is only 1 top level breadcrumb + • Verify there are no duplicate/conflicting metadata entries. • verify replication key(s) • verify primary key(s) • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL diff --git a/tests/test_start_date.py b/tests/test_start_date.py index c2b13c22..3b3c83f0 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -20,6 +20,8 @@ class StartDateTest(BaseTapTest): • verify that a sync with a later start date has at least one record synced and less records than the 1st sync with a previous start date • verify that each stream has less records than the earlier start date sync + • Verify by primary key values, that all records of the 2nd sync are included + in the 1st sync since 2nd sync has a later start date. 
    • verify all data from later start data has bookmark values >= start_date
    • verify that the minimum bookmark sent to the target for the later start_date sync
      is greater than or equal to the start date
@@ -148,6 +150,8 @@ def test_run(self):
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
+                # loop over minimum bookmark and the start date/state file date for each sync to verify
+                # the minimum bookmark is greater than or equal to the start date/state file date
                 for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)):
                     target_value = next(iter(target_mark.values()))  # there should be only one
 

From 273ce0e75ad6625f004beead45ebd45ec850d6b4 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Wed, 9 Mar 2022 13:02:26 +0530
Subject: [PATCH 5/7] updated all fields test

---
 tests/test_all_fields.py | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py
index 04d87f5a..07683fdd 100644
--- a/tests/test_all_fields.py
+++ b/tests/test_all_fields.py
@@ -1,5 +1,3 @@
-import os
-
 from tap_tester import runner, menagerie
 from base import BaseTapTest
 
@@ -9,24 +7,6 @@ class AllFieldsTest(BaseTapTest):
     def name():
         return "tap_tester_shopify_all_fields_test"
 
-    def get_properties(self, original: bool = True):
-        """Configuration properties required for the tap."""
-        return_value = {
-            'start_date': '2021-04-01T00:00:00Z',
-            'shop': 'talenddatawearhouse',
-            'date_window_size': 30
-        }
-
-        return return_value
-
-    @staticmethod
-    def get_credentials(original_credentials: bool = True):
-        """Authentication information for the test account"""
-
-        return {
-            'api_key': os.getenv('TAP_SHOPIFY_API_KEY_TALENDDATAWEARHOUSE')
-        }
-
     def test_run(self):
         """
         Ensure running the tap with all streams and fields selected results in the
@@ -35,10 +15,11 @@ def test_run(self):
         - Verify that more than just the automatic fields are replicated for each stream
         """
 
+        self.start_date = "2021-04-01T00:00:00Z"
         expected_streams = self.expected_streams()
 
-        # instantiate connection
-        conn_id = self.create_connection()
+        # instantiate connection to run on "talenddatawearhouse"
+        conn_id = self.create_connection(original_properties=False, original_credentials=False)
 
         # run check mode
         found_catalogs = menagerie.get_catalogs(conn_id)

From 7bfc94ec45bbc35ba61d3ea7a299a55263883474 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 11 Apr 2022 14:18:13 +0530
Subject: [PATCH 6/7] include abandoned_checkouts stream and updated start date test case

---
 tests/base.py                   | 14 ++------------
 tests/test_bookmarks_updated.py |  7 +------
 tests/test_discovery.py         |  2 +-
 tests/test_start_date.py        | 34 ++++++++++++++++++++++-----------
 4 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/tests/base.py b/tests/base.py
index 8e453eb4..2d45ba7b 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -132,12 +132,7 @@ def expected_metadata(self):
 
     def expected_streams(self):
         """A set of expected stream names"""
-        # removed "abandoned_checkouts", as per the Doc:
-        # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts
-        # abandoned checkouts are saved in the Shopify admin for three months.
-        # Every Monday, abandoned checkouts that are older than three months are removed from your admin.
- # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} + return set(self.expected_metadata().keys()) def child_streams(self): """ @@ -306,12 +301,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.start_date = self.get_properties().get("start_date") self.store_1_streams = {'custom_collections', 'orders', 'products', 'customers', 'locations', 'inventory_levels', 'inventory_items', 'events'} - # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. - # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} + self.store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state def calculated_states_by_stream(self, current_state): diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index 258779ec..87f78f34 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -37,12 +37,7 @@ def __init__(self, *args, **kwargs): # creating this global variable for store 2 which is required only for this test, all the other test are referencing from base global store_2_streams - # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
- # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} + store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): with self.subTest(store="store_2"): diff --git a/tests/test_discovery.py b/tests/test_discovery.py index c7f885d7..52807de6 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -34,7 +34,7 @@ def test_run(self): • verify that all other fields have inclusion of available (metadata and schema) """ conn_id = self.create_connection() - expected_streams = self.expected_streams() | {"abandoned_checkouts"} + expected_streams = self.expected_streams() # Verify number of actual streams discovered match expected found_catalogs = menagerie.get_catalogs(conn_id) diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 3b3c83f0..b603c7df 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -58,16 +58,12 @@ def test_run(self): # Select all streams and all fields within streams found_catalogs = menagerie.get_catalogs(conn_id) - # removed 'abandoned_checkouts', as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
-        # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts
-        expected_replication_method = self.expected_replication_method()
-        expected_replication_method.pop("abandoned_checkouts")
-        incremental_streams = {key for key, value in expected_replication_method.items()
+        incremental_streams = {key for key, value in self.expected_replication_method().items()
                                if value == self.INCREMENTAL}
 
+        # get expected replication keys
+        expected_replication_keys = self.expected_replication_keys()
+
         # IF THERE ARE STREAMS THAT SHOULD NOT BE TESTED
         # REPLACE THE EMPTY SET BELOW WITH THOSE STREAMS
@@ -133,6 +129,14 @@ def test_run(self):
                 primary_keys_sync_1 = set(primary_keys_list_1)
                 primary_keys_sync_2 = set(primary_keys_list_2)
 
+                # get replication key-values for all records for both syncs
+                replication_key_sync_1 = [message.get('data').get(expected_rk) for expected_rk in expected_replication_keys.get(stream)
+                                          for message in first_sync_records.get(stream).get('messages')
+                                          if message.get('action') == 'upsert']
+                replication_key_sync_2 = [message.get('data').get(expected_rk) for expected_rk in expected_replication_keys.get(stream)
+                                          for message in second_sync_records.get(stream).get('messages')
+                                          if message.get('action') == 'upsert']
+
                 # verify that each stream has less records than the first connection sync
                 self.assertGreaterEqual(
                     first_sync_record_count.get(stream, 0),
@@ -150,9 +154,12 @@ def test_run(self):
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
-                # loop over minimum bookmark and the start date/state file date for each sync to verify
-                # the minimum bookmark is greater than or equal to the start date/state file date
-                for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)):
+                # loop over minimum bookmark, the start date/state file date and replication key records for each
+                # sync to verify the minimum bookmark is greater than or equal to the start date/state file date
+                for start_date, target_mark, record_replication_keys in zip(
+                    (first_sync_start_date, second_sync_start_date),
+                    (first_sync_target_mark, second_sync_target_mark),
+                    (replication_key_sync_1, replication_key_sync_2)):
                     target_value = next(iter(target_mark.values()))  # there should be only one
 
                     if target_value:
@@ -169,3 +176,8 @@ def test_run(self):
                         except (OverflowError, ValueError, TypeError):
                             print("bookmarks cannot be converted to dates, "
                                   "can't test start_date for {}".format(stream))
+
+                    # loop over every replication key record and verify we have
+                    # synced records greater than the start date/state file date
+                    for record_replication_key in record_replication_keys:
+                        self.assertGreaterEqual(record_replication_key, start_date)

From 012242ed7143ed01cda56b7464ab26dc065e3189 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 11 Apr 2022 19:09:22 +0530
Subject: [PATCH 7/7] removed unnecessary assertion in start date test

---
 tests/test_start_date.py | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/tests/test_start_date.py b/tests/test_start_date.py
index b603c7df..6a2791eb 100644
--- a/tests/test_start_date.py
+++ b/tests/test_start_date.py
@@ -77,7 +77,6 @@ def test_run(self):
 
         # Count actual rows synced
         first_sync_records = runner.get_records_from_target_output()
-        first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records)
 
         # set the start date for a new connection based off bookmarks largest value
         first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records)
@@ -109,7 +108,6 @@ def test_run(self):
         second_sync_record_count = self.run_sync(conn_id)
         second_total_records = reduce(lambda a, b: a + b, second_sync_record_count.values(), 0)
         second_sync_records = runner.get_records_from_target_output()
-        second_min_bookmarks = self.min_bookmarks_by_stream(second_sync_records)
 
         # verify that at least one record synced and less records synced than the 1st connection
         self.assertGreater(second_total_records, 0)
@@ -146,36 +144,15 @@ def test_run(self):
                 # Verify by primary key values, that all records of the 2nd sync are included in the 1st sync since 2nd sync has a later start date.
                 self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1))
 
-                # verify all data from both syncs >= start_date
-                first_sync_target_mark = first_min_bookmarks.get(stream, {"mark": None})
-                second_sync_target_mark = second_min_bookmarks.get(stream, {"mark": None})
-
                 # get start dates for both syncs
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
-                # loop over minimum bookmark, the start date/state file date and replication key records for each
-                # sync to verify the minimum bookmark is greater than or equal to the start date/state file date
-                for start_date, target_mark, record_replication_keys in zip(
+                # loop over the start date/state file date and replication key records for each sync
+                # to verify the records we synced are greater than the start date/state file date
+                for start_date, record_replication_keys in zip(
                     (first_sync_start_date, second_sync_start_date),
-                    (first_sync_target_mark, second_sync_target_mark),
                     (replication_key_sync_1, replication_key_sync_2)):
-                    target_value = next(iter(target_mark.values()))  # there should be only one
-
-                    if target_value:
-
-                        # it's okay if there isn't target data for a stream
-                        try:
-                            target_value = self.local_to_utc(parse(target_value))
-
-                            # verify that the minimum bookmark sent to the target for the second sync
-                            # is greater than or equal to the start date
-                            self.assertGreaterEqual(target_value,
-                                                    self.local_to_utc(parse(start_date)))
-
-                        except (OverflowError, ValueError, TypeError):
-                            print("bookmarks cannot be converted to dates, "
-                                  "can't test start_date for {}".format(stream))
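
A note on the assertion these patches converge on: assertGreaterEqual(record_replication_key, start_date) compares strings rather than parsed datetimes. That is sound only while both sides share a single fixed ISO-8601 UTC layout, because such timestamps sort lexicographically in the same order as chronologically. A minimal sketch of that assumption (values invented for illustration):

    # ISO-8601 UTC timestamps in one fixed layout sort the same way as strings
    # and as datetimes; mixed layouts or UTC offsets would break this property.
    start_date = "2021-04-01T00:00:00Z"
    record_replication_keys = ["2021-04-02T10:15:00Z", "2021-12-31T23:59:59Z"]

    for record_replication_key in record_replication_keys:
        # string comparison mirrors chronological comparison here
        assert record_replication_key >= start_date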