From b7b0d792ec1e601dd67773ceb585ce53635e88e7 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 31 Jan 2022 18:28:10 +0530
Subject: [PATCH 1/7] added missing test cases

---
 tests/test_all_fields.py        | 98 +++++++++++++++++++++++++++++++++
 tests/test_automatic_fields.py  | 17 +++++-
 tests/test_bookmarks_updated.py | 13 +++++
 tests/test_discovery.py         |  8 +++
 tests/test_start_date.py        | 50 ++++++++++++-----
 5 files changed, 171 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_all_fields.py

diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py
new file mode 100644
index 00000000..cea3fd6a
--- /dev/null
+++ b/tests/test_all_fields.py
@@ -0,0 +1,98 @@
+import os
+
+from tap_tester import runner, menagerie
+from base import BaseTapTest
+
+class AllFieldsTest(BaseTapTest):
+
+    @staticmethod
+    def name():
+        return "tap_tester_shopify_all_fields_test"
+
+    def get_properties(self, original: bool = True):
+        """Configuration properties required for the tap."""
+        return_value = {
+            'start_date': '2021-04-01T00:00:00Z',
+            'shop': 'talenddatawearhouse',
+            'date_window_size': 30
+        }
+
+        return return_value
+
+    @staticmethod
+    def get_credentials(original_credentials: bool = True):
+        """Authentication information for the test account"""
+
+        return {
+            'api_key': os.getenv('TAP_SHOPIFY_API_KEY_TALENDDATAWEARHOUSE')
+        }
+
+    def test_run(self):
+        """
+        Ensure running the tap with all streams and fields selected results in the
+        replication of all fields.
+        - Verify no unexpected streams were replicated
+        - Verify that more than just the automatic fields are replicated for each stream
+        """
+
+        expected_streams = self.expected_streams()
+
+        # instantiate connection
+        conn_id = self.create_connection()
+
+        # run check mode
+        found_catalogs = menagerie.get_catalogs(conn_id)
+
+        # table and field selection
+        test_catalogs_all_fields = [catalog for catalog in found_catalogs
+                                    if catalog.get('stream_name') in expected_streams]
+        self.select_all_streams_and_fields(conn_id, test_catalogs_all_fields, select_all_fields=True)
+
+        # grab metadata after performing table-and-field selection to set expectations
+        stream_to_all_catalog_fields = dict()  # used for asserting all fields are replicated
+        for catalog in test_catalogs_all_fields:
+            stream_id, stream_name = catalog['stream_id'], catalog['stream_name']
+            catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id)
+            fields_from_field_level_md = [md_entry['breadcrumb'][1]
+                                          for md_entry in catalog_entry['metadata']
+                                          if md_entry['breadcrumb'] != []]
+            stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md)
+
+        # run initial sync
+        record_count_by_stream = self.run_sync(conn_id)
+        synced_records = runner.get_records_from_target_output()
+
+        # Verify no unexpected streams were replicated
+        synced_stream_names = set(synced_records.keys())
+        self.assertSetEqual(expected_streams, synced_stream_names)
+
+        for stream in expected_streams:
+            with self.subTest(stream=stream):
+
+                # expected values
+                expected_automatic_keys = self.expected_primary_keys().get(stream, set()) | self.expected_replication_keys().get(stream, set())
+                # get all expected keys
+                expected_all_keys = stream_to_all_catalog_fields[stream]
+
+                # collect actual values
+                messages = synced_records.get(stream)
+
+                actual_all_keys = set()
+                # collect all actual keys from the upsert messages
+                for message in messages['messages']:
+                    if message['action'] == 'upsert':
+                        actual_all_keys.update(message['data'].keys())
+
+                # Verify that you get some records for each stream
+                self.assertGreater(record_count_by_stream.get(stream, -1), 0)
+
+                # verify all fields for a stream were replicated
+                self.assertGreater(len(expected_all_keys), len(expected_automatic_keys))
+                self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"')
+
+                if stream == 'abandoned_checkouts':
+                    expected_all_keys.remove('billing_address')
+                elif stream == 'orders':
+                    expected_all_keys.remove('order_adjustments')
+
+                self.assertSetEqual(expected_all_keys, actual_all_keys)
diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py
index 76158e29..8fa63207 100644
--- a/tests/test_automatic_fields.py
+++ b/tests/test_automatic_fields.py
@@ -31,6 +31,7 @@ def automatic_test(self, conn_id, testable_streams):
         """
         Verify that for each stream you can get multiple pages of data
         when no fields are selected and only the automatic fields are replicated.
+        Verify that all replicated records have unique primary key values.
 
         PREREQUISITE
         For EACH stream add enough data that you surpass the limit of a single
@@ -52,6 +53,7 @@ def automatic_test(self, conn_id, testable_streams):
 
         record_count_by_stream = self.run_sync(conn_id)
         actual_fields_by_stream = runner.examine_target_output_for_fields()
+        synced_records = runner.get_records_from_target_output()
 
         for stream in incremental_streams:
             with self.subTest(stream=stream):
@@ -60,6 +62,11 @@ def automatic_test(self, conn_id, testable_streams):
                 # SKIP THIS ASSERTION FOR STREAMS WHERE YOU CANNOT GET
                 # MORE THAN 1 PAGE OF DATA IN THE TEST ACCOUNT
                 stream_metadata = self.expected_metadata().get(stream, {})
+                expected_primary_keys = self.expected_primary_keys().get(stream, set())
+
+                # collect records
+                messages = synced_records.get(stream)
+
                 minimum_record_count = stream_metadata.get(
                     self.API_LIMIT,
                     self.get_properties().get('result_per_page', self.DEFAULT_RESULTS_PER_PAGE)
@@ -72,7 +79,15 @@ def automatic_test(self, conn_id, testable_streams):
                 # verify that only the automatic fields are sent to the target
                 self.assertEqual(
                     actual_fields_by_stream.get(stream, set()),
-                    self.expected_primary_keys().get(stream, set()) |
+                    expected_primary_keys |
                     self.expected_replication_keys().get(stream, set()),
                     msg="The fields sent to the target are not the automatic fields"
                 )
+
+                # Verify that all replicated records have unique primary key values.
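+                # (assertCountEqual compares element counts, so checking the
+                # de-duplicated set against the raw list below flags any
+                # primary-key tuple that occurs more than once)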
+                records_pks_set = {tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys])
+                                   for message in messages.get('messages')}
+                records_pks_list = [tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys])
+                                    for message in messages.get('messages')]
+                self.assertCountEqual(records_pks_set, records_pks_list,
+                                      msg="We have duplicate records for {}".format(stream))
diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py
index c39ce1a3..87f78f34 100644
--- a/tests/test_bookmarks_updated.py
+++ b/tests/test_bookmarks_updated.py
@@ -15,6 +15,17 @@ class BookmarkTest(BaseTapTest):
     def name():
         return "tap_tester_shopify_bookmark_test"
 
+    # function for verifying the date format
+    def is_expected_date_format(self, date):
+        try:
+            # parse the date against the expected bookmark format
+            dt.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ")
+        except ValueError:
+            # return False if the date is not in the expected format
+            return False
+        # return True in case of no error
+        return True
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.start_date = '2021-04-01T00:00:00Z'
@@ -112,7 +123,9 @@ def bookmarks_test(self, conn_id, testable_streams):
 
                 # verify the syncs sets a bookmark of the expected form
                 self.assertIsNotNone(first_bookmark_value)
+                self.assertTrue(self.is_expected_date_format(first_bookmark_value))
                 self.assertIsNotNone(second_bookmark_value)
+                self.assertTrue(self.is_expected_date_format(second_bookmark_value))
 
                 # verify the 2nd bookmark is equal to 1st sync bookmark
                 #NOT A BUG (IS the expected behaviour for shopify as they are using date windowing : TDL-17096 : 2nd bookmark value is getting assigned from the execution time rather than the actual bookmark time. This is an invalid assertion for shopify
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 5a2377dd..c9246a02 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -74,6 +74,14 @@ def test_run(self):
                 self.assertTrue(len(stream_properties) == 1,
                                 msg="There is more than one top level breadcrumb")
 
+                # collect fields
+                actual_fields = []
+                for md_entry in metadata:
+                    if md_entry['breadcrumb'] != []:
+                        actual_fields.append(md_entry['breadcrumb'][1])
+
+                # Verify there are no duplicate/conflicting metadata entries.
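+                # (len(actual_fields) equals len(set(actual_fields)) only when
+                # every field name appears exactly once in the metadata)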
+ self.assertEqual(len(actual_fields), len(set(actual_fields)), msg="There are duplicate entries in the fields of '{}' stream".format(stream)) + # verify replication key(s) self.assertEqual( set(stream_properties[0].get( diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 52635141..57c8bf42 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -72,6 +72,7 @@ def test_run(self): # Count actual rows synced first_sync_records = runner.get_records_from_target_output() + first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records) # set the start date for a new connection based off bookmarks largest value first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records) @@ -112,27 +113,48 @@ def test_run(self): for stream in incremental_streams: with self.subTest(stream=stream): + # get primary key values for both sync records + expected_primary_keys = self.expected_primary_keys()[stream] + primary_keys_list_1 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) + for message in first_sync_records.get(stream).get('messages') + if message.get('action') == 'upsert'] + primary_keys_list_2 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) + for message in second_sync_records.get(stream).get('messages') + if message.get('action') == 'upsert'] + primary_keys_sync_1 = set(primary_keys_list_1) + primary_keys_sync_2 = set(primary_keys_list_2) + # verify that each stream has less records than the first connection sync self.assertGreaterEqual( first_sync_record_count.get(stream, 0), second_sync_record_count.get(stream, 0), msg="second had more records, start_date usage not verified") - # verify all data from 2nd sync >= start_date - target_mark = second_min_bookmarks.get(stream, {"mark": None}) - target_value = next(iter(target_mark.values())) # there should be only one + # Verify by primary key values, that all records of the 2nd sync are included in the 1st sync since 2nd sync has a later start date. 
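+                # (issubset also passes when the two syncs return identical sets,
+                # as happens when no records fall between the two start dates)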
+ self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1)) + + # verify all data from both syncs >= start_date + first_sync_target_mark = first_min_bookmarks.get(stream, {"mark": None}) + second_sync_target_mark = second_min_bookmarks.get(stream, {"mark": None}) + + # get start dates for both syncs + first_sync_start_date = self.get_properties()["start_date"] + second_sync_start_date = self.start_date + + for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)): + target_value = next(iter(target_mark.values())) # there should be only one - if target_value: + if target_value: - # it's okay if there isn't target data for a stream - try: - target_value = self.local_to_utc(parse(target_value)) + # it's okay if there isn't target data for a stream + try: + target_value = self.local_to_utc(parse(target_value)) - # verify that the minimum bookmark sent to the target for the second sync - # is greater than or equal to the start date - self.assertGreaterEqual(target_value, - self.local_to_utc(parse(self.start_date))) + # verify that the minimum bookmark sent to the target for the second sync + # is greater than or equal to the start date + self.assertGreaterEqual(target_value, + self.local_to_utc(parse(start_date))) - except (OverflowError, ValueError, TypeError): - print("bookmarks cannot be converted to dates, " - "can't test start_date for {}".format(stream)) + except (OverflowError, ValueError, TypeError): + print("bookmarks cannot be converted to dates, " + "can't test start_date for {}".format(stream)) From 7fd70102cba25bbb78337815a07da93fb0a6e40c Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 1 Feb 2022 17:32:26 +0530 Subject: [PATCH 2/7] removed abandoned_checkouts from all test cases except discovery --- tests/base.py | 12 ++++++++++-- tests/test_all_fields.py | 2 ++ tests/test_bookmarks_updated.py | 6 +++++- tests/test_discovery.py | 9 +++++---- tests/test_start_date.py | 8 +++++++- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/base.py b/tests/base.py index 2d45ba7b..b8b8138c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -132,7 +132,11 @@ def expected_metadata(self): def expected_streams(self): """A set of expected stream names""" - return set(self.expected_metadata().keys()) + # removed "abandoned_checkouts", as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} def child_streams(self): """ @@ -301,7 +305,11 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.start_date = self.get_properties().get("start_date") self.store_1_streams = {'custom_collections', 'orders', 'products', 'customers', 'locations', 'inventory_levels', 'inventory_items', 'events'} - self.store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} + # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state def calculated_states_by_stream(self, current_state): diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index cea3fd6a..04d87f5a 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -93,6 +93,8 @@ def test_run(self): if stream == 'abandoned_checkouts': expected_all_keys.remove('billing_address') elif stream == 'orders': + # No field named 'order_adjustments' present in the 'order' object + # Documentation: https://shopify.dev/api/admin-rest/2021-10/resources/order#resource_object expected_all_keys.remove('order_adjustments') self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index 87f78f34..a2506c9d 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -37,7 +37,11 @@ def __init__(self, *args, **kwargs): # creating this global variable for store 2 which is required only for this test, all the other test are referencing from base global store_2_streams - store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} + # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): with self.subTest(store="store_2"): diff --git a/tests/test_discovery.py b/tests/test_discovery.py index c9246a02..b35738c0 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -33,23 +33,24 @@ def test_run(self): • verify that all other fields have inclusion of available (metadata and schema) """ conn_id = self.create_connection() + expected_streams = self.expected_streams() | {"abandoned_checkouts"} # Verify number of actual streams discovered match expected found_catalogs = menagerie.get_catalogs(conn_id) self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) self.assertEqual(len(found_catalogs), - len(self.expected_streams()), + len(expected_streams), msg="Expected {} streams, actual was {} for connection {}," " actual {}".format( - len(self.expected_streams()), + len(expected_streams), len(found_catalogs), found_catalogs, conn_id)) # Verify the stream names discovered were what we expect found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} - self.assertEqual(set(self.expected_streams()), + self.assertEqual(set(expected_streams), set(found_catalog_names), msg="Expected streams don't match actual streams") @@ -58,7 +59,7 @@ def test_run(self): self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), msg="One or more streams don't follow standard naming") - for stream in self.expected_streams(): + for stream in expected_streams: with self.subTest(stream=stream): catalog = next(iter([catalog for catalog in found_catalogs if catalog["stream_name"] == stream])) diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 57c8bf42..810517a1 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -56,7 +56,13 @@ def test_run(self): # Select all streams and all fields within streams found_catalogs = menagerie.get_catalogs(conn_id) - incremental_streams = {key for key, value in self.expected_replication_method().items() + # removed 'abandoned_checkouts', as per the Doc: + # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts + # abandoned checkouts are saved in the Shopify admin for three months. + # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
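+        # (popping the stream from the replication-method dict below keeps it out
+        # of incremental_streams, so no assertions in this test run against it)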
+ expected_replication_method = self.expected_replication_method() + expected_replication_method.pop("abandoned_checkouts") + incremental_streams = {key for key, value in expected_replication_method.items() if value == self.INCREMENTAL} # IF THERE ARE STREAMS THAT SHOULD NOT BE TESTED From 3088df84204faf4d20931950a113f92fc4329fb9 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 2 Feb 2022 14:17:55 +0530 Subject: [PATCH 3/7] added comment for abandoned checkouts in test cases --- tests/base.py | 2 ++ tests/test_bookmarks_updated.py | 1 + tests/test_start_date.py | 1 + 3 files changed, 4 insertions(+) diff --git a/tests/base.py b/tests/base.py index b8b8138c..8e453eb4 100644 --- a/tests/base.py +++ b/tests/base.py @@ -136,6 +136,7 @@ def expected_streams(self): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} def child_streams(self): @@ -309,6 +310,7 @@ def __init__(self, *args, **kwargs): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index a2506c9d..258779ec 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -41,6 +41,7 @@ def __init__(self, *args, **kwargs): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
+ # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 810517a1..c2b13c22 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -60,6 +60,7 @@ def test_run(self): # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts # abandoned checkouts are saved in the Shopify admin for three months. # Every Monday, abandoned checkouts that are older than three months are removed from your admin. + # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts expected_replication_method = self.expected_replication_method() expected_replication_method.pop("abandoned_checkouts") incremental_streams = {key for key, value in expected_replication_method.items() From 3daafa725e476af626bb9946dfdfd95f09254523 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 3 Mar 2022 19:00:26 +0530 Subject: [PATCH 4/7] added code comments and function comments --- tests/test_discovery.py | 1 + tests/test_start_date.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index b35738c0..c7f885d7 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -24,6 +24,7 @@ def test_run(self): • Verify stream names follow naming convention streams should only have lowercase alphas and underscores • verify there is only 1 top level breadcrumb + • Verify there are no duplicate/conflicting metadata entries. • verify replication key(s) • verify primary key(s) • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL diff --git a/tests/test_start_date.py b/tests/test_start_date.py index c2b13c22..3b3c83f0 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -20,6 +20,8 @@ class StartDateTest(BaseTapTest): • verify that a sync with a later start date has at least one record synced and less records than the 1st sync with a previous start date • verify that each stream has less records than the earlier start date sync + • Verify by primary key values, that all records of the 2nd sync are included + in the 1st sync since 2nd sync has a later start date. 
    • verify all data from later start data has bookmark values >= start_date
    • verify that the minimum bookmark sent to the target for the later start_date sync
      is greater than or equal to the start date
@@ -148,6 +150,8 @@ def test_run(self):
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
+                # loop over minimum bookmark and the start date/state file date for each sync to verify
+                # the minimum bookmark is greater than or equal to the start date/state file date
                 for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)):
                     target_value = next(iter(target_mark.values()))  # there should be only one
 

From 273ce0e75ad6625f004beead45ebd45ec850d6b4 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Wed, 9 Mar 2022 13:02:26 +0530
Subject: [PATCH 5/7] updated all fields test

---
 tests/test_all_fields.py | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py
index 04d87f5a..07683fdd 100644
--- a/tests/test_all_fields.py
+++ b/tests/test_all_fields.py
@@ -1,5 +1,3 @@
-import os
-
 from tap_tester import runner, menagerie
 from base import BaseTapTest
 
@@ -9,24 +7,6 @@ class AllFieldsTest(BaseTapTest):
     def name():
         return "tap_tester_shopify_all_fields_test"
 
-    def get_properties(self, original: bool = True):
-        """Configuration properties required for the tap."""
-        return_value = {
-            'start_date': '2021-04-01T00:00:00Z',
-            'shop': 'talenddatawearhouse',
-            'date_window_size': 30
-        }
-
-        return return_value
-
-    @staticmethod
-    def get_credentials(original_credentials: bool = True):
-        """Authentication information for the test account"""
-
-        return {
-            'api_key': os.getenv('TAP_SHOPIFY_API_KEY_TALENDDATAWEARHOUSE')
-        }
-
     def test_run(self):
         """
         Ensure running the tap with all streams and fields selected results in the
@@ -35,10 +15,11 @@ def test_run(self):
         - Verify that more than just the automatic fields are replicated for each stream
         """
 
+        self.start_date = "2021-04-01T00:00:00Z"
         expected_streams = self.expected_streams()
 
-        # instantiate connection
-        conn_id = self.create_connection()
+        # instantiate connection to run on "talenddatawearhouse"
+        conn_id = self.create_connection(original_properties=False, original_credentials=False)
 
         # run check mode
         found_catalogs = menagerie.get_catalogs(conn_id)

From 7bfc94ec45bbc35ba61d3ea7a299a55263883474 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 11 Apr 2022 14:18:13 +0530
Subject: [PATCH 6/7] include abandoned_checkouts stream and updated start date test case

---
 tests/base.py                   | 14 ++------------
 tests/test_bookmarks_updated.py |  7 +------
 tests/test_discovery.py         |  2 +-
 tests/test_start_date.py        | 34 ++++++++++++++++++++++-----------
 4 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/tests/base.py b/tests/base.py
index 8e453eb4..2d45ba7b 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -132,12 +132,7 @@ def expected_metadata(self):
 
     def expected_streams(self):
         """A set of expected stream names"""
-        # removed "abandoned_checkouts", as per the Doc:
-        # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts
-        # abandoned checkouts are saved in the Shopify admin for three months.
-        # Every Monday, abandoned checkouts that are older than three months are removed from your admin.
- # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - return set(self.expected_metadata().keys()) - {"abandoned_checkouts"} + return set(self.expected_metadata().keys()) def child_streams(self): """ @@ -306,12 +301,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.start_date = self.get_properties().get("start_date") self.store_1_streams = {'custom_collections', 'orders', 'products', 'customers', 'locations', 'inventory_levels', 'inventory_items', 'events'} - # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. - # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - self.store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} + self.store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_levels', 'inventory_items', 'events'} #modified this method to accommodate replication key in the current_state def calculated_states_by_stream(self, current_state): diff --git a/tests/test_bookmarks_updated.py b/tests/test_bookmarks_updated.py index 258779ec..87f78f34 100644 --- a/tests/test_bookmarks_updated.py +++ b/tests/test_bookmarks_updated.py @@ -37,12 +37,7 @@ def __init__(self, *args, **kwargs): # creating this global variable for store 2 which is required only for this test, all the other test are referencing from base global store_2_streams - # removed 'abandoned_checkouts' from store 2 streams, as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
- # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts - store_2_streams = {'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} + store_2_streams = {'abandoned_checkouts', 'collects', 'metafields', 'transactions', 'order_refunds', 'products', 'locations', 'inventory_items', 'events', 'customers', 'custom_collections', 'orders'} def test_run_store_2(self): with self.subTest(store="store_2"): diff --git a/tests/test_discovery.py b/tests/test_discovery.py index c7f885d7..52807de6 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -34,7 +34,7 @@ def test_run(self): • verify that all other fields have inclusion of available (metadata and schema) """ conn_id = self.create_connection() - expected_streams = self.expected_streams() | {"abandoned_checkouts"} + expected_streams = self.expected_streams() # Verify number of actual streams discovered match expected found_catalogs = menagerie.get_catalogs(conn_id) diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 3b3c83f0..b603c7df 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -58,16 +58,12 @@ def test_run(self): # Select all streams and all fields within streams found_catalogs = menagerie.get_catalogs(conn_id) - # removed 'abandoned_checkouts', as per the Doc: - # https://help.shopify.com/en/manual/orders/abandoned-checkouts?st_source=admin&st_campaign=abandoned_checkouts_footer&utm_source=admin&utm_campaign=abandoned_checkouts_footer#review-your-abandoned-checkouts - # abandoned checkouts are saved in the Shopify admin for three months. - # Every Monday, abandoned checkouts that are older than three months are removed from your admin. 
-        # Also no POST call is available for this endpoint: https://shopify.dev/api/admin-rest/2022-01/resources/abandoned-checkouts
-        expected_replication_method = self.expected_replication_method()
-        expected_replication_method.pop("abandoned_checkouts")
-        incremental_streams = {key for key, value in expected_replication_method.items()
+        incremental_streams = {key for key, value in self.expected_replication_method().items()
                                if value == self.INCREMENTAL}
 
+        # get expected replication keys
+        expected_replication_keys = self.expected_replication_keys()
+
         # IF THERE ARE STREAMS THAT SHOULD NOT BE TESTED
         # REPLACE THE EMPTY SET BELOW WITH THOSE STREAMS
@@ -133,6 +129,14 @@ def test_run(self):
                 primary_keys_sync_1 = set(primary_keys_list_1)
                 primary_keys_sync_2 = set(primary_keys_list_2)
 
+                # get replication key-values for all records for both syncs
+                replication_key_sync_1 = [message.get('data').get(expected_rk) for expected_rk in expected_replication_keys.get(stream)
+                                          for message in first_sync_records.get(stream).get('messages')
+                                          if message.get('action') == 'upsert']
+                replication_key_sync_2 = [message.get('data').get(expected_rk) for expected_rk in expected_replication_keys.get(stream)
+                                          for message in second_sync_records.get(stream).get('messages')
+                                          if message.get('action') == 'upsert']
+
                 # verify that each stream has less records than the first connection sync
                 self.assertGreaterEqual(
                     first_sync_record_count.get(stream, 0),
@@ -150,9 +154,12 @@ def test_run(self):
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
-                # loop over minimum bookmark and the start date/state file date for each sync to verify
-                # the minimum bookmark is greater than or equal to the start date/state file date
-                for start_date, target_mark in zip((first_sync_start_date, second_sync_start_date), (first_sync_target_mark, second_sync_target_mark)):
+                # loop over minimum bookmark, the start date/state file date and replication key records for each
+                # sync to verify the minimum bookmark is greater than or equal to the start date/state file date
+                for start_date, target_mark, record_replication_keys in zip(
+                    (first_sync_start_date, second_sync_start_date),
+                    (first_sync_target_mark, second_sync_target_mark),
+                    (replication_key_sync_1, replication_key_sync_2)):
                     target_value = next(iter(target_mark.values()))  # there should be only one
 
                     if target_value:
@@ -169,3 +176,8 @@ def test_run(self):
                         except (OverflowError, ValueError, TypeError):
                             print("bookmarks cannot be converted to dates, "
                                   "can't test start_date for {}".format(stream))
+
+                    # loop over every replication key record and verify we have
+                    # synced records greater than the start date/state file date
+                    for record_replication_key in record_replication_keys:
+                        self.assertGreaterEqual(record_replication_key, start_date)

From 012242ed7143ed01cda56b7464ab26dc065e3189 Mon Sep 17 00:00:00 2001
From: harshpatel4_crest
Date: Mon, 11 Apr 2022 19:09:22 +0530
Subject: [PATCH 7/7] removed unnecessary assertion in start date test

---
 tests/test_start_date.py | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/tests/test_start_date.py b/tests/test_start_date.py
index b603c7df..6a2791eb 100644
--- a/tests/test_start_date.py
+++ b/tests/test_start_date.py
@@ -77,7 +77,6 @@ def test_run(self):
 
         # Count actual rows synced
         first_sync_records = runner.get_records_from_target_output()
-        first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records)
 
         # set the start date for a new connection based off bookmarks largest value
         first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records)
@@ -109,7 +108,6 @@ def test_run(self):
         second_sync_record_count = self.run_sync(conn_id)
         second_total_records = reduce(lambda a, b: a + b, second_sync_record_count.values(), 0)
         second_sync_records = runner.get_records_from_target_output()
-        second_min_bookmarks = self.min_bookmarks_by_stream(second_sync_records)
 
         # verify that at least one record synced and less records synced than the 1st connection
         self.assertGreater(second_total_records, 0)
@@ -146,36 +144,15 @@ def test_run(self):
                 # Verify by primary key values, that all records of the 2nd sync are included in the 1st sync since 2nd sync has a later start date.
                 self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1))
 
-                # verify all data from both syncs >= start_date
-                first_sync_target_mark = first_min_bookmarks.get(stream, {"mark": None})
-                second_sync_target_mark = second_min_bookmarks.get(stream, {"mark": None})
-
                 # get start dates for both syncs
                 first_sync_start_date = self.get_properties()["start_date"]
                 second_sync_start_date = self.start_date
 
-                # loop over minimum bookmark, the start date/state file date and replication key records for each
-                # sync to verify the minimum bookmark is greater than or equal to the start date/state file date
-                for start_date, target_mark, record_replication_keys in zip(
+                # loop over the start date/state file date and replication key records for each sync
+                # to verify the records we synced are greater than the start date/state file date
+                for start_date, record_replication_keys in zip(
                     (first_sync_start_date, second_sync_start_date),
-                    (first_sync_target_mark, second_sync_target_mark),
                     (replication_key_sync_1, replication_key_sync_2)):
-                    target_value = next(iter(target_mark.values()))  # there should be only one
-
-                    if target_value:
-
-                        # it's okay if there isn't target data for a stream
-                        try:
-                            target_value = self.local_to_utc(parse(target_value))
-
-                            # verify that the minimum bookmark sent to the target for the second sync
-                            # is greater than or equal to the start date
-                            self.assertGreaterEqual(target_value,
-                                                    self.local_to_utc(parse(start_date)))
-
-                        except (OverflowError, ValueError, TypeError):
-                            print("bookmarks cannot be converted to dates, "
-                                  "can't test start_date for {}".format(stream))
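
A note on the assertion these patches converge on: assertGreaterEqual(record_replication_key, start_date) compares strings rather than parsed datetimes. That is sound only while both sides share a single fixed ISO-8601 UTC layout, because such timestamps sort lexicographically in the same order as chronologically. A minimal sketch of that assumption (values invented for illustration):

    # ISO-8601 UTC timestamps in one fixed layout sort the same way as strings
    # and as datetimes; mixed layouts or UTC offsets would break this property.
    start_date = "2021-04-01T00:00:00Z"
    record_replication_keys = ["2021-04-02T10:15:00Z", "2021-12-31T23:59:59Z"]

    for record_replication_key in record_replication_keys:
        # string comparison mirrors chronological comparison here
        assert record_replication_key >= start_date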