From 67ac76cecbb4c799ec3a61bbbb7f194624d72359 Mon Sep 17 00:00:00 2001 From: RuslanBergenov Date: Mon, 24 Jan 2022 17:57:40 -0700 Subject: [PATCH] docs: warnings about risks of using incremental (MERGE) replication method --- README.md | 2 +- target_bigquery/processhandler.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4c86021..19aae79 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ sample [target-config.json](/sample_config/target-config-exchange-rates-api.json * `truncate`: Deleting all previous rows and uploading the new ones to the table * `incremental`: **Upserting** new rows into the table, using the **primary key** given by the tap connector (if it finds an old row with same key, updates it. Otherwise it inserts the new row) - - WARNING: we do not recommend using `incremental` option as it might result in loss of production data. We recommend using `append` option instead which will preserve historical data. + - WARNING: We do not recommend using the `incremental` option (which uses the `MERGE` SQL statement). It might result in loss of production data, because historical records get updated. Instead, we recommend using the `append` replication method, which will preserve historical data. Sample **target-config.json** file: diff --git a/target_bigquery/processhandler.py b/target_bigquery/processhandler.py index 620bc76..16f2e96 100644 --- a/target_bigquery/processhandler.py +++ b/target_bigquery/processhandler.py @@ -266,8 +266,7 @@ def _do_temp_table_based_load(self, rows): incremental_success = False if self.incremental: self.logger.info(f"Copy {tmp_table_name} to {self.tables[stream]} by INCREMENTAL") - #TODO: reword the warning about this replication method - self.logger.warning(f"INCREMENTAL replication method might result in data loss because we are editing the production data during the sync operation. 
We recommend that you use APPEND target-bigquery replication instead.") + self.logger.warning(f"INCREMENTAL replication method (MERGE SQL statement) is not recommended. It might result in loss of production data, because historical records get updated during the sync operation. Instead, we recommend using the APPEND replication method, which will preserve historical data.") table_id = f"{self.project_id}.{self.dataset.dataset_id}.{self.tables[stream]}" try: self.client.get_table(table_id)