Skip to content

Commit

Permalink
cleaning duplicates (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
keyn4 authored Aug 1, 2024
1 parent 4e64c65 commit b36b558
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tap_pipedrive/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@


class PipedriveStream(object):
def __init__(self):
# Per-instance list of record ids already seen for this stream. The
# pagination loop in tap.py (do_paginate) checks row["id"] against this
# list and appends new ids so that repeated rows are skipped.
# NOTE(review): membership tests on a list are O(n) per row; a set would
# be O(1), but the caller uses .append, so both sides must change together.
self.ids = []

tap = None
endpoint = ''
key_properties = []
Expand Down
8 changes: 8 additions & 0 deletions tap_pipedrive/tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,15 @@ def do_paginate(self, stream, stream_metadata):
# records with metrics
with singer.metrics.record_counter(stream.schema) as counter:
with singer.Transformer(singer.NO_INTEGER_DATETIME_PARSING) as optimus_prime:
stream_name = stream.get_name()
for row in self.iterate_response(response):
# logic to avoid duplicates HGI-6285
if row["id"] not in stream.ids:
stream.ids.append(row["id"])
else:
logger.info(f"id '{row['id']}' was previously fetched and processed for {stream_name}, skipping duplicate value...")
continue

row = stream.process_row(row)
if not row: # in case of a non-empty response with an empty element
continue
Expand Down

0 comments on commit b36b558

Please sign in to comment.