Skip to content

Commit

Permalink
brought back old fields/id/before_write verifis
Browse files Browse the repository at this point in the history
  • Loading branch information
henri123lemoine committed Aug 6, 2023
1 parent f6c763c commit 1bfe090
Showing 1 changed file with 12 additions and 24 deletions.
36 changes: 12 additions & 24 deletions align_data/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,34 +96,22 @@ def generate_id_string(self) -> str:

# Check that every identifying field of the entry is set: `missing` collects
# the names in self.__id_fields whose attribute value on this entry is falsy.
# NOTE(review): this is a rendered diff whose +/- markers and indentation were
# stripped, so two versions of the body are interleaved below. Judging by the
# commit title and the 12-addition / 24-deletion count, the `if missing:` ...
# `return 'not_missing'` lines appear to be the REMOVED version (log a warning
# and return a status string) and the trailing `assert` appears to be the
# ADDED version (raise AssertionError listing the missing fields, return
# nothing) -- confirm against the repository.
def verify_fields(self):
missing = [field for field in self.__id_fields if not getattr(self, field)]
# Presumably removed by this commit: soft failure reported via the return value.
if missing:
logger.warning(f'Entry is missing the following fields: {missing}')
return 'missing'
return 'not_missing'
# Presumably added by this commit: hard failure when any id field is unset.
assert not missing, f'Entry is missing the following fields: {missing}'

# Check that the stored id agrees with the id derived from the entry's fields.
# Fails with AssertionError when self.id is None, or when self.id differs from
# the MD5 hex digest of self.generate_id_string().
# NOTE(review): indentation was stripped by the diff rendering; every line
# below the `def` belongs to the method body.
def verify_id(self):
assert self.id is not None, "Entry is missing id"

# Recompute the id the same way before_write generates one for a new entry.
# NOTE(review): hashlib.md5 requires a bytes-like argument, while the hunk
# header annotates generate_id_string as `-> str` -- confirm it really
# returns bytes, otherwise this call raises TypeError.
id_string = self.generate_id_string()
id_from_fields = hashlib.md5(id_string).hexdigest()
assert self.id == id_from_fields, f"Entry id {self.id} does not match id from id_fields, {id_from_fields}"

# Validate `target` and make sure it has an id before it is written.
# NOTE(review): the (mapper, connection, target) signature matches a SQLAlchemy
# mapper-event listener; the registration is not visible here -- confirm.
# NOTE(review): this is a rendered diff whose +/- markers and indentation were
# stripped, so the old and new bodies are interleaved. The per-line notes
# below follow the commit title and the 12-addition / 24-deletion count;
# confirm against the repository. The hunk is followed by collapsed lines, so
# the method may continue past what is shown.
@classmethod
def before_write(cls, mapper, connection, target):
# Presumably REMOVED by this commit, from here down to `target.incomplete = True`:
# look up an existing Article by id, copy changed fields onto it, flag it for a
# Pinecone update, and otherwise set `incomplete` from verify_fields()' status.
session = Session(connection)

# Check if an Article with the same id already exists
db_article = session.query(Article).filter(Article.id == target.id).one_or_none()
if db_article is not None:
# Compare fields and update if necessary
for field in ['title', 'url', 'source', 'source_type', 'authors', 'text', 'date_published', 'meta']:
if getattr(db_article, field) != getattr(target, field):
setattr(db_article, field, getattr(target, field))
db_article.pinecone_update_required = True
return

# Verify required fields
if target.verify_fields() == 'not_missing':
target.incomplete = False
target.pinecone_update_required = True
else:
target.incomplete = True
# Presumably ADDED by this commit: the call's result is not inspected, so any
# failure has to surface as an exception raised inside verify_fields().
target.verify_fields()

# Generate id if necessary
# Presumably REMOVED: only a None id triggered generation.
if target.id is None:
# Presumably ADDED (next three lines): a truthy id is verified against the
# fields; any falsy id, not just None, falls through to generation.
if target.id:
target.verify_id()
else:
# Derive the id as the MD5 hex digest of the entry's id string.
id_string = target.generate_id_string()
target.id = hashlib.md5(id_string).hexdigest()

Expand Down

0 comments on commit 1bfe090

Please sign in to comment.