Skip to content

Commit

Permalink
fixing bug in SafeFastCoref which led to empty texts
Browse files Browse the repository at this point in the history
  • Loading branch information
KasperFyhn committed Nov 27, 2024
1 parent 28782f7 commit 244e18f
Showing 1 changed file with 15 additions and 11 deletions.
26 changes: 15 additions & 11 deletions src/conspiracies/docprocessing/coref/safefastcoref.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import logging

from spacy.util import minibatch

logging.getLogger("fastcoref").setLevel(logging.WARNING)


Expand All @@ -15,17 +17,19 @@ def __init__(self, component: FastCorefResolver):

def pipe(self, stream: Iterable, batch_size: int = 128):
"""Wrap the pipe method of the component."""
# NOTE: this is a rendered diff hunk whose +/- markers and indentation were
# lost in extraction; the removed and the added body of `pipe` appear back
# to back below.
#
# --- Removed by this commit: one try/except around the whole stream. ---
# On failure the fallback iterated `stream` again and yielded the docs
# without assigning `doc._.resolved_text`.
try:
yield from self.component.pipe(
stream,
batch_size=batch_size,
resolve_text=True,
)
except Exception as e:
# Log the error and return the unprocessed documents
logging.error(f"Error in SafeFastCoref pipe: {e}")
for doc in stream:
yield doc # Return the original document
# --- Added by this commit: handle the stream one minibatch at a time. ---
# Each minibatch `mb` gets its own try/except, so the fallback below only
# covers the documents of the minibatch that raised.
for mb in minibatch(stream, size=batch_size):
try:
yield from self.component.pipe(
mb,
batch_size=batch_size,
resolve_text=True,
)
except Exception as e:
# Log the error and return the unprocessed documents
logging.error(f"Error in SafeFastCoref pipe: {e}")
# NOTE(review): if component.pipe raised after already yielding some docs
# of this minibatch, those docs would be yielded a second time here —
# confirm that it fails before yielding anything.
for doc in mb:
# Fall back to the unresolved text so resolved_text is always assigned.
doc._.resolved_text = doc.text
yield doc # Return the original document

def __call__(self, doc):
"""Wrap the __call__ method of the component."""
Expand Down

0 comments on commit 244e18f

Please sign in to comment.