Commit 369d8c0: Start indexing search documents via celery
1 parent: b0518de
Showing 17 changed files with 272 additions and 473 deletions.
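The headline change is a new module (added whole, per the hunk header below) that defines the Celery tasks driving the search reindex.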
@@ -0,0 +1,93 @@
from random import randrange

from celery import chain, shared_task
from sqlalchemy import select

from palace.manager.celery.task import Task
from palace.manager.service.celery.celery import QueueNames
from palace.manager.sqlalchemy.model.work import Work
from palace.manager.util.log import elapsed_time_logging


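# Full-jitter exponential backoff: the delay grows as a power of 3 with each
# retry (1, 3, 9, 27, ... seconds) plus 0-2 seconds of random jitter, so
# retried tasks don't all hit the search service at the same moment.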
def exponential_backoff(retries: int) -> int:
    return 3**retries + randrange(0, 3)


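# Rebuild the search index in batches: each run indexes one batch of
# presentation-ready works, then replaces itself with a task for the next
# offset, stopping once a batch comes back smaller than batch_size.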
@shared_task(queue=QueueNames.default, bind=True, max_retries=5)
def search_reindex(task: Task, offset: int = 0, batch_size: int = 500) -> None:
    index = task.services.search.index()

    task.log.info(
        f"Running search reindex at offset {offset} with batch size {batch_size}."
    )

    with (
        task.session() as session,
        elapsed_time_logging(
            log_method=task.log.info,
            message_prefix="Works queried from database",
            skip_start=True,
        ),
    ):
        works = [
            w.id
            for w in session.execute(
                select(Work.id)
                .where(Work.presentation_ready == True)
                .order_by(Work.id)
                .limit(batch_size)
                .offset(offset)
            )
        ]
        documents = Work.to_search_documents(session, works)

    with elapsed_time_logging(
        log_method=task.log.info, message_prefix="Works added to index", skip_start=True
    ):
        failed_documents = index.add_documents(documents=documents)
        if failed_documents:
            wait_time = exponential_backoff(task.request.retries)
            task.log.error(
                f"Failed to index {len(failed_documents)} works. Retrying in {wait_time} seconds."
            )
            raise task.retry(countdown=wait_time)

    if len(works) == batch_size:
        # This task is complete, but there are more works waiting to be indexed. Requeue ourselves
        # to process the next batch.
        raise task.replace(
            search_reindex.s(offset=offset + batch_size, batch_size=batch_size)
        )

    task.log.info("Finished search reindex.")


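# Advance the search service's read pointer to the highest known index
# revision, so that reads are served from the newest index.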
@shared_task(queue=QueueNames.default, bind=True)
def update_read_pointer(task: Task) -> None:
    task.log.info("Updating read pointer.")
    service = task.services.search.service()
    revision_directory = task.services.search.revision_directory()
    revision = revision_directory.highest()
    service.read_pointer_set(revision)
    task.log.info(
        f"Updated read pointer ({service.base_revision_name()} v{revision.version})."
    )


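# Index a single work by id, retrying with backoff if the search service
# reports an error for the document.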
@shared_task(queue=QueueNames.default, bind=True)
def index_work(task: Task, work_id: int) -> None:
    index = task.services.search.index()
    with task.session() as session:
        [document] = Work.to_search_documents(session, [work_id])
        error = index.add_document(document=document)
        if error:
            wait_time = exponential_backoff(task.request.retries)
            task.log.error(
                f"Failed to index work {work_id}: {error}. Retrying in {wait_time} seconds."
            )
            raise task.retry(countdown=wait_time)

    task.log.info(f"Indexed work {work_id}.")


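# Full reindex workflow (named for search migrations): rebuild the index,
# then advance the read pointer. .si() creates immutable signatures, so
# neither task receives the other's return value.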
do_migration = chain(search_reindex.si(), update_read_pointer.si())
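A minimal usage sketch, assuming a Celery app and broker are already configured for this codebase; the work_id value is a hypothetical example.

# Rebuild the whole index, then move the read pointer:
do_migration.apply_async()

# Run just the batched reindex from the start of the Work table:
search_reindex.delay()

# Re-index a single work after it changes (work_id is hypothetical):
index_work.delay(work_id=42)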