diff --git a/impresso/models/userBitmap.py b/impresso/models/userBitmap.py index 17974ee..521a3b4 100644 --- a/impresso/models/userBitmap.py +++ b/impresso/models/userBitmap.py @@ -66,6 +66,12 @@ def get_up_to_date_bitmap(self) -> bytes: return int_to_bytes(value) def get_bitmap_as_int(self): + """ + Converts the bitmap from bytes to an integer. + + Returns: + int: The bitmap as an integer. + """ return int.from_bytes(self.bitmap, byteorder="big") def get_bitmap_as_key_str(self): diff --git a/impresso/tasks.py b/impresso/tasks.py index 57262ce..ea35ad2 100644 --- a/impresso/tasks.py +++ b/impresso/tasks.py @@ -112,7 +112,7 @@ def export_query_as_csv_progress( job_id: int, query: str, search_query_id: int, - user_bitmap_key: str, + user_bitmap_key: int, query_hash: str = "", progress: float = 0.0, skip: int = 0, @@ -130,7 +130,7 @@ def export_query_as_csv_progress( job_id (int): The ID of the job to update. query (str): The query string to execute. search_query_id (int): The ID of the search query. - user_bitmap_key (str): The user bitmap key. + user_bitmap_key (int): The user bitmap key, as int. query_hash (str, optional): The hash of the query. Defaults to an empty string. skip (int, optional): The number of records to skip. Defaults to 0. limit (int, optional): The maximum number of records to retrieve per page. Defaults to 100. @@ -199,20 +199,16 @@ def export_query_as_csv( description=description, extra={"query": query, "query_hash": query_hash}, ) + attachment = Attachment.create_from_job(job, extension="csv") # if decri # get user bitmap, if any - try: - user = User.objects.get(pk=user_id) - user_bitmap_key = user.bitmap.get_bitmap_as_key_str() - except User.bitmap.RelatedObjectDoesNotExist: - logger.info(f"[job:{job.pk} user:{user_id}] no bitmap found for user!") - user_bitmap_key = bin(UserBitmap.USER_PLAN_GUEST)[:2] - + user_bitmap, created = UserBitmap.objects.get_or_create(user_id=user_id) logger.info( f"[job:{job.pk} user:{user_id}] launched! " - f"query:{query_hash} bitmap:{user_bitmap_key} description:{description}" + f"- Using bitmap {user_bitmap.get_bitmap_as_int()} (created:{created}) " + f"- attachment:{attachment.pk}" ) - attachment = Attachment.create_from_job(job, extension="csv") + update_job_progress( task=self, job=job, @@ -226,7 +222,7 @@ def export_query_as_csv( query=query, query_hash=query_hash, search_query_id=search_query_id, - user_bitmap_key=user_bitmap_key, + user_bitmap_key=user_bitmap.get_bitmap_as_int(), ) @@ -238,12 +234,22 @@ def export_collection_as_csv( query: str, query_hash: str = "", ) -> None: - try: - user = User.objects.get(pk=user_id) - user_bitmap_key = user.bitmap.get_bitmap_as_key_str() - except User.bitmap.RelatedObjectDoesNotExist: - logger.warning(f"[job:{job.pk} user:{user_id}] no bitmap found for user!") - user_bitmap_key = bin(UserBitmap.USER_PLAN_GUEST)[:2] + """ + Initiates a job to export a collection as a CSV file and starts the export_query_as_csv_progress task + like export_query_as_csv. + + Args: + self: The instance of the class. + user_id (int): The ID of the user initiating the export. + collection_id (int): The ID of the collection to be exported. + query (str): The query string to be exported. + query_hash (str, optional): A hash of the query string. Defaults to an empty string. + + Returns: + None + + """ + user_bitmap, created = UserBitmap.objects.get_or_create(user_id=user_id) try: collection = Collection.objects.get(pk=collection_id, creator__id=user_id) except Collection.DoesNotExist: @@ -260,12 +266,15 @@ def export_collection_as_csv( "query_hash": query_hash, }, ) + # create empty attachment and attach automatically to the job + attachment = Attachment.create_from_job(job, extension="csv") logger.info( f"[job:{job.pk} user:{user_id}] launched! " - f"query:{query_hash} bitmap:{user_bitmap_key} description:{job.description}" + f"- Using bitmap {user_bitmap.get_bitmap_as_int()} (created:{created}) " + f"- attachment:{attachment.pk} " + f"- query:{query_hash} description:{job.description}" ) - # create empty attachment and attach automatically to the job - Attachment.create_from_job(job, extension="csv") + # add query to extra. Job status should be INIT update_job_progress( task=self, @@ -279,7 +288,7 @@ def export_collection_as_csv( job_id=job.pk, query=query, query_hash=query_hash, - user_bitmap_key=user_bitmap_key, + user_bitmap_key=user_bitmap.get_bitmap_as_int(), ) diff --git a/impresso/tests/test_solr.py b/impresso/tests/test_solr.py index ade23c1..f6236a4 100644 --- a/impresso/tests/test_solr.py +++ b/impresso/tests/test_solr.py @@ -1,6 +1,7 @@ import unittest from impresso.utils.solr import serialize_solr_doc_content_item_to_plain_dict from impresso.utils.solr import mapper_doc_redact_contents +from impresso.utils.bitmask import BitMask64 class SolrTestCase(unittest.TestCase): @@ -49,7 +50,7 @@ def test_mapper_doc_redact_contents(self): result_redacted = mapper_doc_redact_contents( doc={**doc}, # not working user bitmask key - user_bitmap_key="0000", + user_bitmask=BitMask64("0000"), ) self.assertEqual(result_redacted.get("content"), "[redacted]") self.assertEqual(result_redacted.get("title"), doc.get("title")) @@ -57,7 +58,7 @@ def test_mapper_doc_redact_contents(self): result_ok = mapper_doc_redact_contents( doc={**doc}, # working user bitmask key - user_bitmap_key="1100", # 0b10110101 + user_bitmask=BitMask64("1100"), # 0b10110101 ) self.assertEqual( result_ok.get("content"), diff --git a/impresso/utils/solr.py b/impresso/utils/solr.py index e4ea7f2..f189c7b 100644 --- a/impresso/utils/solr.py +++ b/impresso/utils/solr.py @@ -31,7 +31,7 @@ def serialize_solr_doc_content_item_to_plain_dict( return result -def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict: +def mapper_doc_redact_contents(doc: dict, user_bitmask: BitMask64) -> dict: """ Redacts the content of a document based on its bitmap key (_bm_get_tr_s) or its availability and year. @@ -43,7 +43,7 @@ def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict: Args: doc (dict): A dictionary representing the document obtained via the serializer function . to be considered valid, tt must contain the key "year". - user_bitmap_key (str): The user's bitmap key, as string. + user_bitmask (BitMask64): The user's bitmap key, as BitMask64 instance. Returns: dict: The modified document dictionary with redacted content if applicable. @@ -62,12 +62,12 @@ def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict: if doc.get("_bm_get_tr_i", None) is not None: is_transcript_available = is_access_allowed( - accessor=BitMask64(user_bitmap_key), + accessor=user_bitmask, content=BitMask64(doc["_bm_get_tr_i"], reverse=True), ) elif doc.get("_bm_get_tr_s", None) is not None: is_transcript_available = is_access_allowed( - accessor=BitMask64(user_bitmap_key), + accessor=user_bitmask, # nop need to reverse if this is a string content=BitMask64(doc["_bm_get_tr_s"]), ) diff --git a/impresso/utils/tasks/export.py b/impresso/utils/tasks/export.py index 00ce0be..30dfc76 100644 --- a/impresso/utils/tasks/export.py +++ b/impresso/utils/tasks/export.py @@ -8,6 +8,7 @@ from ...models import Job from ...solr import find_all from ...utils.tasks import get_pagination +from ...utils.bitmask import BitMask64 from ...utils.solr import ( mapper_doc_remove_private_collections, mapper_doc_redact_contents, @@ -44,7 +45,7 @@ def helper_export_query_as_csv_progress( job: Job, query: str, query_hash: str, - user_bitmap_key: str, + user_bitmap_key: int, ignore_fields: list = [], skip: int = 0, limit: int = 100, @@ -59,6 +60,8 @@ def helper_export_query_as_csv_progress( Args: job (Job): The job object containing user profile information. query (str): The SOLR query string. + query_hash (str): The hash of the query string. + user_bitmap_key (int): The user's bitmap key. skip (int, optional): The number of items to skip. Defaults to 0. limit (int, optional): The maximum number of items per page. Defaults to 0. logger (Any, optional): The logger object. Defaults to None. @@ -93,7 +96,7 @@ def helper_export_query_as_csv_progress( loops, progress, ) - + user_bitmask = BitMask64(user_bitmap_key) logger.info( f"[job:{job.pk} user:{job.creator.pk}] Opening file in APPEND mode:" f"{job.attachment.upload.path}" @@ -158,7 +161,7 @@ def helper_export_query_as_csv_progress( ) content_item = mapper_doc_redact_contents( doc=content_item, - user_bitmap_key=user_bitmap_key, + user_bitmask=user_bitmask, ) # removed unwanted fields from the content_item content_item = {k: v for k, v in content_item.items() if k in fieldnames}