Skip to content

Commit

Permalink
finalize export csv
Browse files Browse the repository at this point in the history
  • Loading branch information
danieleguido committed Nov 14, 2024
1 parent 2b573c1 commit a6510e9
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 31 deletions.
6 changes: 6 additions & 0 deletions impresso/models/userBitmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ def get_up_to_date_bitmap(self) -> bytes:
return int_to_bytes(value)

def get_bitmap_as_int(self):
    """
    Return the stored bitmap interpreted as an integer.

    The bytes held in ``self.bitmap`` are read in big-endian order.

    Returns:
        int: The integer value of the bitmap.
    """
    raw_bitmap = self.bitmap
    return int.from_bytes(raw_bitmap, "big")

def get_bitmap_as_key_str(self):
Expand Down
53 changes: 31 additions & 22 deletions impresso/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def export_query_as_csv_progress(
job_id: int,
query: str,
search_query_id: int,
user_bitmap_key: str,
user_bitmap_key: int,
query_hash: str = "",
progress: float = 0.0,
skip: int = 0,
Expand All @@ -130,7 +130,7 @@ def export_query_as_csv_progress(
job_id (int): The ID of the job to update.
query (str): The query string to execute.
search_query_id (int): The ID of the search query.
user_bitmap_key (str): The user bitmap key.
user_bitmap_key (int): The user bitmap key, as int.
query_hash (str, optional): The hash of the query. Defaults to an empty string.
skip (int, optional): The number of records to skip. Defaults to 0.
limit (int, optional): The maximum number of records to retrieve per page. Defaults to 100.
Expand Down Expand Up @@ -199,20 +199,16 @@ def export_query_as_csv(
description=description,
extra={"query": query, "query_hash": query_hash},
)
attachment = Attachment.create_from_job(job, extension="csv")
# if decri
# get user bitmap, if any
try:
user = User.objects.get(pk=user_id)
user_bitmap_key = user.bitmap.get_bitmap_as_key_str()
except User.bitmap.RelatedObjectDoesNotExist:
logger.info(f"[job:{job.pk} user:{user_id}] no bitmap found for user!")
user_bitmap_key = bin(UserBitmap.USER_PLAN_GUEST)[:2]

user_bitmap, created = UserBitmap.objects.get_or_create(user_id=user_id)
logger.info(
f"[job:{job.pk} user:{user_id}] launched! "
f"query:{query_hash} bitmap:{user_bitmap_key} description:{description}"
f"- Using bitmap {user_bitmap.get_bitmap_as_int()} (created:{created}) "
f"- attachment:{attachment.pk}"
)
attachment = Attachment.create_from_job(job, extension="csv")

update_job_progress(
task=self,
job=job,
Expand All @@ -226,7 +222,7 @@ def export_query_as_csv(
query=query,
query_hash=query_hash,
search_query_id=search_query_id,
user_bitmap_key=user_bitmap_key,
user_bitmap_key=user_bitmap.get_bitmap_as_int(),
)


Expand All @@ -238,12 +234,22 @@ def export_collection_as_csv(
query: str,
query_hash: str = "",
) -> None:
try:
user = User.objects.get(pk=user_id)
user_bitmap_key = user.bitmap.get_bitmap_as_key_str()
except User.bitmap.RelatedObjectDoesNotExist:
logger.warning(f"[job:{job.pk} user:{user_id}] no bitmap found for user!")
user_bitmap_key = bin(UserBitmap.USER_PLAN_GUEST)[:2]
"""
Initiates a job to export a collection as a CSV file and starts the export_query_as_csv_progress task
like export_query_as_csv.
Args:
self: The instance of the class.
user_id (int): The ID of the user initiating the export.
collection_id (int): The ID of the collection to be exported.
query (str): The query string to be exported.
query_hash (str, optional): A hash of the query string. Defaults to an empty string.
Returns:
None
"""
user_bitmap, created = UserBitmap.objects.get_or_create(user_id=user_id)
try:
collection = Collection.objects.get(pk=collection_id, creator__id=user_id)
except Collection.DoesNotExist:
Expand All @@ -260,12 +266,15 @@ def export_collection_as_csv(
"query_hash": query_hash,
},
)
# create empty attachment and attach automatically to the job
attachment = Attachment.create_from_job(job, extension="csv")
logger.info(
f"[job:{job.pk} user:{user_id}] launched! "
f"query:{query_hash} bitmap:{user_bitmap_key} description:{job.description}"
f"- Using bitmap {user_bitmap.get_bitmap_as_int()} (created:{created}) "
f"- attachment:{attachment.pk} "
f"- query:{query_hash} description:{job.description}"
)
# create empty attachment and attach automatically to the job
Attachment.create_from_job(job, extension="csv")

# add query to extra. Job status should be INIT
update_job_progress(
task=self,
Expand All @@ -279,7 +288,7 @@ def export_collection_as_csv(
job_id=job.pk,
query=query,
query_hash=query_hash,
user_bitmap_key=user_bitmap_key,
user_bitmap_key=user_bitmap.get_bitmap_as_int(),
)


Expand Down
5 changes: 3 additions & 2 deletions impresso/tests/test_solr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from impresso.utils.solr import serialize_solr_doc_content_item_to_plain_dict
from impresso.utils.solr import mapper_doc_redact_contents
from impresso.utils.bitmask import BitMask64


class SolrTestCase(unittest.TestCase):
Expand Down Expand Up @@ -49,15 +50,15 @@ def test_mapper_doc_redact_contents(self):
result_redacted = mapper_doc_redact_contents(
doc={**doc},
# not working user bitmask key
user_bitmap_key="0000",
user_bitmask=BitMask64("0000"),
)
self.assertEqual(result_redacted.get("content"), "[redacted]")
self.assertEqual(result_redacted.get("title"), doc.get("title"))

result_ok = mapper_doc_redact_contents(
doc={**doc},
# working user bitmask key
user_bitmap_key="1100", # 0b10110101
user_bitmask=BitMask64("1100"), # 0b10110101
)
self.assertEqual(
result_ok.get("content"),
Expand Down
8 changes: 4 additions & 4 deletions impresso/utils/solr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def serialize_solr_doc_content_item_to_plain_dict(
return result


def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict:
def mapper_doc_redact_contents(doc: dict, user_bitmask: BitMask64) -> dict:
"""
Redacts the content of a document based on its bitmap key (_bm_get_tr_i or _bm_get_tr_s)
or its availability and year.
Expand All @@ -43,7 +43,7 @@ def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict:
Args:
doc (dict): A dictionary representing the document obtained via the serializer function.
To be considered valid, it must contain the key "year".
user_bitmap_key (str): The user's bitmap key, as string.
user_bitmask (BitMask64): The user's bitmap key, as BitMask64 instance.
Returns:
dict: The modified document dictionary with redacted content if applicable.
Expand All @@ -62,12 +62,12 @@ def mapper_doc_redact_contents(doc: dict, user_bitmap_key: str) -> dict:

if doc.get("_bm_get_tr_i", None) is not None:
is_transcript_available = is_access_allowed(
accessor=BitMask64(user_bitmap_key),
accessor=user_bitmask,
content=BitMask64(doc["_bm_get_tr_i"], reverse=True),
)
elif doc.get("_bm_get_tr_s", None) is not None:
is_transcript_available = is_access_allowed(
accessor=BitMask64(user_bitmap_key),
accessor=user_bitmask,
# no need to reverse if this is a string
content=BitMask64(doc["_bm_get_tr_s"]),
)
Expand Down
9 changes: 6 additions & 3 deletions impresso/utils/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ...models import Job
from ...solr import find_all
from ...utils.tasks import get_pagination
from ...utils.bitmask import BitMask64
from ...utils.solr import (
mapper_doc_remove_private_collections,
mapper_doc_redact_contents,
Expand Down Expand Up @@ -44,7 +45,7 @@ def helper_export_query_as_csv_progress(
job: Job,
query: str,
query_hash: str,
user_bitmap_key: str,
user_bitmap_key: int,
ignore_fields: list = [],
skip: int = 0,
limit: int = 100,
Expand All @@ -59,6 +60,8 @@ def helper_export_query_as_csv_progress(
Args:
job (Job): The job object containing user profile information.
query (str): The SOLR query string.
query_hash (str): The hash of the query string.
user_bitmap_key (int): The user's bitmap key.
skip (int, optional): The number of items to skip. Defaults to 0.
limit (int, optional): The maximum number of items per page. Defaults to 100.
logger (Any, optional): The logger object. Defaults to None.
Expand Down Expand Up @@ -93,7 +96,7 @@ def helper_export_query_as_csv_progress(
loops,
progress,
)

user_bitmask = BitMask64(user_bitmap_key)
logger.info(
f"[job:{job.pk} user:{job.creator.pk}] Opening file in APPEND mode:"
f"{job.attachment.upload.path}"
Expand Down Expand Up @@ -158,7 +161,7 @@ def helper_export_query_as_csv_progress(
)
content_item = mapper_doc_redact_contents(
doc=content_item,
user_bitmap_key=user_bitmap_key,
user_bitmask=user_bitmask,
)
# removed unwanted fields from the content_item
content_item = {k: v for k, v in content_item.items() if k in fieldnames}
Expand Down

0 comments on commit a6510e9

Please sign in to comment.