Skip to content

Commit

Permalink
feature/bitmap solr (#76)
Browse files Browse the repository at this point in the history
* Create Github workflow test.yml

* add tests for job models and bitmap check with in-memory sqlite db

* update settings IMPRESSO_SOLR_FIELDS and add IMPRESSO_SOLR_FIELDS_AS_LIST. Change django command accordingly

* move Solr helper functions to a specific `impresso.utils.solr` module

* use dotenv lib to retrieve variables

* lint with black and add documentation to Profile Model

* Update tasks.py

* upgrade to django 5.1.3

* improve admin for user bitmaps

* finalize export csv

* update progress method to send a more structured message


---------

Co-authored-by: Daniele Guido <[email protected]>
  • Loading branch information
danieleguido and danieleguido authored Nov 18, 2024
1 parent 17245e1 commit f876ac1
Show file tree
Hide file tree
Showing 26 changed files with 1,252 additions and 436 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# CI workflow: runs the Django test suite on every push and pull request
# that targets the `develop` branch.
name: Tests
on:
  push:
    branches: ['develop']
  pull_request:
    branches: ['develop']
jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Check out the code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.4'

      - name: Install pipenv
        run: pip install pipenv

      - name: Install dependencies
        run: pipenv install --dev

      # The values written below are placeholders only; no real credentials
      # are stored in this workflow.
      # NOTE(review): IMPRESSO_DB_ENGINE=mysal looks like a typo for "mysql";
      # confirm whether the settings used by the tests actually read this value
      # before changing it.
      - name: Add a .env file
        run: |
          echo "SECRET_KEY=ThisisaVeryverysecretkey" >> .env
          echo "DEBUG=True" >> .env
          echo "ALLOWED_HOSTS=localhost" >> .env
          echo "CSRF_TRUSTED_ORIGINS=http://localhost" >> .env
          echo "IMPRESSO_DB_ENGINE=mysal" >> .env
          echo "IMPRESSO_DB_NAME=xxxxxxxxxxxxxxxxxx" >> .env
          echo "IMPRESSO_DB_USER=xxxxxxxxxxxxxxxxxx" >> .env
          echo "IMPRESSO_DB_PASSWORD=xxxxxxxxxxxxxx" >> .env
          echo "IMPRESSO_DB_HOST=localhost" >> .env
          echo "IMPRESSO_DB_PORT=0000" >> .env
          echo "IMPRESSO_SOLR_URL=http://localhost:8983/solr" >> .env
          echo "IMPRESSO_SOLR_PASSAGES_URL=http://localhost:8983/solr/passages" >> .env
          echo "IMPRESSO_SOLR_USER=ssssssssssssssss" >> .env
          echo "IMPRESSO_SOLR_USER_WRITE=ssssssssss" >> .env
          echo "IMPRESSO_SOLR_PASSWORD=ssssssssssss" >> .env
          echo "IMPRESSO_SOLR_PASSWORD_WRITE=ssssss" >> .env

      # Create the log file up front so it exists before the tests start.
      - name: Set up debug logging, this requires a specific debug folder
        run: |
          mkdir -p logs
          touch logs/debug.log

      - name: Run Django tests
        run: pipenv run ./manage.py test
        env:
          # Settings module Django loads for the test run.
          DJANGO_SETTINGS_MODULE: impresso.settings
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ pip = "*"
celery = "*"
requests = "*"
redis = "*"
django = "==5.0.8"
django = "==5.1.3"
pymysql = "*"
django-registration = "*"
gunicorn = "*"
python-dotenv = "*"

[dev-packages]
"flake8" = "*"
Expand Down
43 changes: 26 additions & 17 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 22 additions & 20 deletions impresso/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.contrib.auth.admin import UserAdmin as BaseUserAdmin
from django.contrib.auth.models import User
from django.utils.translation import ngettext

from django.utils import timezone
from .models import Profile, Issue, Job, Page, Newspaper
from .models import SearchQuery, ContentItem
from .models import Collection, CollectableItem, Tag, TaggableItem
Expand Down Expand Up @@ -38,34 +38,36 @@ class UserBitmapAdmin(admin.ModelAdmin):
"date_accepted_terms",
)
search_fields = ["user__username", "user__email"]
actions = ["set_terms_accepted_date"]

def num_subscriptions(self, obj):
return obj.subscriptions.count()

def bitmap_display(self, obj):
if obj.bitmap is None:
return ""
return bin(int.from_bytes(obj.bitmap, byteorder="big"))
return bin(obj.get_bitmap_as_int())

def user_plan_display(self, obj):
if obj.bitmap is None:
return "-"
bitmap_int = int.from_bytes(obj.bitmap, byteorder="big")
bitmap_length = bitmap_int.bit_length()
# Extract the first 5 bits
bitmap_plan = (
bitmap_int >> (bitmap_length - UserBitmap.BITMAP_PLAN_MAX_LENGTH)
) & 0b11111
if bitmap_plan == UserBitmap.USER_PLAN_GUEST:
return "Guest"
if bitmap_plan == UserBitmap.USER_PLAN_AUTH_USER:
return "Impresso Registered User"
if bitmap_plan == UserBitmap.USER_PLAN_EDUCATIONAL:
return "Student or Teacher - Educational User"
if bitmap_plan == UserBitmap.USER_PLAN_RESEARCHER:
return "Researcher - Academic User"

return bin(bitmap_plan)
return obj.get_user_plan()

@admin.action(description="Accept the terms of use for selected users")
def set_terms_accepted_date(self, request, queryset):
    """Admin action: stamp the selected user bitmaps as having accepted the terms.

    Each selected row gets ``date_accepted_terms`` set to the current time and
    is persisted through its own ``save()`` call (not a bulk update). A success
    message with the number of selected rows is then shown to the admin user.

    Args:
        request: The current admin request, used to deliver the message.
        queryset: The rows selected in the admin changelist.
    """
    selected_count = queryset.count()

    # Save row by row so every instance goes through its regular save().
    for bitmap in queryset:
        bitmap.date_accepted_terms = timezone.now()
        bitmap.save()

    template = ngettext(
        "%d user accepted the terms of use.",
        "%d users accepted the terms of use.",
        selected_count,
    )
    self.message_user(request, template % selected_count, messages.SUCCESS)

user_plan_display.short_description = "User Plan"

Expand Down
65 changes: 43 additions & 22 deletions impresso/base.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,54 @@
import os, re
from django.core.exceptions import ImproperlyConfigured
from pathlib import Path # python3 only
from dotenv import dotenv_values
from typing import Any, Optional

# Select the dotenv file from the ENV environment variable:
# e.g. ENV=production selects ".production.env"; without ENV, ".env" is used.
if "ENV" in os.environ:
    dotenv_filename = ".{0}.env".format(os.environ.get("ENV", ""))
else:
    dotenv_filename = ".env"

# The path is relative to the current working directory.
dotenv_path = str(Path(".") / dotenv_filename)
dotenv_dict = dotenv_values(dotenv_path=dotenv_path, verbose=True)

print(f"Loading env file: \033[94m{dotenv_path}\033[0m")

# Refuse to continue when the selected dotenv file does not exist.
if not os.path.exists(dotenv_path):
    raise ImproperlyConfigured("No .env file found at {0}".format(dotenv_path))

# for k, v in dotenv_dict.items():
# print("{0}={1}".format(k, v))


def get_env_variable(var_name: str, default: Optional[Any] = None) -> Any:
"""
Retrieve the value of an environment variable based on the selected environment file.
The function first checks if the variable is defined in the dotenv file corresponding to the
current environment mode, as determined by the `ENV` setting. If `ENV` is set to a specific value
(e.g., `test`), the function loads variables from `.test.env`. If the variable is not found in
the dotenv file, it then checks the system's environment variables. If still not found, it returns
the `default` value if provided, or raises an error if required.
def dotenv_values(dotenv_path):
lines = []
with open(dotenv_path) as fp:
lines = fp.read().splitlines()
Environment Modes:
Set `ENV` to specify which dotenv file to load:
- `ENV=production` loads `.production.env`.
- `ENV=test` loads `.test.env`.
- If `ENV` is not set, the default `.env` file may be used.
# get tuples of values,property splitting each line of the file
lines = map(lambda l: tuple(re.split(r"\s*=\s*", l, 1)), filter(None, lines))
lines = list(lines)
print(f"dotenv_values: found {len(lines)} valid lines")
if not lines:
return dict()
return dict(lines)
Args:
var_name (str): Name of the environment variable to retrieve.
default (Optional[Any]): Value to return if the variable is not found. Defaults to None.
Returns:
Any: The value of the environment variable or the `default` value if not found.
def get_env_variable(var_name, default=None):
Raises:
ImproperlyConfigured: If the environment variable is not found and no `default` is provided.
Example:
>>> get_env_variable('DATABASE_URL', default='sqlite:///:memory:')
"""
if var_name in dotenv_dict:
return dotenv_dict[var_name]
try:
Expand All @@ -27,13 +58,3 @@ def get_env_variable(var_name, default=None):
return default
error_msg = "Set the %s environment variable" % var_name
raise ImproperlyConfigured(error_msg)


# e.g. set ENV=production to get .production.env file
dotenv_filename = (
".{0}.env".format(os.environ.get("ENV", "")) if "ENV" in os.environ else ".env"
)
dotenv_path = str(Path(".") / dotenv_filename)
dotenv_dict = dotenv_values(dotenv_path=dotenv_path)

print("loading env file: {0}".format(dotenv_filename))
38 changes: 20 additions & 18 deletions impresso/management/commands/checksystemhealth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,6 @@
from django.conf import settings
from django.db import connection

FLS = [
"id",
"content_length_i",
"snippet_plain",
"bm_explore_s",
"bm_get_tr_s",
"bm_get_img_s",
"meta_journal_s",
"meta_partnerid_s",
]


class Command(BaseCommand):
help = "Check SOLR connectivity"
Expand All @@ -26,7 +15,11 @@ def handle(self, *args, **options):
database_name = cursor.fetchone()[0]

self.stdout.write(
f"Current Database: \n \033[94m{database_name}\033[0m\n\n"
f"Current Database: \n"
f" host: \033[94m{settings.DATABASES["default"]["HOST"]}\033[0m\n"
f" port: \033[94m{settings.DATABASES["default"]["PORT"]}\033[0m\n"
f" engine: \033[94m{settings.DATABASES["default"]["ENGINE"]}\033[0m\n"
f" name: \033[94m{database_name}\033[0m\n\n"
)
cursor.execute("SHOW TABLES")
tables = [t[0] for t in cursor.fetchall()]
Expand All @@ -41,31 +34,40 @@ def handle(self, *args, **options):
self.stderr.write(f"Invalid SOLR URL: {solr_url}")
return

self.stdout.write(
f"SOLR fl list (available for export): \n - {'\n - '.join(settings.IMPRESSO_SOLR_FIELDS_AS_LIST)}"
)
params = {
"q": "*:*",
"rows": 2,
"fl": ",".join(FLS),
"fl": settings.IMPRESSO_SOLR_FIELDS,
}
solr_response = requests.get(
solr_url,
auth=settings.IMPRESSO_SOLR_AUTH,
params=params,
)
solr_status = solr_response.status_code

self.stdout.write(f"SOLR URL: \n - {solr_url}")
self.stdout.write(f"SOLR Status: \n - {solr_status}")

if solr_status != 200:
self.stderr.write(f"Error: {solr_response.text}")
return
# example result
# n of rows in solr
solr_num_rows = solr_response.json()["response"]["numFound"]
self.stdout.write(f"SOLR Num Rows: \n - {solr_num_rows}")
# example result

docs = solr_response.json()["response"]["docs"]
self.stdout.write(f"\n SOLR Example Docs:")
self.stdout.write(f"SOLR Example Docs:")

for doc in docs:
self.stdout.write(f" - \nid:\033[94m{doc.get('id')}\033[0m")
self.stdout.write(f"\n - {doc.get(settings.IMPRESSO_SOLR_ID_FIELD)}")
for field in settings.IMPRESSO_SOLR_FIELDS_AS_LIST:
self.stdout.write(f" ├── {field}: \033[93m{doc.get(field)}\033[0m")

for field in FLS:
self.stdout.write(f" {field}: {doc.get(field)}")
# ping redis
self.stdout.write("\nChecking Redis connectivity...")
import redis
Expand Down
Loading

0 comments on commit f876ac1

Please sign in to comment.