Skip to content

Commit

Permalink
Merge pull request #853 from kobotoolbox/azure-overwrite-files-as-false
Browse files Browse the repository at this point in the history
Do not override files when using Azure Storage
  • Loading branch information
bufke authored Nov 2, 2022
2 parents e8bd6a9 + 0d95c31 commit dcc5cb8
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 8 deletions.
59 changes: 55 additions & 4 deletions onadata/libs/utils/export_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from bson import json_util
from django.conf import settings
from django.core.files.base import File, ContentFile
from django.core.files.storage import FileSystemStorage
from django.core.files.temp import NamedTemporaryFile
from django.core.files.storage import get_storage_class
from django.contrib.auth.models import User
Expand Down Expand Up @@ -675,18 +676,18 @@ def generate_attachments_zip_export(
export_type,
filename)

export_filename = get_storage_class()().save(file_path, ContentFile(b''))
absolute_filename = _get_absolute_filename(file_path)

with get_storage_class()().open(export_filename, 'wb') as destination_file:
with get_storage_class()().open(absolute_filename, 'wb') as destination_file:
create_attachments_zipfile(
attachments,
output_file=destination_file,
)

dir_name, basename = os.path.split(export_filename)
dir_name, basename = os.path.split(absolute_filename)

# get or create export object
if(export_id):
if export_id:
export = Export.objects.get(id=export_id)
else:
export = Export.objects.create(xform=xform, export_type=export_type)
Expand Down Expand Up @@ -763,3 +764,53 @@ def kml_export_data(id_string, user):
})

return data_for_template


def _get_absolute_filename(filename: str) -> str:
"""
Get absolute filename related to storage root.
"""

storage_class = get_storage_class()()
filename = storage_class.generate_filename(filename)

# We cannot call `self.result.save()` before reopening the file
# in write mode (i.e. open(filename, 'wb')). because it does not work
# with AzureStorage.
# Unfortunately, `self.result.save()` does few things that we need to
# reimplement here:
# - Create parent folders (if they do not exist) for local storage
# - Get a unique filename if filename already exists on storage

# Copied from `FileSystemStorage._save()` 😢
if isinstance(storage_class, FileSystemStorage):
full_path = storage_class.path(filename)

# Create any intermediate directories that do not exist.
directory = os.path.dirname(full_path)
if not os.path.exists(directory):
try:
if storage_class.directory_permissions_mode is not None:
# os.makedirs applies the global umask, so we reset it,
# for consistency with file_permissions_mode behavior.
old_umask = os.umask(0)
try:
os.makedirs(
directory, storage_class.directory_permissions_mode
)
finally:
os.umask(old_umask)
else:
os.makedirs(directory)
except FileExistsError:
# There's a race between os.path.exists() and os.makedirs().
# If os.makedirs() fails with FileExistsError, the directory
# was created concurrently.
pass
if not os.path.isdir(directory):
raise IOError("%s exists and is not a directory." % directory)

# Store filenames with forward slashes, even on Windows.
filename = filename.replace('\\', '/')

return storage_class.get_available_name(filename)
5 changes: 3 additions & 2 deletions onadata/libs/utils/viewer_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ def report_exception(subject, info, exc_info=None):
info += "".join(traceback.format_exception(*exc_info))

if settings.DEBUG:
print(subject)
print(info)
print(subject, flush=True)
print(info, flush=True)
else:
mail_admins(subject=subject, message=info)
logging.error(info, exc_info=exc_info)


def django_file(path, field_name, content_type):
Expand Down
3 changes: 1 addition & 2 deletions onadata/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,6 @@ def skip_suspicious_operations(record):
AZURE_ACCOUNT_KEY = env.str('AZURE_ACCOUNT_KEY')
AZURE_CONTAINER = env.str('AZURE_CONTAINER')
AZURE_URL_EXPIRATION_SECS = env.int('AZURE_URL_EXPIRATION_SECS', None)
AZURE_OVERWRITE_FILES = True

GOOGLE_ANALYTICS_PROPERTY_ID = env.str("GOOGLE_ANALYTICS_TOKEN", False)
GOOGLE_ANALYTICS_DOMAIN = "auto"
Expand Down Expand Up @@ -678,7 +677,7 @@ def skip_suspicious_operations(record):

# Celery defaults to having as many workers as there are cores. To avoid
# excessive resource consumption, don't spawn more than 6 workers by default
# even if there more than 6 cores.
# even if there are more than 6 cores.
CELERY_WORKER_MAX_CONCURRENCY = int(os.environ.get('CELERYD_MAX_CONCURRENCY', 6))
if multiprocessing.cpu_count() > CELERY_WORKER_MAX_CONCURRENCY:
CELERY_WORKER_CONCURRENCY = CELERY_WORKER_MAX_CONCURRENCY
Expand Down

0 comments on commit dcc5cb8

Please sign in to comment.