From 1776547e913d3624dd98391c2b5659c5783cb9f1 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 12:34:40 +0200 Subject: [PATCH 1/4] wrap atomic_write preserving permissions of mets.xml --- ocrd/ocrd/workspace.py | 2 +- ocrd/ocrd/workspace_backup.py | 4 +--- ocrd/requirements.txt | 1 - ocrd_utils/ocrd_utils/__init__.py | 1 + ocrd_utils/ocrd_utils/os.py | 18 ++++++++++++++---- ocrd_utils/requirements.txt | 1 + tests/test_workspace.py | 18 ++++++++++++++++-- 7 files changed, 34 insertions(+), 11 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 2bc0c600b..1b46fce19 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -5,12 +5,12 @@ import cv2 from PIL import Image import numpy as np -from atomicwrites import atomic_write from deprecated.sphinx import deprecated from ocrd_models import OcrdMets, OcrdExif, OcrdFile from ocrd_models.ocrd_page import parse from ocrd_utils import ( + atomic_write, getLogger, image_from_polygon, coordinates_of_segment, diff --git a/ocrd/ocrd/workspace_backup.py b/ocrd/ocrd/workspace_backup.py index 57030abc9..c6ef1d29f 100644 --- a/ocrd/ocrd/workspace_backup.py +++ b/ocrd/ocrd/workspace_backup.py @@ -6,12 +6,10 @@ import hashlib from ocrd_models import OcrdMets -from ocrd_utils import getLogger +from ocrd_utils import getLogger, atomic_write from .constants import BACKUP_DIR -from atomicwrites import atomic_write - def _chksum(s): return hashlib.sha256(s).hexdigest() diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index 3a6f75a89..2da0163b7 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -7,5 +7,4 @@ opencv-python-headless Flask jsonschema pyyaml -atomicwrites >= 1.3.0 Deprecated == 1.2.0 diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 4d2ba3ce7..c0e9dd67d 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -128,6 +128,7 @@ from .os import ( abspath, + atomic_write, pushd_popd, unzip_file_to_dir) diff --git a/ocrd_utils/ocrd_utils/os.py b/ocrd_utils/ocrd_utils/os.py index 738839f93..a67fd2d1f 100644 --- a/ocrd_utils/ocrd_utils/os.py +++ b/ocrd_utils/ocrd_utils/os.py @@ -5,12 +5,14 @@ 'abspath', 'pushd_popd', 'unzip_file_to_dir', + 'atomic_write', ] +from atomicwrites import atomic_write as atomic_write_ from tempfile import TemporaryDirectory import contextlib -from os import getcwd, chdir -import os.path +from os import getcwd, chdir, stat, chmod +from os.path import exists, abspath as abspath_ from zipfile import ZipFile @@ -22,7 +24,7 @@ def abspath(url): """ if url.startswith('file://'): url = url[len('file://'):] - return os.path.abspath(url) + return abspath_(url) @contextlib.contextmanager def pushd_popd(newcwd=None, tempdir=False): @@ -53,4 +55,12 @@ def unzip_file_to_dir(path_to_zip, output_directory): z.extractall(output_directory) z.close() - +@contextlib.contextmanager +def atomic_write(fpath, overwrite=False): + if exists(fpath): + mode = stat(fpath).st_mode + else: + mode = 0o664 + with atomic_write_(fpath, overwrite=True) as f: + yield f + chmod(fpath, mode) diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt index 02733a265..71f670878 100644 --- a/ocrd_utils/requirements.txt +++ b/ocrd_utils/requirements.txt @@ -2,3 +2,4 @@ Pillow >= 7.2.0 # All current versions of TensorFlow require numpy < 1.19.0, # so make sure that now newer version is installed here. numpy >= 1.17.0, < 1.19.0 +atomicwrites >= 1.3.0 diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 0e85627ab..ee91a9c88 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -1,4 +1,5 @@ -from os import walk +from os import walk, stat, chmod +from stat import filemode from subprocess import run, PIPE from os.path import join, exists, abspath, basename, dirname from tempfile import TemporaryDirectory, mkdtemp @@ -10,7 +11,7 @@ from tests.base import TestCase, assets, main, copy_of_directory from ocrd_models.ocrd_page import parseString -from ocrd_utils import pushd_popd +from ocrd_utils import pushd_popd, initLogging from ocrd.resolver import Resolver from ocrd.workspace import Workspace @@ -278,6 +279,19 @@ def test_image_from_page_basic(self): img, info, exif = ws.image_from_page(pcgts.get_Page(), page_id='PHYS_0017') self.assertEquals(info['features'], 'binarized,clipped') + def test_mets_permissions(self): + initLogging() + with TemporaryDirectory() as tempdir: + ws = self.resolver.workspace_from_nothing(tempdir) + ws.save_mets() + mets_path = join(ws.directory, 'mets.xml') + assert filemode(stat(mets_path).st_mode) == '-rw-rw-r--' + chmod(mets_path, 0o777) + ws.save_mets() + assert filemode(stat(mets_path).st_mode) == '-rwxrwxrwx' + + + if __name__ == '__main__': main(__file__) From f92c5a3c804b5b7e54c5bef541bc5b8d831b8491 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Oct 2020 18:18:02 +0200 Subject: [PATCH 2/4] use @pabs3 workaround for untitaker/python-atomicwrites#42 --- ocrd/ocrd/workspace.py | 2 +- ocrd/ocrd/workspace_backup.py | 2 +- ocrd_utils/ocrd_utils/os.py | 31 ++++++++++++++++++++++--------- tests/test_workspace.py | 7 ++++--- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 1b46fce19..117b5f592 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -252,7 +252,7 @@ def save_mets(self): log.info("Saving mets '%s'", self.mets_target) if self.automatic_backup: WorkspaceBackupManager(self).add() - with atomic_write(self.mets_target, overwrite=True) as f: + with atomic_write(self.mets_target) as f: f.write(self.mets.to_xml(xmllint=True).decode('utf-8')) def resolve_image_exif(self, image_url): diff --git a/ocrd/ocrd/workspace_backup.py b/ocrd/ocrd/workspace_backup.py index c6ef1d29f..a23223b54 100644 --- a/ocrd/ocrd/workspace_backup.py +++ b/ocrd/ocrd/workspace_backup.py @@ -83,7 +83,7 @@ def add(self): mets_file = join(d, 'mets.xml') log.info("Backing up to %s" % mets_file) makedirs(d) - with atomic_write(mets_file, overwrite=True) as f: + with atomic_write(mets_file) as f: f.write(mets_str.decode('utf-8')) return chksum diff --git a/ocrd_utils/ocrd_utils/os.py b/ocrd_utils/ocrd_utils/os.py index a67fd2d1f..c3112de39 100644 --- a/ocrd_utils/ocrd_utils/os.py +++ b/ocrd_utils/ocrd_utils/os.py @@ -8,10 +8,10 @@ 'atomic_write', ] -from atomicwrites import atomic_write as atomic_write_ +from atomicwrites import atomic_write as atomic_write_, AtomicWriter from tempfile import TemporaryDirectory import contextlib -from os import getcwd, chdir, stat, chmod +from os import getcwd, chdir, stat, fchmod, umask from os.path import exists, abspath as abspath_ from zipfile import ZipFile @@ -55,12 +55,25 @@ def unzip_file_to_dir(path_to_zip, output_directory): z.extractall(output_directory) z.close() + + +# ht @pabs3 +# https://github.com/untitaker/python-atomicwrites/issues/42 +class AtomicWriterPerms(AtomicWriter): + def get_fileobject(self, **kwargs): + f = super().get_fileobject(**kwargs) + try: + mode = stat(self._path).st_mode + except FileNotFoundError: + # Creating a new file, emulate what os.open() does + mask = umask(0) + umask(mask) + mode = 0o664 & ~mask + fd = f.fileno() + fchmod(fd, mode) + return f + @contextlib.contextmanager -def atomic_write(fpath, overwrite=False): - if exists(fpath): - mode = stat(fpath).st_mode - else: - mode = 0o664 - with atomic_write_(fpath, overwrite=True) as f: +def atomic_write(fpath): + with atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True) as f: yield f - chmod(fpath, mode) diff --git a/tests/test_workspace.py b/tests/test_workspace.py index ee91a9c88..336bf1d23 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -1,4 +1,4 @@ -from os import walk, stat, chmod +from os import walk, stat, chmod, umask from stat import filemode from subprocess import run, PIPE from os.path import join, exists, abspath, basename, dirname @@ -280,12 +280,13 @@ def test_image_from_page_basic(self): self.assertEquals(info['features'], 'binarized,clipped') def test_mets_permissions(self): - initLogging() with TemporaryDirectory() as tempdir: ws = self.resolver.workspace_from_nothing(tempdir) ws.save_mets() mets_path = join(ws.directory, 'mets.xml') - assert filemode(stat(mets_path).st_mode) == '-rw-rw-r--' + mask = umask(0) + umask(mask) + assert '%o' % (stat(mets_path).st_mode & ~mask) == '100644' chmod(mets_path, 0o777) ws.save_mets() assert filemode(stat(mets_path).st_mode) == '-rwxrwxrwx' From 8d88bfa9790ad1686a8225185a28fd83bdacd493 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 21 Oct 2020 13:41:04 +0200 Subject: [PATCH 3/4] fix test for file permissions with umask, sigh --- tests/test_workspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 336bf1d23..fdb49c183 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -286,7 +286,7 @@ def test_mets_permissions(self): mets_path = join(ws.directory, 'mets.xml') mask = umask(0) umask(mask) - assert '%o' % (stat(mets_path).st_mode & ~mask) == '100644' + assert (stat(mets_path).st_mode & ~mask) == 0o100664 & ~mask chmod(mets_path, 0o777) ws.save_mets() assert filemode(stat(mets_path).st_mode) == '-rwxrwxrwx' From 898dfd47fc0c5ec9fbb26a9459eb199af85889d2 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 21 Oct 2020 13:49:05 +0200 Subject: [PATCH 4/4] permissions test: no masking of file mode in assertion LHS, ht @stweil --- tests/test_workspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_workspace.py b/tests/test_workspace.py index fdb49c183..3f3d65bfa 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -286,7 +286,7 @@ def test_mets_permissions(self): mets_path = join(ws.directory, 'mets.xml') mask = umask(0) umask(mask) - assert (stat(mets_path).st_mode & ~mask) == 0o100664 & ~mask + assert (stat(mets_path).st_mode) == 0o100664 & ~mask chmod(mets_path, 0o777) ws.save_mets() assert filemode(stat(mets_path).st_mode) == '-rwxrwxrwx'