Skip to content

Commit

Permalink
Add support for reading from TAR archives to flatdata-py (heremaps#182)
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Ocker <[email protected]>
  • Loading branch information
fermeise committed Oct 6, 2021
1 parent 3edaa5d commit 7c50cfd
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 0 deletions.
6 changes: 6 additions & 0 deletions flatdata-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

Python 3 implementation of [flatdata](https://github.com/heremaps/flatdata).

## Running the tests

```sh
python3 -m nose
```

## Basic usage

Once you have [created a flatdata schema file](../README.md#creating-a-schema), you can generate a Python module to read your existing `flatdata` archive:
Expand Down
101 changes: 101 additions & 0 deletions flatdata-py/flatdata/lib/tar_archive_resource_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
'''
Copyright (c) 2021 HERE Europe B.V.
See the LICENSE file in the root of this project for license details.
'''

import tarfile

from .errors import CorruptResourceError
from .errors import MissingResourceError
from .file_resource_storage import FileResourceStorage


class TarArchiveResourceStorage:
    """
    Resource storage based on a memory-mapped TAR archive.

    Regular files of the archive are handed out as ``MemoryMapSection``
    objects over the shared map; directories are handed out as nested
    storages sharing the same map and entry tables.
    """

    def __init__(self, tar_map, file_entries, dir_entries, sub_path):
        """
        Store the shared archive state.

        :param tar_map: map of the whole archive (see ``create``)
        :param file_entries: dict of member path -> ``(data offset, size)``
        :param dir_entries: set of directory member paths
        :param sub_path: prefix joined in front of every key; empty for
            the archive root
        """
        self.tar_map = tar_map
        self.file_entries = file_entries
        self.dir_entries = dir_entries
        self.sub_path = sub_path

    @classmethod
    def create(cls, tar_path, sub_path=""):
        """
        Build a storage by indexing the members of the archive at ``tar_path``.

        The archive is opened with mode ``"r:"`` (no transparent
        decompression). A leading ``./`` is removed from member names.

        :raises CorruptResourceError: if the archive contains a sparse member
        """
        tar_map = FileResourceStorage.memory_map(tar_path)
        file_entries = {}
        dir_entries = set()
        with tarfile.open(tar_path, "r:") as archive:
            for member in archive:
                raw_name = member.name
                entry_name = raw_name[2:] if raw_name.startswith("./") else raw_name
                if member.type == tarfile.GNUTYPE_SPARSE:
                    raise CorruptResourceError("Sparse files are not supported")
                if member.isreg():
                    file_entries[entry_name] = (member.offset_data, member.size)
                if member.isdir():
                    dir_entries.add(entry_name)

        return cls(tar_map, file_entries, dir_entries, sub_path)

    def get(self, key, is_optional=False):
        """
        Look up ``key`` relative to this storage's sub path.

        :returns: a ``MemoryMapSection`` for a file, a nested
            ``TarArchiveResourceStorage`` for a directory, or ``None`` for
            a missing entry when ``is_optional`` is true
        :raises MissingResourceError: if the entry is missing and
            ``is_optional`` is false
        """
        path = self._path(key)

        # Files take precedence over directories, as in the entry tables.
        try:
            entry = self.file_entries[path]
        except KeyError:
            pass
        else:
            offset, length = entry
            return MemoryMapSection(self.tar_map, offset, length)

        if path in self.dir_entries:
            return TarArchiveResourceStorage(self.tar_map, self.file_entries, self.dir_entries, path)

        if is_optional:
            return None
        raise MissingResourceError(key)

    def _path(self, key):
        """Return ``key`` prefixed with the sub path, if there is one."""
        return self.sub_path + '/' + key if self.sub_path else key


class MemoryMapSection:
    """
    Represent a slice of a memory mapped file.
    Keeps track of its position, as to emulate pointing to a dedicated file.

    Indexing, slicing and reading are all confined to the ``length`` bytes
    starting at ``offset`` of the wrapped map; negative indices and slice
    bounds count from the end of the section, like for ``bytes``.
    """

    def __init__(self, inner, offset, length):
        """
        :param inner: the wrapped map; must support indexing, slicing,
            ``seek``, ``read`` and ``size``
        :param offset: position of the section's first byte within ``inner``
        :param length: number of bytes in the section
        """
        self.inner = inner
        self.offset = offset
        self.length = length
        self.pos = 0

    def __len__(self):
        return self.size()

    def __getitem__(self, key):
        """
        Return the byte at index ``key`` or the bytes selected by slice ``key``.

        :raises IndexError: if an integer index lies outside the section
        """
        if isinstance(key, slice):
            # Normalise negative/omitted/oversized bounds against the section
            # length so the result can never reach outside the section.
            start, stop, step = key.indices(self.length)
            if len(range(start, stop, step)) == 0:
                return self.inner[self.offset:self.offset]
            if step < 0 and stop < 0:
                # A descending slice that runs through the section's first
                # byte: the exclusive stop is the byte just before the
                # section, or "no bound" when the section starts at 0
                # (-1 would otherwise mean "last byte" to the inner map).
                abs_stop = self.offset - 1 if self.offset > 0 else None
            else:
                abs_stop = self.offset + stop
            return self.inner[self.offset + start:abs_stop:step]

        index = key + self.length if key < 0 else key
        if 0 <= index < self.length:
            return self.inner[self.offset + index]
        raise IndexError('index out of range')

    def read(self, n=None):
        """
        Read up to ``n`` bytes from the current position and advance it.

        ``None`` or a negative ``n`` reads everything up to the end of the
        section. Never returns bytes beyond the section.
        """
        remaining = self.length - self.pos
        if n is None or n < 0 or n > remaining:
            n = remaining
        self.inner.seek(self.offset + self.pos)
        data = self.inner.read(n)
        self.pos += len(data)
        return data

    def size(self):
        """
        Return the number of bytes in the section, clipped to what the
        wrapped map reports as available after ``offset`` (never negative).
        """
        return max(0, min(self.length, self.inner.size() - self.offset))
40 changes: 40 additions & 0 deletions flatdata-py/tests/test_tar_resource_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from common import *
from flatdata.generator.engine import Engine
from flatdata.lib.tar_archive_resource_storage import TarArchiveResourceStorage

from nose.tools import eq_
import tarfile
import tempfile
import os


def check_signed_struct(s):
    """Assert that fields ``a`` to ``d`` of ``s`` hold the expected values."""
    expected_fields = (
        ("a", -0x1),
        ("b", 0x01234567),
        ("c", -0x28),
        ("d", 0),
    )
    # Fields are read one at a time, in order, right before being compared.
    for field, expected in expected_fields:
        eq_(expected, getattr(s, field))


def test_tar_resource_storage():
    """
    Pack a small archive into a TAR file and read it back through
    ``TarArchiveResourceStorage``.
    """
    module = Engine(INSTANCE_TEST_SCHEMA).render_python_module()
    valid_data = {
        "Archive.archive": ARCHIVE_SIGNATURE_PAYLOAD,
        "Archive.archive.schema": module.backward_compatibility_Archive.schema().encode(),
        "resource": RESOURCE_PAYLOAD,
        "resource.schema": module.backward_compatibility_Archive.resource_schema('resource').encode()
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        archive_path = os.path.join(tmpdir, "archive.tar")
        # The context manager closes the TAR file even if writing a member
        # fails; ``arcname`` stores each member under its bare key, so the
        # process working directory never has to be changed.
        with tarfile.open(archive_path, "w") as tar:
            for key, value in valid_data.items():
                member_path = os.path.join(tmpdir, key)
                with open(member_path, "wb") as file:
                    file.write(value)
                tar.add(member_path, arcname=key)

        archive = module.backward_compatibility_Archive(TarArchiveResourceStorage.create(archive_path))
        check_signed_struct(archive.resource)

0 comments on commit 7c50cfd

Please sign in to comment.