Skip to content

Commit

Permalink
Add support for reading from TAR archives to flatdata-cpp (heremaps#182
Browse files Browse the repository at this point in the history
…) (heremaps#208)

Signed-off-by: Christian Ocker <[email protected]>

This allows read-only access for TARed flatdata archives.
This can be useful when the user does not want to deal with folders or a large number of files.
  • Loading branch information
fermeise authored Sep 28, 2021
1 parent 1983223 commit 3edaa5d
Show file tree
Hide file tree
Showing 7 changed files with 602 additions and 1 deletion.
85 changes: 85 additions & 0 deletions flatdata-cpp/include/flatdata/MemoryMappedTarFileStorage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* Copyright (c) 2021 HERE Europe B.V.
* See the LICENSE file in the root of this project for license details.
*/

#pragma once

#include "MemoryDescriptor.h"
#include "internal/TarReader.h"

#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/mapped_region.hpp>

#include <cstdio>
#include <map>
#include <memory>

namespace flatdata
{
/**
* @brief Read-only access to the files stored inside a memory-mapped TAR archive.
*
* The constructor maps the archive and builds an index from file path to the bytes of that
* file; read() looks paths up in this index.
*/
class MemoryMappedTarFileStorage
{
public:
/**
* @brief Map the TAR file and index the files it contains
* @param tar_path The path to the TAR file
*/
explicit MemoryMappedTarFileStorage( const char* tar_path );

/**
* @brief Look up a file of the archive
* @param path The path of the file inside the archive (without a leading "./")
* @return Descriptor of the file content, or a default-constructed MemoryDescriptor if the
* path is not in the index
*/
MemoryDescriptor read( const char* path ) const;

private:
// Mapping of the whole TAR file; the descriptors stored in m_files point into it
boost::interprocess::mapped_region m_region;
// Index from path inside the archive (leading "./" removed) to the file content
std::map< std::string, MemoryDescriptor > m_files;
};

/**
 * @brief Memory-maps the TAR file at tar_path and indexes the files it contains.
 *
 * If the file cannot be mapped or is empty, the storage is left without any entries, so every
 * call to read() yields a default-constructed MemoryDescriptor.
 *
 * @param tar_path The path to the TAR file, mapped read-only
 * @throws std::runtime_error if the entries of the TAR archive cannot be read
 */
inline MemoryMappedTarFileStorage::MemoryMappedTarFileStorage( const char* tar_path )
{
    try
    {
        boost::interprocess::file_mapping file( tar_path, boost::interprocess::read_only );
        boost::interprocess::mapped_region region( file, boost::interprocess::read_only );
        if ( region.get_size( ) == 0 )
        {
            return;
        }

        m_region = std::move( region );
    }
    catch ( const boost::interprocess::interprocess_exception& )
    {
        // Deliberately best-effort: an unmappable file results in an empty storage
        return;
    }

    const auto* archive_begin = static_cast< const unsigned char* >( m_region.get_address( ) );
    const auto archive_size = m_region.get_size( );

    std::vector< internal::TarFileEntry > file_entries;
    try
    {
        file_entries
            = internal::read_tar_file_entries( MemoryDescriptor( archive_begin, archive_size ) );
    }
    catch ( const std::runtime_error& e )
    {
        throw std::runtime_error( std::string( "Error reading TAR archive: " ) + e.what( ) );
    }

    for ( const auto& file : file_entries )
    {
        if ( file.offset > archive_size )
        {
            // The entry starts past the end of the mapping. Subtracting below would wrap
            // around and the stored descriptor would point outside of the mapped region.
            continue;
        }

        // Archives created from "." prefix every entry with "./"; index without that prefix
        std::string path = file.name.compare( 0, 2, "./" ) == 0 ? file.name.substr( 2 ) : file.name;

        // Clamp the size so that a truncated archive never exposes bytes beyond the mapping
        m_files.emplace( std::move( path ),
                         MemoryDescriptor( archive_begin + file.offset,
                                           std::min( file.size, archive_size - file.offset ) ) );
    }
}

/**
 * @brief Looks up the content of a file inside the archive.
 * @param path The path of the file as stored in the index
 * @return The descriptor stored for path, or a default-constructed MemoryDescriptor if the
 *         index has no such entry
 */
inline MemoryDescriptor
MemoryMappedTarFileStorage::read( const char* path ) const
{
    const auto entry = m_files.find( path );
    return entry == m_files.end( ) ? MemoryDescriptor( ) : entry->second;
}

} // namespace flatdata
126 changes: 126 additions & 0 deletions flatdata-cpp/include/flatdata/TarFileResourceStorage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
* Copyright (c) 2021 HERE Europe B.V.
* See the LICENSE file in the root of this project for license details.
*/

#pragma once

#include "MemoryMappedTarFileStorage.h"
#include "ResourceStorage.h"

#include <boost/filesystem.hpp>

#include <fstream>

namespace flatdata
{
/**
* @brief Read-only resource storage for reading flatdata archives inside a TAR file.
*/
class TarFileResourceStorage : public ResourceStorage
{
public:
/**
* @brief Create resource storage for a TAR file
* @param tar_path The path to the TAR file
* @param sub_path The path inside the TAR file
* @return TarFileResourceStorage or nullptr if reading the TAR archive fails with a
* std::runtime_error
*
* NOTE(review): a TAR file that cannot be memory-mapped appears to yield a storage without
* any resources rather than nullptr - confirm whether that is intended.
*/
static std::unique_ptr< TarFileResourceStorage > create( const char* tar_path,
const char* sub_path = "" );

// Same as directory( key ): nothing is created inside the TAR file
std::unique_ptr< ResourceStorage > create_directory( const char* key ) override;
// Returns a storage over the same TAR file whose sub path is extended by key
std::unique_ptr< ResourceStorage > directory( const char* key ) override;
// True if the TAR file contains an entry for key below the current sub path
bool exists( const char* key ) override;

protected:
// Always returns nullptr: writing to TAR files is not supported
std::shared_ptr< std::ostream > create_output_stream( const char* key ) override;
// Returns the content of the entry for key, or an empty descriptor if there is none
MemoryDescriptor read_resource( const char* key ) override;

private:
TarFileResourceStorage( std::shared_ptr< const MemoryMappedTarFileStorage > storage,
const std::string& tar_path,
const std::string& sub_path );
// Builds the path inside the TAR file for key, relative to m_sub_path
std::string get_path( const char* key ) const;

private:
// Mapped TAR file, shared with the storages returned by directory()
std::shared_ptr< const MemoryMappedTarFileStorage > m_storage;
// Path of the TAR file this storage was created from
std::string m_tar_path;
// Path inside the TAR file that keys are resolved against; empty for the archive root
std::string m_sub_path;
};

// -------------------------------------------------------------------------------------------------

inline std::unique_ptr< TarFileResourceStorage >
TarFileResourceStorage::create( const char* tar_path, const char* sub_path )
{
std::shared_ptr< const MemoryMappedTarFileStorage > storage;
try
{
storage.reset( new MemoryMappedTarFileStorage( tar_path ) );
}
catch ( const std::runtime_error& e )
{
std::clog << e.what( ) << std::endl;
return nullptr;
}

return std::unique_ptr< TarFileResourceStorage >(
new TarFileResourceStorage( storage, tar_path, sub_path ) );
}

/**
 * @brief Output streams are not available since writing to TAR files is not supported.
 * @return Always an empty pointer
 */
inline std::shared_ptr< std::ostream >
TarFileResourceStorage::create_output_stream( const char* /*key*/ )
{
    return std::shared_ptr< std::ostream >( );
}

inline TarFileResourceStorage::TarFileResourceStorage(
std::shared_ptr< const MemoryMappedTarFileStorage > storage,
const std::string& tar_path,
const std::string& sub_path )
: m_storage( std::move( storage ) )
, m_tar_path( tar_path )
, m_sub_path( sub_path )
{
}

/**
 * @brief Resolves key against the sub path of this storage.
 * @return key itself for an empty sub path, otherwise the sub path and key joined by '/'
 */
inline std::string
TarFileResourceStorage::get_path( const char* key ) const
{
    if ( m_sub_path.empty( ) )
    {
        return std::string( key );
    }

    // Paths inside a TAR file are always separated by '/'
    std::string joined = m_sub_path;
    joined += '/';
    joined += key;
    return joined;
}

/**
 * @brief Returns the content of the resource stored under key.
 * @param key The name of the resource, relative to the sub path of this storage
 * @return Descriptor of the resource content, or a default-constructed MemoryDescriptor if
 *         the TAR file has no such entry
 */
inline MemoryDescriptor
TarFileResourceStorage::read_resource( const char* key )
{
    // One lookup serves both the existence check and the read; calling exists() first would
    // build the path and search the index a second time.
    MemoryDescriptor resource = m_storage->read( get_path( key ).c_str( ) );
    if ( resource.data( ) == nullptr )
    {
        return MemoryDescriptor( );
    }
    return resource;
}

/**
 * @brief Returns the storage for the sub directory key.
 *
 * Nothing is created inside the TAR file; the call is forwarded to directory().
 */
inline std::unique_ptr< ResourceStorage >
TarFileResourceStorage::create_directory( const char* key )
{
    std::unique_ptr< ResourceStorage > sub_storage = directory( key );
    return sub_storage;
}

inline std::unique_ptr< ResourceStorage >
TarFileResourceStorage::directory( const char* key )
{
return std::unique_ptr< TarFileResourceStorage >(
new TarFileResourceStorage( m_storage, m_tar_path, get_path( key ) ) );
}

/**
 * @brief Checks whether the TAR file has an entry for key below the current sub path.
 * @return true if reading the entry yields a descriptor with non-null data
 */
inline bool
TarFileResourceStorage::exists( const char* key )
{
    const std::string path = get_path( key );
    const bool has_data = m_storage->read( path.c_str( ) ).data( ) != nullptr;
    return has_data;
}

} // namespace flatdata
3 changes: 2 additions & 1 deletion flatdata-cpp/include/flatdata/flatdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@
#include "MultiVector.h"
#include "ResourceStorage.h"
#include "Struct.h"
#include "Vector.h"
#include "TarFileResourceStorage.h"
#include "Vector.h"
1 change: 1 addition & 0 deletions flatdata-cpp/include/flatdata/internal/Reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <boost/optional/optional_io.hpp>

#include <cstring>
#include <utility>

namespace flatdata
{
Expand Down
27 changes: 27 additions & 0 deletions flatdata-cpp/include/flatdata/internal/TarReader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* Copyright (c) 2021 HERE Europe B.V.
* See the LICENSE file in the root of this project for license details.
*/

#pragma once

#include <flatdata/MemoryDescriptor.h>

#include <stddef.h>
#include <string>
#include <vector>

namespace flatdata
{
namespace internal
{
/**
* @brief Location of a single file inside a TAR archive.
*/
struct TarFileEntry
{
// Path of the file as stored in the archive; may carry a leading "./"
std::string name;
// Position of the file content, used by callers as an offset from the start of the archive
// data (presumably in bytes - confirm against the reader implementation)
size_t offset = 0;
// Length of the file content; callers clamp it to the end of the archive data
size_t size = 0;
};

/**
* @brief Lists the file entries of a TAR archive held in memory.
* @param data The content of the whole TAR archive
* @return The entries found in the archive (presumably one per contained file - the
* implementation is not visible from this header)
*
* NOTE(review): callers catch std::runtime_error from this function; confirm which conditions
* raise it.
*/
std::vector< TarFileEntry > read_tar_file_entries( MemoryDescriptor data );
} // namespace internal
} // namespace flatdata
Loading

0 comments on commit 3edaa5d

Please sign in to comment.