From 8e6324763be4860c818cd1b4fed548f2025ee45d Mon Sep 17 00:00:00 2001 From: Matthias Petri Date: Thu, 22 May 2014 14:46:00 +1000 Subject: [PATCH] Add memory mapped int_vector and simplify int_vec This commit introduces a non-const data() member which allows raw write access to the data stored in an int_vector similar to what is possible with the standard std::vector. This allows "unfriending" several of the util:: helper functions that modify int_vectors. The second addition to the library is a memory mapped int_vector (int_vector_mapper) which provides the same functionality as a regular int_vector but is memory mapped from a file. Thus, operations such as util::bit_compress can now be performed without loading the int_vector into memory. The int_vector_mapper is solely used as a resource handle to the data stored in the file. All operations are forwarded to the int_vector implementation. Thus, unlike the int_vector_buffer, the mapper can be used in regular STL algorithms as it provides const and non-const access similar to the regular int_vector. The mapper additionally supports the push_back and resize operations which can be used to write data to disk. Temporary storage on disk can be realized using the temp_file_buffer class which creates an int_vector_mapper object from a temporary file which is deleted after the int_vector_mapper object is destroyed. 
--- examples/int-vector-mapper.cpp | 88 +++++++++ include/sdsl/construct_sa.hpp | 6 +- include/sdsl/int_vector.hpp | 19 +- include/sdsl/int_vector_mapper.hpp | 288 +++++++++++++++++++++++++++++ include/sdsl/io.hpp | 2 +- include/sdsl/util.hpp | 24 +-- test/IntVectorMapperTest.cpp | 285 ++++++++++++++++++++++++++++ test/Makefile | 4 + 8 files changed, 692 insertions(+), 24 deletions(-) create mode 100644 examples/int-vector-mapper.cpp create mode 100644 include/sdsl/int_vector_mapper.hpp create mode 100644 test/IntVectorMapperTest.cpp diff --git a/examples/int-vector-mapper.cpp b/examples/int-vector-mapper.cpp new file mode 100644 index 000000000..7c55a4fcf --- /dev/null +++ b/examples/int-vector-mapper.cpp @@ -0,0 +1,88 @@ +#include +#include +#include + +using namespace sdsl; +using namespace std; + +int main(int argc, char* argv[]) +{ + if (argc < 1) { + cout << "Usage: " << argv[0] << endl; + cout << "(1) Writes an int_vector sequentially to a file" << endl; + cout << "(2) Streams the content from file" << endl; + cout << "(3) Remove the file" << endl; + return 1; + } + string tmp_file = "tmp_file.sdsl"; + size_t size = 10000000; + std::mt19937_64 rng(13); + uint8_t width = 0; + int_vector<> iv(size,0,64); + int_vector<64> ivf(size,0); + std::vector stdv(size,0); + + // (1) write an int vector to disk + { + // write sequentially random values to disk + for (uint64_t i=0; i ivm(tmp_file); + if (ivm.size() != size) { + std::cerr << "ERROR: ivm.size()="<< ivm.size() << " != " << size << std::endl; + return 1; + } + if (ivm.width() != width) { + std::cerr << "ERROR: ivm.width()="<< ivm.width() << " != " << width << std::endl; + return 1; + } + rng.seed(13); // To get the same values than before use the same seed + for (uint64_t i=0; i::create(); + for(const auto& val : stdv) { + tmp_buf.push_back(val); + } + if(tmp_buf != stdv) { + std::cerr << "ERROR: tmp_buf CMP failed." 
<< std::endl; + } + + // tmp buf file is deleted automatically + } + + return 0; +} diff --git a/include/sdsl/construct_sa.hpp b/include/sdsl/construct_sa.hpp index 4963839d4..cacb8626d 100644 --- a/include/sdsl/construct_sa.hpp +++ b/include/sdsl/construct_sa.hpp @@ -88,7 +88,7 @@ void calculate_sa(const unsigned char* c, typename int_vector::si if (32 == fixedIntWidth or(0==fixedIntWidth and 32 >= oldIntWidth)) { sa.width(32); sa.resize(len); - divsufsort(c, (int32_t*)sa.m_data, len); + divsufsort(c, (int32_t*)sa.data(), len); // copy integers back to the right positions if (oldIntWidth!=32) { for (size_type i=0; i::si throw std::logic_error("width of int_vector is to small for the text!!!"); } int_vector<> sufarray(len,0,32); - divsufsort(c, (int32_t*)sufarray.m_data, len); + divsufsort(c, (int32_t*)sufarray.data(), len); for (size_type i=0; i::si uint8_t oldIntWidth = sa.width(); sa.width(64); sa.resize(len); - divsufsort64(c, (int64_t*)sa.m_data, len); + divsufsort64(c, (int64_t*)sa.data(), len); // copy integers back to the right positions if (oldIntWidth!=64) { for (size_type i=0; i class int_vector_const_iterator; +template +class int_vector_mapper; + template // forward declaration class rank_support_v; @@ -269,19 +272,12 @@ class int_vector friend class int_vector_iterator_base; friend class int_vector_iterator; friend class int_vector_const_iterator; + friend class int_vector_mapper; friend class coder::elias_delta; friend class coder::elias_gamma; friend class coder::fibonacci; friend class memory_manager; - friend void util::set_random_bits(int_vector& v, int); - friend void util::_set_zero_bits(int_vector&); - friend void util::_set_one_bits(int_vector&); - friend void util::bit_compress(int_vector&); - friend void util::set_to_value(int_vector&, uint64_t); - friend bool load_vector_from_file(int_vector&, const std::string&,uint8_t,uint8_t); - friend void algorithm::calculate_sa(const unsigned char* c, typename int_vector::size_type len, int_vector& 
sa); - enum { fixed_int_width = t_width }; // make template parameter accessible private: @@ -377,6 +373,13 @@ for (auto x : il) { return m_data; } + //! Pointer to the raw data of the int_vector + /*! \returns pointer to the raw data of the int_vector + */ + uint64_t* data() { + return m_data; + } + //! Get the integer value of the binary string of length len starting at position idx in the int_vector. /*! \param idx Starting index of the binary representation of the integer. \param len Length of the binary representation of the integer. Default value is 64. diff --git a/include/sdsl/int_vector_mapper.hpp b/include/sdsl/int_vector_mapper.hpp new file mode 100644 index 000000000..12cbd0838 --- /dev/null +++ b/include/sdsl/int_vector_mapper.hpp @@ -0,0 +1,288 @@ +#ifndef SDSL_INT_VECTOR_MAPPER +#define SDSL_INT_VECTOR_MAPPER + +#include "int_vector.hpp" +#include +#include +#include + +namespace sdsl { + +template +class int_vector_mapper { + static_assert(t_width <= 64, + "int_vector_mapper: width must be at most 64 bits."); +public: + typedef typename int_vector::difference_type difference_type; + typedef typename int_vector::value_type value_type; + typedef typename int_vector::size_type size_type; + typedef typename int_vector::int_width_type width_type; +public: + const size_type append_block_size = 1000000; +private: + uint8_t* m_mapped_data = nullptr; + uint64_t m_file_size_bytes = 0; + off_t m_data_offset = 0; + int m_fd = -1; + int_vector m_wrapper; + std::string m_file_name; + bool m_delete_on_close; +public: + int_vector_mapper() = delete; + int_vector_mapper(const int_vector_mapper&) = delete; + int_vector_mapper& operator=(const int_vector_mapper&) = delete; +public: + ~int_vector_mapper() { + if (m_mapped_data) { + if (m_data_offset) { + // update size in the on disk representation and + // truncate if necessary + uint64_t* size_in_file = (uint64_t*)m_mapped_data; + if (*size_in_file != m_wrapper.m_size) { + *size_in_file = m_wrapper.m_size; + } + 
if(t_width==0) { + // if size is variable and we map a sdsl vector + // we might have to update the stored width + uint8_t stored_width = m_mapped_data[8]; + if (stored_width != m_wrapper.m_width) { + m_mapped_data[8] = m_wrapper.m_width; + } + } + } + // do we have to truncate? + size_type current_bit_size = m_wrapper.m_size; + size_type data_size_in_bytes = ((current_bit_size + 63) >> 6) << 3; + munmap(m_mapped_data, m_file_size_bytes); + if (m_file_size_bytes != data_size_in_bytes + m_data_offset) { + int tret = ftruncate(m_fd, data_size_in_bytes + m_data_offset); + if (tret == -1) { + std::string truncate_error + = std::string("int_vector_mapper: truncate error. ") + + std::string(strerror(errno)); + throw std::runtime_error(truncate_error); + } + } + } + if (m_fd != -1) { + close(m_fd); + if(m_delete_on_close) { + sdsl::remove(m_file_name); + } + } + m_wrapper.m_data = nullptr; + m_wrapper.m_size = 0; + } + int_vector_mapper(int_vector_mapper&& ivm) { + m_wrapper.m_data = ivm.m_wrapper.m_data; + m_wrapper.m_size = ivm.m_wrapper.m_size; + m_wrapper.width(ivm.m_wrapper.width()); + m_file_name = ivm.m_file_name; + m_delete_on_close = ivm.m_delete_on_close; + ivm.m_wrapper.m_data = nullptr; + ivm.m_wrapper.m_size = 0; + ivm.m_mapped_data = nullptr; + ivm.m_fd = -1; + } + int_vector_mapper& operator=(int_vector_mapper&& ivm) { + m_wrapper.m_data = ivm.m_wrapper.m_data; + m_wrapper.m_size = ivm.m_wrapper.m_size; + m_wrapper.width(ivm.m_wrapper.width()); + m_file_name = ivm.m_file_name; + m_delete_on_close = ivm.m_delete_on_close; + ivm.m_wrapper.m_data = nullptr; + ivm.m_wrapper.m_size = 0; + ivm.m_mapped_data = nullptr; + ivm.m_fd = -1; + return (*this); + } + int_vector_mapper(const std::string& key,const cache_config& config) + : int_vector_mapper(cache_file_name(key, config)) {} + int_vector_mapper(const std::string filename, + bool is_plain = false, + bool delete_on_close = false) : + m_file_name(filename), m_delete_on_close(delete_on_close) + { + size_type 
size_in_bits = 0; + uint8_t int_width = t_width; + { + std::ifstream f(filename); + if (!f.is_open()) { + throw std::runtime_error( + "int_vector_mapper: file does not exist."); + } + if (!is_plain) { + int_vector::read_header(size_in_bits, int_width, f); + } + } + m_file_size_bytes = util::file_size(filename); + + if (!is_plain) { + m_data_offset = t_width ? 8 : 9; + } else { + if (8 != t_width and 16 != t_width and 32 != t_width and 64 + != t_width) { + throw std::runtime_error("int_vector_mapper: plain vector can " + "only be of width 8, 16, 32, 64."); + } + size_in_bits = m_file_size_bytes * 8; + } + + // open backend file + m_fd = open(filename.c_str(), O_RDWR); + if (m_fd == -1) { + std::string open_error + = std::string("int_vector_mapper: open file error. ") + + std::string(strerror(errno)); + throw std::runtime_error(open_error); + } + + // prepare wrapper and mmap + width(int_width); + bit_resize(size_in_bits); + } + std::string file_name() const { return m_file_name; } + uint8_t width() const { return m_wrapper.width(); } + void width(const uint8_t new_int_width) { + m_wrapper.width(new_int_width); + } + uint64_t size() const { return m_wrapper.size(); } + void bit_resize(const size_type bit_size) { + size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3; + if (m_file_size_bytes != new_size_in_bytes + m_data_offset) { + if (m_mapped_data) munmap(m_mapped_data, m_file_size_bytes); + int tret = ftruncate(m_fd, new_size_in_bytes + m_data_offset); + if (tret == -1) { + std::string truncate_error + = std::string("int_vector_mapper: truncate error. ") + + std::string(strerror(errno)); + throw std::runtime_error(truncate_error); + } + m_file_size_bytes = new_size_in_bytes + m_data_offset; + } + m_mapped_data + = (uint8_t*)mmap(NULL, m_file_size_bytes, PROT_READ | PROT_WRITE, + MAP_SHARED, m_fd, 0); + if (m_mapped_data == MAP_FAILED) { + std::string mmap_error + = std::string("int_vector_mapper: mmap error. 
") + + std::string(strerror(errno)); + throw std::runtime_error(mmap_error); + } + auto ret = madvise(m_mapped_data, m_file_size_bytes, MADV_SEQUENTIAL); + if (ret == -1) { + perror("Error trying to hint sequential access"); + } + // update wrapper + m_wrapper.m_data = (uint64_t*)(m_mapped_data + m_data_offset); + m_wrapper.m_size = bit_size; + } + void resize(const size_type size) { + size_type size_in_bits = size * width(); + bit_resize(size_in_bits); + } + auto begin() -> typename int_vector::iterator { + return m_wrapper.begin(); + } + auto end() -> typename int_vector::iterator { + return m_wrapper.end(); + } + auto begin() const -> typename int_vector::const_iterator { + return m_wrapper.begin(); + } + auto end() const -> typename int_vector::const_iterator { + return m_wrapper.end(); + } + auto operator[](const size_type& idx) const + -> typename int_vector::const_reference + { + return m_wrapper[idx]; + } + auto operator[](const size_type& idx) + -> typename int_vector::reference + { + return m_wrapper[idx]; + } + const uint64_t* data() const { return m_wrapper.data(); } + uint64_t* data() { return m_wrapper.data(); } + value_type get_int(size_type idx, const uint8_t len = 64) const { + return m_wrapper.get_int(idx, len); + } + void set_int(size_type idx, value_type x, const uint8_t len = 64) { + m_wrapper.set_int(idx, x, len); + } + void push_back(value_type x) { + if (capacity() < size() + 1) { + size_type old_size = m_wrapper.m_size; + size_type size_in_bits = (size() + append_block_size) * width(); + bit_resize(size_in_bits); + m_wrapper.m_size = old_size; + } + m_wrapper[size()] = x; + // update size in wrapper only + m_wrapper.m_size += width(); + } + size_type capacity() const { + size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset); + return data_size_in_bits / width(); + } + size_type bit_size() const { + return m_wrapper.bit_size(); + } + template + bool operator==(const container& v) const { + return std::equal( begin(), end(), 
v.begin()); + } + bool operator==(const int_vector& v) const { + return m_wrapper == v; + } + bool operator==(const int_vector_mapper& v) const { + return m_wrapper == v.m_wrapper; + } + template + bool operator!=(const container& v) const { + return !(*this==v); + } + void flip() { + m_wrapper.flip(); + } + bool empty() const { + return m_wrapper.empty(); + } +}; + +template +class temp_file_buffer { +private: + static std::string tmp_file(const std::string& dir) { + char tmp_file_name[1024] = {0}; + sprintf (tmp_file_name, "%s/tmp_mapper_file_XXXXXX.sdsl",dir.c_str()); + int fd = mkstemps(tmp_file_name,5); + if(fd == -1) { + throw std::runtime_error("could not create temporary file."); + } + close(fd); + return std::string(tmp_file_name,strlen(tmp_file_name)); + } +public: + static int_vector_mapper create() { + auto file_name = tmp_file("/tmp"); + return create(file_name); + } + static int_vector_mapper create(const cache_config& config) { + auto file_name = tmp_file(config.dir); + return create(file_name); + } + static int_vector_mapper create(const std::string& file_name) { + //write empty int_vector to init the file + int_vector tmp_vector; + store_to_file(tmp_vector,file_name); + return int_vector_mapper(file_name,false,true); + } +}; + +typedef int_vector_mapper<1> bit_vector_mapper; + +} // end of namespace + +#endif diff --git a/include/sdsl/io.hpp b/include/sdsl/io.hpp index 50da029f1..64e17311d 100644 --- a/include/sdsl/io.hpp +++ b/include/sdsl/io.hpp @@ -211,7 +211,7 @@ bool load_vector_from_file(t_int_vec& v, const std::string& file, uint8_t num_by v.width(std::min((int)8*num_bytes, (int)max_int_width)); v.resize(file_size / num_bytes); if (8 == t_int_vec::fixed_int_width and 1 == num_bytes) { // if int_vector<8> is created from byte alphabet file - in.read((char*)v.m_data, file_size); + in.read((char*)v.data(), file_size); } else { size_t idx=0; const size_t block_size = conf::SDSL_BLOCK_SIZE*num_bytes; diff --git a/include/sdsl/util.hpp 
b/include/sdsl/util.hpp index a8731d397..65a9a230c 100644 --- a/include/sdsl/util.hpp +++ b/include/sdsl/util.hpp @@ -46,6 +46,7 @@ #include #include #include +#include // macros to transform a defined name to a string #define SDSL_STR(x) #x @@ -352,7 +353,7 @@ void util::set_random_bits(t_int_vec& v, int seed) } else rng.seed(seed); - uint64_t* data = v.m_data; + uint64_t* data = v.data(); if (v.empty()) return; *data = rng(); @@ -373,17 +374,16 @@ void util::mod(t_int_vec& v, typename t_int_vec::size_type m) template void util::bit_compress(t_int_vec& v) { - typename t_int_vec::value_type max=0; - for (typename t_int_vec::size_type i=0; i < v.size(); ++i) { - if (v[i] > max) { - max = v[i]; - } + auto max_elem = std::max_element(v.begin(),v.end()); + uint64_t max = 0; + if(max_elem != v.end()) { + max = *max_elem; } uint8_t min_width = bits::hi(max)+1; uint8_t old_width = v.width(); if (old_width > min_width) { - const uint64_t* read_data = v.m_data; - uint64_t* write_data = v.m_data; + const uint64_t* read_data = v.data(); + uint64_t* write_data = v.data(); uint8_t read_offset = 0; uint8_t write_offset = 0; for (typename t_int_vec::size_type i=0; i < v.size(); ++i) { @@ -415,7 +415,7 @@ void util::expand_width(t_int_vec& v, uint8_t new_width) template void util::_set_zero_bits(t_int_vec& v) { - uint64_t* data = v.m_data; + uint64_t* data = v.data(); if (v.empty()) return; // TODO: replace by memset() but take care of size_t in the argument! 
@@ -428,7 +428,7 @@ void util::_set_zero_bits(t_int_vec& v) template void util::_set_one_bits(t_int_vec& v) { - uint64_t* data = v.m_data; + uint64_t* data = v.data(); if (v.empty()) return; *data = 0xFFFFFFFFFFFFFFFFULL; @@ -440,10 +440,10 @@ void util::_set_one_bits(t_int_vec& v) template void util::set_to_value(t_int_vec& v, uint64_t k) { - uint64_t* data = v.m_data; + uint64_t* data = v.data(); if (v.empty()) return; - uint8_t int_width = v.m_width; + uint8_t int_width = v.width(); if (int_width == 0) { throw std::logic_error("util::set_to_value can not be performed with int_width=0!"); } diff --git a/test/IntVectorMapperTest.cpp b/test/IntVectorMapperTest.cpp new file mode 100644 index 000000000..55abe2085 --- /dev/null +++ b/test/IntVectorMapperTest.cpp @@ -0,0 +1,285 @@ +#include "sdsl/int_vector_mapper.hpp" +#include "sdsl/util.hpp" +#include "gtest/gtest.h" +#include +#include +#include +#include + +namespace +{ + +typedef sdsl::int_vector<>::size_type size_type; +typedef sdsl::int_vector<>::value_type value_type; + +// The fixture for testing class int_vector. 
+class IntVectorMapperTest : public ::testing::Test +{ + protected: + + IntVectorMapperTest() {} + + virtual ~IntVectorMapperTest() {} + + virtual void SetUp() { + std::mt19937_64 rng; + { + std::uniform_int_distribution distribution(1, 100000); + auto dice = bind(distribution, rng); + for (size_type i=0; i<10; ++i) { + vec_sizes.push_back(dice()); + } + } + } + + virtual void TearDown() {} + + std::vector vec_sizes = {1,64,65,127,128}; // different sizes for the vectors +}; + +TEST_F(IntVectorMapperTest, iterator) +{ + // test plain + for(const auto& size : vec_sizes) { + std::vector vec(size); + sdsl::util::set_to_id(vec); + { + std::ofstream ofs("tmp/int_vector_mapper_itrtest"); + sdsl::serialize_vector(vec,ofs); + } + { + sdsl::int_vector_mapper<64> ivm("tmp/int_vector_mapper_itrtest",true); + ASSERT_EQ(size,ivm.size()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),vec.begin())); + ASSERT_EQ(size,std::distance(ivm.begin(),ivm.end())); + } + { + sdsl::int_vector_mapper<64> ivm("tmp/int_vector_mapper_itrtest",true); + auto itr = ivm.end()-1; + for(size_t i=0;i vec(size); + sdsl::util::set_to_id(vec); + store_to_file(vec,"tmp/int_vector_mapper_itrtest"); + { + sdsl::int_vector_mapper<25> ivm("tmp/int_vector_mapper_itrtest"); + ASSERT_EQ(size,ivm.size()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),vec.begin())); + ASSERT_EQ(size,std::distance(ivm.begin(),ivm.end())); + } + { + sdsl::int_vector_mapper<25> ivm("tmp/int_vector_mapper_itrtest"); + auto itr = ivm.end()-1; + for(size_t i=0;i vec(size); + sdsl::util::set_to_id(vec); + sdsl::util::bit_compress(vec); + store_to_file(vec,"tmp/int_vector_mapper_itrtest"); + { + sdsl::int_vector_mapper<> ivm("tmp/int_vector_mapper_itrtest"); + ASSERT_EQ(size,ivm.size()); + ASSERT_EQ(vec.width(),ivm.width()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),vec.begin())); + ASSERT_EQ(size,std::distance(ivm.begin(),ivm.end())); + } + { + sdsl::int_vector_mapper<> ivm("tmp/int_vector_mapper_itrtest"); + auto itr = 
ivm.end()-1; + for(size_t i=0;i vec(size); + sdsl::util::set_to_id(vec); + { + std::ofstream ofs("tmp/int_vector_mapper_push_backtest"); + sdsl::serialize_vector(vec,ofs); + } + { + sdsl::int_vector_mapper<64> ivm("tmp/int_vector_mapper_push_backtest",true); + ASSERT_EQ(size,ivm.size()); + for(size_t i=0;i ivm("tmp/int_vector_mapper_push_backtest",true); + ASSERT_EQ(vec.size(),ivm.size()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),vec.begin())); + ASSERT_EQ(vec.size(),std::distance(ivm.begin(),ivm.end())); + } + sdsl::remove("tmp/int_vector_mapper_itrtest"); + } + + // test fixed width + for(const auto& size : vec_sizes) { + sdsl::int_vector<31> vec(size); + std::vector stdvec(size); + sdsl::util::set_to_id(vec); + sdsl::util::set_to_id(stdvec); + store_to_file(vec,"tmp/int_vector_mapper_push_backtest"); + { + sdsl::int_vector_mapper<31> ivm("tmp/int_vector_mapper_push_backtest"); + ASSERT_EQ(size,ivm.size()); + for(size_t i=0;i ivm("tmp/int_vector_mapper_push_backtest"); + ASSERT_EQ(stdvec.size(),ivm.size()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),stdvec.begin())); + ASSERT_EQ(stdvec.size(),std::distance(ivm.begin(),ivm.end())); + } + sdsl::remove("tmp/int_vector_mapper_push_backtest"); + } + + // test variable width + for(const auto& size : vec_sizes) { + sdsl::int_vector<> vec(size); + std::vector stdvec(size); + sdsl::util::set_to_id(vec); + sdsl::util::set_to_id(stdvec); + sdsl::util::bit_compress(vec); + store_to_file(vec,"tmp/int_vector_mapper_push_backtest"); + { + sdsl::int_vector_mapper<> ivm("tmp/int_vector_mapper_push_backtest"); + ASSERT_EQ(size,ivm.size()); + for(size_t i=0;i ivm("tmp/int_vector_mapper_push_backtest"); + ASSERT_EQ(stdvec.size(),ivm.size()); + ASSERT_TRUE(std::equal(ivm.begin(),ivm.end(),stdvec.begin())); + ASSERT_EQ(stdvec.size(),std::distance(ivm.begin(),ivm.end())); + } + sdsl::remove("tmp/int_vector_mapper_push_backtest"); + } +} + +TEST_F(IntVectorMapperTest, bit_compress ) +{ + for(const auto& size : vec_sizes) { + 
sdsl::int_vector<> vec(size); + sdsl::util::set_to_id(vec); + store_to_file(vec,"tmp/int_vector_mapper_bit_compress_test_uncompressed"); + sdsl::util::bit_compress(vec); + store_to_file(vec,"tmp/int_vector_mapper_bit_compress_test"); + { + sdsl::int_vector_mapper<> ivmc("tmp/int_vector_mapper_bit_compress_test"); + sdsl::int_vector_mapper<> ivmu("tmp/int_vector_mapper_bit_compress_test_uncompressed"); + ASSERT_TRUE(std::equal(ivmc.begin(),ivmc.end(),ivmu.begin())); + ASSERT_TRUE(std::equal(ivmc.begin(),ivmc.end(),vec.begin())); + } + { + sdsl::int_vector_mapper<> ivmu("tmp/int_vector_mapper_bit_compress_test_uncompressed"); + ASSERT_TRUE(std::equal(ivmu.begin(),ivmu.end(),vec.begin())); + sdsl::util::bit_compress(ivmu); + ASSERT_TRUE(std::equal(ivmu.begin(),ivmu.end(),vec.begin())); + } + { + sdsl::int_vector_mapper<> ivmc("tmp/int_vector_mapper_bit_compress_test"); + sdsl::int_vector_mapper<> ivmu("tmp/int_vector_mapper_bit_compress_test_uncompressed"); + ASSERT_EQ(ivmc.size(),ivmu.size()); + ASSERT_EQ(ivmc.width(),ivmu.width()); + ASSERT_TRUE(std::equal(ivmc.begin(),ivmc.end(),ivmu.begin())); + } + sdsl::remove("tmp/int_vector_mapper_bit_compress_test_uncompressed"); + sdsl::remove("tmp/int_vector_mapper_bit_compress_test"); + } +} + +TEST_F(IntVectorMapperTest, bitvector_mapping ) +{ + for(const auto& size : vec_sizes) { + sdsl::bit_vector bv(size); + sdsl::util::set_random_bits(bv,4711); + store_to_file(bv,"tmp/bit_vector_mapper_test"); + { + // load/store test + sdsl::bit_vector_mapper bvm("tmp/bit_vector_mapper_test"); + ASSERT_EQ(bvm.size(),bv.size()); + ASSERT_EQ(bvm.width(),bv.width()); + ASSERT_TRUE(std::equal(bvm.begin(),bvm.end(),bv.begin())); + ASSERT_EQ(sdsl::util::cnt_one_bits(bv),sdsl::util::cnt_one_bits(bvm)); + } + { + // flip test + sdsl::bit_vector_mapper bvm("tmp/bit_vector_mapper_test"); + bvm.flip(); + bv.flip(); + ASSERT_TRUE(std::equal(bvm.begin(),bvm.end(),bv.begin())); + 
ASSERT_EQ(sdsl::util::cnt_one_bits(bv),sdsl::util::cnt_one_bits(bvm)); + } + { + // load/store after flip + sdsl::bit_vector_mapper bvm("tmp/bit_vector_mapper_test"); + ASSERT_TRUE(std::equal(bvm.begin(),bvm.end(),bv.begin())); + ASSERT_EQ(sdsl::util::cnt_one_bits(bv),sdsl::util::cnt_one_bits(bvm)); + } + sdsl::remove("tmp/bit_vector_mapper_test"); + } +} + +TEST_F(IntVectorMapperTest, temp_buffer_test ) +{ + for(const auto& size : vec_sizes) { + sdsl::int_vector<> vec(size); + sdsl::util::set_to_id(vec); + std::string tmp_file_name; + { + auto tmp_buf = sdsl::temp_file_buffer<31>::create(); + tmp_file_name = tmp_buf.file_name(); + ASSERT_EQ(tmp_buf.width(),31); + ASSERT_EQ(tmp_buf.size(),0); + ASSERT_TRUE(tmp_buf.empty()); + for(const auto& val : vec) { + tmp_buf.push_back(val); + } + ASSERT_EQ(tmp_buf.size(),vec.size()); + ASSERT_TRUE(std::equal(tmp_buf.begin(),tmp_buf.end(),vec.begin())); + } + // check that the file is gone + std::ifstream cfs(tmp_file_name); + ASSERT_FALSE(cfs.is_open()); + } +} + +} // namespace + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/Makefile b/test/Makefile index 75268f381..1bd6df9d6 100644 --- a/test/Makefile +++ b/test/Makefile @@ -64,6 +64,7 @@ test: bits-test \ coder-test \ int-vector-test \ inv-perm-support-test \ + int-vector-mapper-test \ int-vector-buffer-test \ bit-vector-test \ rank-support-test\ @@ -117,6 +118,9 @@ inv-perm-support-test: ./InvPermSupportTest.x int-vector-buffer-test: ./IntVectorBufferTest.x @$(PREFIX) ./IntVectorBufferTest.x +int-vector-mapper-test: ./IntVectorMapperTest.x + @$(PREFIX) ./IntVectorMapperTest.x + sorted-int-stack-test: ./SortedIntStackTest.x @$(PREFIX) ./SortedIntStackTest.x