Skip to content

Commit

Permalink
Merge pull request simongog#84 from mpetri/clear-inverse-permutation-syntax
Browse files Browse the repository at this point in the history

Reworked psi[] lf[] and isa[] access syntax for all CSAs
  • Loading branch information
simongog committed Sep 11, 2013
2 parents f215cd7 + a4084c2 commit b116082
Show file tree
Hide file tree
Showing 17 changed files with 370 additions and 435 deletions.
2 changes: 1 addition & 1 deletion examples/hugepages.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ void do_something(const tCsa& csa)
uint64_t sum=0;
auto start = timer::now();
for (size_t i=0; i<csa.size() and i<10000000; ++i) {
sum+=csa.psi(i);
sum+=csa.lf[i];
}
auto stop = timer::now();
cout << "runtime in s: " << duration_cast<seconds>(stop-start).count() << endl;
Expand Down
88 changes: 45 additions & 43 deletions include/sdsl/construct_lcp_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,59 @@
#include <list>
#include <vector>

namespace sdsl{
namespace sdsl
{


void insert_lcp_values(int_vector<> &partial_lcp, bit_vector &index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset);
void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset);

//! Build the cumulative character-count array C from a wavelet tree.
/*!
 * \tparam tWT Wavelet tree type. It must provide a member `sigma`, a method
 *             `size()` and a method
 *             `interval_symbols(i, j, k, cs, rank_c_i, rank_c_j)`.
 * \param C    Output. Any previous content is discarded and replaced by 257
 *             entries. After the call, C[c] is the number of symbols in
 *             [0 .. wt.size()-1] that are smaller than c, and C[256] is the
 *             total number of symbols reported by the wavelet tree.
 * \param wt   Wavelet tree that is queried over its whole range.
 */
template<class tWT>
void create_C_array(std::vector<uint64_t>& C, const tWT& wt)
{
    uint64_t quantity = 0;                      // quantity of characters in interval
    std::vector<unsigned char> cs(wt.sigma);    // list of characters in the interval
    std::vector<uint64_t> rank_c_i(wt.sigma);   // number of occurrence of character in [0 .. i-1]
    std::vector<uint64_t> rank_c_j(wt.sigma);   // number of occurrence of character in [0 .. j-1]

    C = std::vector<uint64_t>(257, 0);
    wt.interval_symbols(0, wt.size(), quantity, cs, rank_c_i, rank_c_j);

    // The interval starts at 0, so rank_c_j[i] is the total count of cs[i].
    // Store it one slot to the right so that the prefix sum below yields
    // "number of symbols smaller than c" at index c.
    for (uint64_t i=0; i<quantity; ++i) {
        unsigned char c = cs[i];
        C[c+1] = rank_c_j[i];
    }
    // In-place prefix sum; C[0] stays 0 and C[1] needs no update because of it.
    for (uint64_t i=1; i<C.size()-1; ++i) {
        C[i+1] += C[i];
    }
}


//! Byte queue with separate write and read buffers and an associated file stream.
/*!
 * Only the declaration is given here; the member functions are defined elsewhere.
 * NOTE(review): judging by m_disk_buffered_blocks, m_file_name and m_stream,
 * full blocks are presumably spilled to a file and read back later -- confirm
 * against the member function definitions.
 */
class buffered_char_queue
{
        typedef bit_vector::size_type size_type;
        typedef std::queue<uint8_t> tQ;
    private:
        static const uint32_t m_buffer_size = 10000;//409600;
        uint8_t m_write_buf[m_buffer_size];
        uint8_t m_read_buf[m_buffer_size];
        size_type m_widx; // write index
        size_type m_ridx; // read index
        bool m_sync; // are read and write buffer the same?
        size_type m_disk_buffered_blocks; // number of blocks written to disk and not read again yet
        char m_c;
        size_type m_rb; // read blocks
        size_type m_wb; // written blocks

        std::string m_file_name;

        std::fstream m_stream;

    public:

        buffered_char_queue();
        //! Initialise the queue. \param dir directory argument, \param c character argument
        //! (NOTE(review): their exact use is defined outside this section).
        void init(const std::string& dir, char c);
        ~buffered_char_queue();
        //! Append byte x to the queue.
        void push_back(uint8_t x);
        //! Remove and return a byte from the front of the queue.
        uint8_t pop_front();
};

typedef std::list<int_vector<>::size_type> tLI;
Expand All @@ -65,19 +67,19 @@ typedef std::vector<int_vector<>::size_type> tVI;
//! Prepend index i to the list that belongs to character c.
/*!
 * \param i            Index to insert; it is inserted exactly once.
 * \param c            Character whose list receives i.
 * \param m_list       One index list per byte value.
 * \param m_chars      Receives c at position m_char_count when the list of c
 *                     is empty before the insertion.
 * \param m_char_count Number of entries used in m_chars; incremented whenever
 *                     c is recorded in m_chars.
 */
template<class size_type_class>
void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count)
{
    // First index for this character: remember the character itself.
    if (m_list[c].empty()) {
        m_chars[m_char_count++] = c;
    }
    m_list[c].push_front(i);
}

//! Append index i to the list that belongs to character c.
/*!
 * \param i            Index to insert; it is inserted exactly once.
 * \param c            Character whose list receives i.
 * \param m_list       One index list per byte value.
 * \param m_chars      Receives c at position m_char_count when the list of c
 *                     is empty before the insertion.
 * \param m_char_count Number of entries used in m_chars; incremented whenever
 *                     c is recorded in m_chars.
 */
template<class size_type_class>
void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count)
{
    // First index for this character: remember the character itself.
    if (m_list[c].empty()) {
        m_chars[m_char_count++] = c;
    }
    m_list[c].push_back(i);
}

void lcp_info(tMSS& file_map);
Expand Down
41 changes: 15 additions & 26 deletions include/sdsl/csa_bitcompressed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ class csa_bitcompressed
typedef int_vector<>::size_type size_type; // STL Container requirement
typedef size_type csa_size_type;
typedef ptrdiff_t difference_type; // STL Container requirement
typedef psi_of_sa_and_isa<csa_bitcompressed> psi_type;
typedef traverse_csa_saisa<csa_bitcompressed,true> psi_type;
typedef traverse_csa_saisa<csa_bitcompressed,false> lf_type;
typedef bwt_of_csa_psi<csa_bitcompressed> bwt_type;
typedef text_of_csa<csa_bitcompressed> text_type;
typedef first_row_of_csa<csa_bitcompressed> first_row_type;
typedef _sa_order_sampling<csa_bitcompressed,0> sa_sample_type;
typedef int_vector<> isa_sample_type;
typedef isa_sample_type isa_type;
typedef t_alphabet_strat alphabet_type;
typedef typename alphabet_type::char_type char_type; // Note: This is the char type of the CSA not the WT!
typedef typename alphabet_type::comp_char_type comp_char_type;
Expand All @@ -90,25 +92,25 @@ class csa_bitcompressed
private:
sa_sample_type m_sa; // vector for suffix array values
isa_sample_type m_isa; // vector for inverse suffix array values
psi_type m_psi; // wrapper class for psi function values
alphabet_type m_alphabet;

//! Copy the state of another csa_bitcompressed into this object.
/*! \param csa Source object whose m_sa, m_isa and m_alphabet are copied.
 */
void copy(const csa_bitcompressed& csa) {
m_sa = csa.m_sa;
m_isa = csa.m_isa;
m_alphabet = csa.m_alphabet;
// The psi wrapper is not copied from csa; it is rebuilt from this object's address.
m_psi = psi_type(this);
}
public:
const typename alphabet_type::char2comp_type& char2comp = m_alphabet.char2comp;
const typename alphabet_type::comp2char_type& comp2char = m_alphabet.comp2char;
const typename alphabet_type::C_type& C = m_alphabet.C;
const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma;
const psi_type& psi = m_psi;
const bwt_type bwt = bwt_type(this);
const bwt_type L = bwt_type(this);
const first_row_type F = first_row_type(this);
const text_type text = text_type(this);
const psi_type psi = psi_type(*this);
const lf_type lf = lf_type(*this);
const bwt_type bwt = bwt_type(*this);
const bwt_type L = bwt_type(*this);
const isa_type& isa = m_isa;
const first_row_type F = first_row_type(*this);
const text_type text = text_type(*this);
const sa_sample_type& sa_sample = m_sa;
const isa_sample_type& isa_sample = m_isa;

Expand All @@ -120,10 +122,7 @@ class csa_bitcompressed
}

//! Constructor
csa_bitcompressed(cache_config& config): char2comp(m_alphabet.char2comp),
comp2char(m_alphabet.comp2char), C(m_alphabet.C),
sigma(m_alphabet.sigma), psi(m_psi), bwt(this),
text(this), sa_sample(m_sa), isa_sample(m_isa) {
csa_bitcompressed(cache_config& config) {
std::string text_file = cache_file_name(key_trait<alphabet_type::int_width>::KEY_TEXT,config);
int_vector_buffer<alphabet_type::int_width> text_buf(text_file);
int_vector_buffer<> sa_buf(cache_file_name(constants::KEY_SA,config));
Expand All @@ -137,7 +136,7 @@ class csa_bitcompressed
m_sa.swap(tmp_sample);
}
set_isa_samples<csa_bitcompressed>(sa_buf, m_isa);
m_psi = psi_type(this);

if (!store_to_file(m_isa, cache_file_name(constants::KEY_ISA,config), true)) {
throw std::ios_base::failure("#csa_bitcompressed: Cannot store ISA to file system!");
} else {
Expand Down Expand Up @@ -176,8 +175,6 @@ class csa_bitcompressed
m_sa.swap(csa.m_sa);
m_isa.swap(csa.m_isa);
m_alphabet.swap(csa.m_alphabet);
m_psi = psi_type(this);
csa.m_psi = psi_type(&csa);
}
}

Expand Down Expand Up @@ -206,13 +203,6 @@ class csa_bitcompressed
return m_sa[i];
}

//! Call operator: read access to the inverse suffix array.
/*! \param i Position to look up, \f$ i \in [0..size()-1] \f$.
 *  \return The entry stored in m_isa at index i.
 */
inline value_type operator()(size_type i)const {
    const value_type isa_value = m_isa[i];
    return isa_value;
}

//! Assignment Operator.
/*!
* Required for the Assignable Concept of the STL.
Expand Down Expand Up @@ -242,7 +232,6 @@ class csa_bitcompressed
m_sa.load(in);
m_isa.load(in);
m_alphabet.load(in);
m_psi = psi_type(this);
}

size_type get_sample_dens()const {
Expand All @@ -266,15 +255,15 @@ class csa_bitcompressed
size_type lower_b = C[cc], upper_b = C[((size_type)1)+cc]; // lower_b inclusive, upper_b exclusive
while (lower_b+1 < upper_b) {
size_type mid = (lower_b+upper_b)/2;
if (m_psi[mid] >= i)
if (psi[mid] >= i)
upper_b = mid;
else
lower_b = mid;
}
if (lower_b > C[cc])
return lower_b - C[cc] + 1;
else { // lower_b == m_C[cc]
return m_psi[lower_b] < i;// 1 if m_psi[lower_b]<i, 0 otherwise
return psi[lower_b] < i;// 1 if psi[lower_b]<i, 0 otherwise
}
}

Expand All @@ -292,7 +281,7 @@ class csa_bitcompressed
if (cc==0 and c!=0) // character is not in the text => return size()
return size();
if (C[cc]+i-1 < C[((size_type)1)+cc]) {
return m_psi[C[cc]+i-1];
return psi[C[cc]+i-1];
}
return size();
}
Expand Down
39 changes: 13 additions & 26 deletions include/sdsl/csa_sada.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ class csa_sada
typedef size_type csa_size_type;
typedef ptrdiff_t difference_type;
typedef t_enc_vec enc_vector_type;
typedef psi_of_csa_psi<csa_sada> psi_type;
typedef enc_vector_type psi_type;
typedef traverse_csa_psi<csa_sada,false> lf_type;
typedef bwt_of_csa_psi<csa_sada> bwt_type;
typedef isa_of_csa_psi<csa_sada> isa_type;
typedef text_of_csa<csa_sada> text_type;
typedef first_row_of_csa<csa_sada> first_row_type;
typedef typename t_sa_sample_strat::template type<csa_sada>::sample_type sa_sample_type;
Expand All @@ -90,7 +92,8 @@ class csa_sada
typedef csa_tag index_category;
typedef psi_tag extract_category;

friend class psi_of_csa_psi<csa_sada>; // for access of m_psi
friend class traverse_csa_psi<csa_sada,true>;
friend class traverse_csa_psi<csa_sada,false>;

static const uint32_t linear_decode_limit = 100000;
private:
Expand Down Expand Up @@ -125,11 +128,13 @@ class csa_sada
const typename alphabet_type::comp2char_type& comp2char = m_alphabet.comp2char;
const typename alphabet_type::C_type& C = m_alphabet.C;
const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma;
const psi_type psi = psi_type(this);
const bwt_type bwt = bwt_type(this);
const bwt_type L = bwt_type(this);
const first_row_type F = first_row_type(this);
const text_type text = text_type(this);
const psi_type& psi = m_psi;
const lf_type lf = lf_type(*this);
const bwt_type bwt = bwt_type(*this);
const isa_type isa = isa_type(*this);
const bwt_type L = bwt_type(*this);
const first_row_type F = first_row_type(*this);
const text_type text = text_type(*this);
const sa_sample_type& sa_sample = m_sa_sample;
const isa_sample_type& isa_sample = m_isa_sample;

Expand Down Expand Up @@ -213,14 +218,6 @@ class csa_sada
*/
inline value_type operator[](size_type i)const;

//! ()-operator return inverse suffix array values
/*! \param i Index of the value. \f$ i \in [0..size()-1]\f$.
* \par Time complexity
* \f$ \Order{s_{SA^{-1}}\cdot t_{\Psi}} \f$, where every \f$s_{SA^{-1}}\f$th suffix array entry is sampled and \f$t_{\Psi}\f$
* is the access time for an element in the \f$\Psi\f$-function.
*/
inline value_type operator()(size_type i)const;

//! Assignment Operator.
/*!
* Required for the Assignable Concept of the STL.
Expand Down Expand Up @@ -412,7 +409,7 @@ inline auto csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_
{
size_type off = 0;
while (!m_sa_sample.is_sampled(i)) { // while i mod t_dens != 0 (SA[i] is not sampled) SG: auf keinen Fall get_sample_dens nehmen, ist total langsam
i = m_psi[i]; // go to the position where SA[i]+1 is located
i = psi[i]; // go to the position where SA[i]+1 is located
++off; // add 1 to the offset
}
value_type result = m_sa_sample.sa_value(i);
Expand All @@ -422,16 +419,6 @@ inline auto csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_
return result-off;
}

//! Call operator: compute an inverse suffix array value from the ISA samples.
/*!
 * Starts at the sample stored for block i/t_inv_dens and then follows
 * m_psi once for each of the remaining i % t_inv_dens positions.
 * \param i Index of the requested value.
 * \return  The value reached after the last m_psi step.
 */
template<class t_enc_vec, uint32_t t_dens, uint32_t t_inv_dens, class t_sa_sample_strat, class t_isa, class t_alphabet_strat>
inline auto csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::operator()(size_type i)const -> value_type
{
    const size_type steps = i % t_inv_dens;          // psi applications still needed after the sample
    value_type pos = m_isa_sample[i/t_inv_dens];     // sampled start value for the block containing i
    for (size_type k = 0; k < steps; ++k) {
        pos = m_psi[pos];
    }
    return pos;
}

template<class t_enc_vec, uint32_t t_dens, uint32_t t_inv_dens, class t_sa_sample_strat, class t_isa, class t_alphabet_strat>
auto csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const -> size_type
Expand Down
Loading

0 comments on commit b116082

Please sign in to comment.