Skip to content

Commit

Permalink
Added documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
cpockrandt committed Aug 8, 2018
1 parent 54db157 commit 8e8099b
Show file tree
Hide file tree
Showing 5 changed files with 533 additions and 73 deletions.
44 changes: 40 additions & 4 deletions include/seqan3/index/all.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,46 @@
// DAMAGE.
//

/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Meta-header for the index module.
*/
/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Meta-header for the index module.
*
* \defgroup index Index
*
* ## Introduction
*
* Indices are a core component for searching large amounts of data and are used for tools such as read
* mappers, assemblers or protein search tools. There are currently two major kinds of indices: FM indices and
* k-mer indices (also known as q-gram indices).
*
* Besides searching the index yourself using the iterator interfaces, SeqAn3 also provides a very powerful
* search module that makes using iterators and implementing your own index-based search algorithms
* superfluous.
*
* ## FM Indices
*
* You can choose between unidirectional and bidirectional FM indices (which can be thought of as suffix trees
* and affix trees, i.e. a combination of suffix and prefix trees being able to search a pattern from left to
* right, right to left and character by character in any arbitrary order). Roughly speaking, bidirectional
* FM indices are more powerful for approximate string matching at the cost of a higher space consumption
* (depending on the configuration between a factor of TODO and TODO).
*
* The FM indices are based on the SDSL (succinct data structure library). You are able to specify the
* underlying implementation of the SDSL to adjust it to your needs, as well as choose one of the preconfigured
* indices that are suitable for common applications in sequence analysis.
*
* Even though the SDSL supports both byte and integer alphabets, SeqAn3 is optimized for byte alphabets. For
* integer alphabets you currently cannot use any of the index interfaces of SeqAn3.
*
* All FM indices have a suffix-tree-like interface. Even though FM indices are actually prefix trees, they
* can be searched like a suffix tree for convenience, i.e. there is no need to reverse the text before
* indexing, to reverse the pattern before searching or to recompute text positions afterwards.
*
* ## k-mer Indices
*
* Coming soon. Stay tuned!
*
*/

#pragma once

Expand Down
61 changes: 60 additions & 1 deletion include/seqan3/index/concept.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@
namespace seqan3
{

/*!\addtogroup index
* \{
*/

/*!\interface seqan3::fm_index_traits_concept <>
* \brief Concept for FM Index traits.
*
* The traits object must contain an index type of the SDSL namespace.
*/
//!\cond
template <typename t>
concept bool fm_index_traits_concept = requires (t v,
typename t::sdsl_index_type::size_type lb,
Expand All @@ -66,7 +76,29 @@ concept bool fm_index_traits_concept = requires (t v,
{ sdsl::backward_search(typename t::sdsl_index_type{}, lb, rb, typename t::sdsl_index_type::char_type{},
lb, rb) } -> typename t::sdsl_index_type::size_type;
};
//!\endcond

/*!\name Requirements for seqan3::fm_index_traits_concept
* \relates seqan3::fm_index_traits_concept
* \brief The SDSL index must support the following interface to work with SeqAn3.
* \{
*/

/*!\typedef typename t::sdsl_index_type sdsl_index_type
* \memberof seqan3::fm_index_traits_concept
* \brief Declares the type of the underlying SDSL index.
*/
//!\}

// TODO DOC: sdsl index requirements (maybe move them into a separate concept)


/*!\interface seqan3::fm_index_concept <>
* \brief Concept for FM Indices.
*
* This concept defines the interface for unidirectional FM indices.
*/
//!\cond
template <typename t>
concept bool fm_index_concept = requires (t v)
{
Expand Down Expand Up @@ -99,6 +131,32 @@ concept bool fm_index_concept = requires (t v)
{ v.load(std::string{}) } -> bool;
{ v.store(std::string{}) } -> bool;
};
//!\endcond

/*!\name Requirements for seqan3::fm_index_concept
* \relates seqan3::fm_index_concept
* \brief You can expect these member types and member functions on all types that satisfy seqan3::fm_index_concept.
* \{
*/

/*!\typedef typename t::char_type char_type
* \memberof seqan3::fm_index_concept
* \brief Type of the underlying character of text_type.
*/

/*!\typedef typename t::size_type size_type
* \memberof seqan3::fm_index_concept
* \brief TODO.
*/

/*!\typedef typename t::iterator_type iterator_type
* \memberof seqan3::fm_index_concept
* \brief Type of the iterator.
*/

// TODO

//!\}

template <typename t>
concept bool fm_index_iterator_concept = requires (t it)
Expand All @@ -119,7 +177,7 @@ concept bool fm_index_iterator_concept = requires (t it)
{ it.down(typename t::index_type::char_type{}) } -> bool;
{ it.down(std::vector<typename t::index_type::char_type>{}) } -> bool;
{ it.right() } -> bool;

{ it.children() } -> std::array<t, alphabet_size_v<typename t::index_type::char_type>>;

{ it.depth() } -> typename t::size_type;
Expand All @@ -128,5 +186,6 @@ concept bool fm_index_iterator_concept = requires (t it)
{ it.locate() } -> std::vector<typename t::size_type>;
{ it.lazy_locate() } -> auto;
};
//!\}

} // namespace seqan3
25 changes: 24 additions & 1 deletion include/seqan3/index/detail/fm_index_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,36 @@
namespace seqan3::detail
{

// NOTE: index_t requires fm_index_concept. circular dependency
/*!\addtogroup index
* \{
*/

//!\privatesection

/*!\brief Internal representation of the node of an FM index iterator.
* \ingroup fm_index
* \tparam index_t The type of the underlying index; must satisfy seqan3::fm_index_concept.
*/
template <typename index_t>
struct fm_index_iterator_node
{
//!\brief Type of the suffix array bounds and of the depth; taken from the underlying index.
using size_type = typename index_t::size_type;
/*!\brief The character type of the reduced alphabet. (The reduced alphabet might be smaller than the original
* alphabet in case not all possible characters occur in the indexed text.)
*/
using comp_char_type = typename index_t::comp_char_type;

//!\brief Left suffix array bound.
size_type lb;
//!\brief Right suffix array bound.
size_type rb;
//!\brief Depth of the node in the suffix tree, i.e. length of the searched sequence.
size_type depth;
//!\brief Label of the last edge moved down. Needed for right().
comp_char_type last_char;

//!\brief Comparison of two iterator nodes.
bool operator==(fm_index_iterator_node const & rhs) const
{
// NOTE: last_char is implementation specific for right().
Expand All @@ -67,10 +85,15 @@ struct fm_index_iterator_node
return lb == rhs.lb && rb == rhs.rb && depth == rhs.depth;
}

//!\brief Inequality comparison of two iterator nodes; the exact negation of operator==.
bool operator!=(fm_index_iterator_node const & rhs) const
{
    bool const is_equal = (*this == rhs);
    return !is_equal;
}
};

//!\publicsection

//!\}

}
Loading

0 comments on commit 8e8099b

Please sign in to comment.