Skip to content

Commit

Permalink
Fixed some issues based on eseiler's review
Browse files Browse the repository at this point in the history
  • Loading branch information
cpockrandt committed Aug 7, 2018
1 parent c51cfac commit 9287193
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 34 deletions.
39 changes: 38 additions & 1 deletion include/seqan3/index/all.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,41 @@
// ============================================================================
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Knut Reinert or the FU Berlin nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//

/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Meta-header for the index module.
*/

#pragma once

#include <seqan3/index/fm_index.hpp>
// #include <seqan3/index/bidirectional_suffix_array.hpp>
13 changes: 8 additions & 5 deletions include/seqan3/index/concept.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -31,14 +31,17 @@
// DAMAGE.
//
// ============================================================================
// Author: Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
// ============================================================================

/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Contains the concepts for the seqan3::fm_index and its traits and iterators.
*/

#pragma once

#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/metafunction/range.hpp>
#include <seqan3/range/concept.hpp>
#include <seqan3/std/concept/range.hpp>

#include <sdsl/suffix_arrays.hpp>

Expand Down
16 changes: 7 additions & 9 deletions include/seqan3/index/detail/fm_index_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
// DAMAGE.
//
// ============================================================================
// Author: Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
// ============================================================================

/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Contains the internal representation of a node of the seqan3::fm_index_iterator.
*/

#pragma once

Expand All @@ -42,16 +45,11 @@

#include <seqan3/core/platform.hpp>


/*!\file
* \brief TODO
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
*/

namespace seqan3::detail
{

template <typename index_t> // TODO: suffix_array_concept does not work here. not looked into it yet
// NOTE: index_t is expected to model fm_index_concept, but it cannot be constrained here
//       because that would introduce a circular dependency.
template <typename index_t>
struct fm_index_iterator_node
{
using size_type = typename index_t::size_type;
Expand Down
16 changes: 10 additions & 6 deletions include/seqan3/index/fm_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -31,10 +31,13 @@
// DAMAGE.
//
// ============================================================================
// Author: Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
// ============================================================================

// #include <seqan3/index/concept.hpp>
/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Contains the unidirectional seqan3::fm_index.
*/

#include <seqan3/index/concept.hpp>
#include <seqan3/index/fm_index_iterator.hpp>
#include <seqan3/core/metafunction/range.hpp>

Expand Down Expand Up @@ -65,7 +68,8 @@ struct fm_index_default_traits

// TODO: noexcept is still missing

// TODO: check whether input_range_concept is the correct one! depends on open decisions in sdsl (im-construction, writing in-memory data to tmpfs and on the construction algorithms)
// TODO: check whether input_range_concept is the correct one! depends on open decisions in sdsl
// (im-construction, writing in-memory data to tmpfs and on the construction algorithms)

/*!\brief The SeqAn FM Index.
* \ingroup fm_index
Expand Down
30 changes: 22 additions & 8 deletions include/seqan3/index/fm_index_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -31,16 +31,21 @@
// DAMAGE.
//
// ============================================================================
// Author: Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
// ============================================================================

/*!\file
* \author Christopher Pockrandt <christopher.pockrandt AT fu-berlin.de>
* \brief Contains the seqan3::fm_index_iterator used to search in the unidirectional seqan3::fm_index.
*/

#pragma once

#include <seqan3/alphabet/all.hpp>

#include <seqan3/index/detail/fm_index_iterator.hpp>
#include <seqan3/index/concept.hpp>
#include <seqan3/std/concept/iterator.hpp>
#include <seqan3/core/metafunction/range.hpp>
#include <seqan3/range/concept.hpp>
#include <seqan3/std/concept/range.hpp>

#include <sdsl/suffix_trees.hpp>

Expand All @@ -51,7 +56,8 @@
namespace seqan3
{

// TODO: remove mapping by overwriting backward_search. one only has to deal with incomplete alphabets then (maybe add own alphabet type to sdsl?)
// TODO: remove mapping by overwriting backward_search.
// one only has to deal with incomplete alphabets then (maybe add own alphabet type to sdsl?)

// TODO: to_rank() + 1 consistent with comp_char, mapping and implicit_sentinel?

Expand Down Expand Up @@ -109,7 +115,12 @@ class fm_index_iterator
{
typename index_type::comp_char_type c = 1; // NOTE: start with 0 or 1 depending on implicit_sentinel
typename index_type::size_type _lb, _rb;
for (; c < index->m_index.sigma && !sdsl::backward_search(index->m_index, node.lb, node.rb, index->m_index.comp2char[c], _lb, _rb); ++c) {}
while (c < index->m_index.sigma &&
!sdsl::backward_search(index->m_index, node.lb, node.rb, index->m_index.comp2char[c], _lb, _rb))
{
++c;
}

if (c != index->m_index.sigma)
{
parent_lb = node.lb;
Expand Down Expand Up @@ -187,8 +198,11 @@ class fm_index_iterator
typename index_type::comp_char_type c = node.last_char + 1;
typename index_type::size_type _lb, _rb;

while (c < index->m_index.sigma && !sdsl::backward_search(index->m_index, parent_lb, parent_rb, index->m_index.comp2char[c], _lb, _rb))
while (c < index->m_index.sigma &&
!sdsl::backward_search(index->m_index, parent_lb, parent_rb, index->m_index.comp2char[c], _lb, _rb))
{
++c;
}

if (c != index->m_index.sigma)
{
Expand Down
7 changes: 4 additions & 3 deletions test/unit/index/fm_index_iterator_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -199,7 +199,8 @@ TYPED_TEST(fm_index_iterator_test, incomplete_alphabet)
EXPECT_EQ(it, iterator_type(sa));
}

// search a char that does not occur in the text (some rank that is neither the smallest nor the highest smallest occurring in text)
// search a char that does not occur in the text
// (some rank that is neither the smallest nor the highest occurring in text)
{
text_type text {"ATATAT"_dna4};
index_type sa{text};
Expand Down
4 changes: 2 additions & 2 deletions test/unit/index/fm_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down

0 comments on commit 9287193

Please sign in to comment.