diff --git a/README.md b/README.md index 6af46f4..aa28835 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ COMPATIBILITY This library is compatible with C++98, but if you give compile it with C++11 or later, this library uses `std::move()` instead of value copy and thus you can sort move-only types (see [#9](https://github.com/gfx/cpp-TimSort/pull/9) for details). -You can disable use of `std::move()` by passing the macro '-DDISABLE_STD_MOVE'. +You can disable use of `std::move()` by passing the macro '-DGFX_TIMSORT_USE_CXX11=0' SEE ALSO ================== diff --git a/test/test.cpp b/test/test.cpp index 5efbcd7..6f37116 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -10,7 +10,7 @@ #include "timsort.hpp" -#if ENABLE_STD_MOVE +#if GFX_TIMSORT_USE_CXX11 #warning std::move() enabled #else #warning std::move() disabled diff --git a/timsort.hpp b/timsort.hpp index 77dd7a2..a9fe3cd 100644 --- a/timsort.hpp +++ b/timsort.hpp @@ -29,7 +29,7 @@ #ifndef GFX_TIMSORT_HPP #define GFX_TIMSORT_HPP -#include +#include // std::allocator #include #include // std::copy #include // std::less @@ -41,10 +41,88 @@ #define GFX_TIMSORT_LOG(expr) ((void)0) #endif -// If compiler supports both type traits and move semantics - will cover most but not all compilers/std libraries: -#if (defined(_MSC_VER) && _MSC_VER >= 1700) || ((defined(__cplusplus) && __cplusplus >= 201103L && !defined(_LIBCPP_VERSION)) && ((!defined(__GNUC__) || __GNUC__ >= 5)) && (!defined(__GLIBCXX__) || __GLIBCXX__ >= 20150422)) +// The "GFX_TIMSORT_USE_CXX11" define can be used to control whether we +// use C++11 extensions like type-traits and move semantics. 
By default +// it is enabled unless we suspect that the compiler or STL is too old +// to support them: +#ifndef GFX_TIMSORT_USE_CXX11 +# define GFX_TIMSORT_USE_CXX11 1 +# ifdef _MSC_VER +# if _MSC_VER < 1700 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# ifdef __cplusplus +# if __cplusplus < 201103L +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# undef GFX_TIMSORT_USE_CXX11 +# endif +# if defined(__cplusplus) && __cplusplus < 201103L +# undef GFX_TIMSORT_USE_CXX11 +# endif +# ifdef _GLIBCXX_RELEASE + // This setting only got added in gcc 7.1, so its presence always + // indicates a C++11-ready STL +# elif defined(__GLIBCXX__) + // Before 7.1, the only way to test the version of libstdc++ is the + // __GLIBCXX__ date macro. However, it's not monotonically increasing: + // releases kept being made from older branches. The best we can do is + // to disallow any version that is definitely before gcc 5.1 (the first + // version that had enough C++11 support for us) and then blacklist + // dates that are known to correspond with non-working versions. + // + // Note this really only is a problem when mixing compilers and STL (i.e. + // compiling using clang but using gcc's libstdc++). Otherwise we'll + // correctly reject the gcc compiler if it's too old later. 
+# if __GLIBCXX__ < 20150422 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# if __GLIBCXX__ == 20150426 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.8.4+patches shipped with Ubuntu LTS 14.04 +# endif +# if __GLIBCXX__ == 20150623 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.8.5 +# endif +# if __GLIBCXX__ == 20150626 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.9.3 +# endif +# if __GLIBCXX__ == 20160803 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.9.5 +# endif +# elif defined(__GLIBCPP__) // *really* old version of libstdc++ +# undef GFX_TIMSORT_USE_CXX11 +# endif +# ifdef _LIBCPP_VERSION +# if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES) || defined(_LIBCPP_CXX03_LANG) +# undef GFX_TIMSORT_USE_CXX11 +# endif +# endif +# ifdef __clang__ +# ifdef __has_feature +# if !(__has_feature(cxx_rvalue_references) && __has_feature(is_trivially_copyable)) +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# undef GFX_TIMSORT_USE_CXX11 +# endif +# elif defined(__GNUC__) +# if __GNUC__ < 5 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# endif +# endif +# ifndef GFX_TIMSORT_USE_CXX11 +# define GFX_TIMSORT_USE_CXX11 0 +# endif +#endif + +#if GFX_TIMSORT_USE_CXX11 + #include #include // iterator_traits #include // std::move + #include // std::memcpy #define GFX_TIMSORT_MOVE(x) (std::is_move_constructible::value && std::is_move_assignable::value) ? std::move(x) : (x) #define GFX_TIMSORT_MOVE_RANGE(in1, in2, out) \ @@ -126,34 +204,299 @@ template class Compare { func_type less_; }; -template class TimSort { +// Some details shared between the two different implementations of TimSortMergeSpace<> +template struct TimSortMergeSpaceBase : public std::allocator { + value_t *startp_; + value_t *endp_; + value_t const *alloc_limitp_; + TimSortMergeSpaceBase() : startp_(0), endp_(0), alloc_limitp_(0) { + } + LengthType next_capacity() const { + if (startp_ == 0) { + return (sizeof(*startp_) > 32) ? 
1 : (64 / sizeof(*startp_)); + } + LengthType const old_cap = alloc_limitp_ - startp_; + assert(old_cap > 0); + return (old_cap * 3) >> 1; + } +#if GFX_TIMSORT_USE_CXX11 + private: + TimSortMergeSpaceBase(const TimSortMergeSpaceBase&) = delete; + TimSortMergeSpaceBase& operator=(const TimSortMergeSpaceBase&) = delete; +#endif +}; + +// This is a generic memory buffer for temporary holding space during TimSort +// merge operations. This implementation will work for any supported "value_t" +// type (one that is at least move-constructible and move-assignable) +// +// This just provides a temporary buffer which can have elements moved +// into it via the move_in() method and then iterated using begin()/size() +template class TimSortMergeSpace { + public: + typedef value_t *iterator; + + iterator begin() { + return base_.startp_; + } + LengthType size() const { + return base_.endp_ - base_.startp_; + } + private: + TimSortMergeSpaceBase base_; + value_t *ctor_limitp_; + + void destruct() { + if (base_.startp_ != 0) { + iterator const e = ctor_limitp_; + iterator p = begin(); + do { + p->~value_t(); + } while (++p < e); + base_.deallocate(base_.startp_, base_.alloc_limitp_ - base_.startp_); + } + } + public: + TimSortMergeSpace() : ctor_limitp_(0) { + } + ~TimSortMergeSpace() { + destruct(); + } + void move_in(RandomAccessIterator const in_begin, LengthType const len) { + assert(len > 0); + value_t *nend = base_.startp_ + len; + RandomAccessIterator const in_end = in_begin + len; + if (nend <= ctor_limitp_) { + GFX_TIMSORT_MOVE_RANGE(in_begin, in_end, base_.startp_); + } else { + // We'll have to construct at least one new element; s..in_end + // represents the range of source elements that will need + // that treatment + RandomAccessIterator s; + if (nend <= base_.alloc_limitp_) { + // We don't need to allocate new memory, but we do need + // to call the constructor on some of our elements since + // we haven't been this size before. 
The others we can + // just use move_range() on: + s = in_begin + (ctor_limitp_ - base_.startp_); + GFX_TIMSORT_MOVE_RANGE(in_begin, s, base_.startp_); + } else { + // Our current allocation is too small so allocate a + // new array entirely + LengthType const new_cap = std::max(base_.next_capacity(), len); + destruct(); + value_t *nv; + try { + nv = base_.allocate(new_cap); + } catch (...) { + base_.startp_ = 0; + base_.endp_ = 0; + base_.alloc_limitp_ = 0; + ctor_limitp_ = 0; + throw; + } + base_.startp_ = nv; + base_.alloc_limitp_ = nv + new_cap; + ctor_limitp_ = nv; + nend = nv + len; + s = in_begin; + } + do { + new(ctor_limitp_) value_t(GFX_TIMSORT_MOVE(*s)); + ++ctor_limitp_; + } while (++s < in_end); + assert(nend <= ctor_limitp_); + assert(ctor_limitp_ <= base_.alloc_limitp_); + } + base_.endp_ = nend; + } +}; + +#if GFX_TIMSORT_USE_CXX11 +// Simpler specialization of TimSortMergeSpace<> for trivially-constructible +// value_t's. Here we don't need to bother calling the constructors/destructors +// on each element so we can use move_range(). This is significantly faster +// because it lets us use the well-optimized memcpy() instead of a per-element loop. 
+template class TimSortMergeSpace { + public: + typedef value_t *iterator; + + iterator begin() { + return base_.startp_; + } + LengthType size() const { + return base_.endp_ - base_.startp_; + } + private: + TimSortMergeSpaceBase base_; + + void destruct() { + if (base_.startp_ != 0) { + base_.deallocate(base_.startp_, base_.alloc_limitp_ - base_.startp_); + } + } + public: + TimSortMergeSpace() { + } + ~TimSortMergeSpace() { + destruct(); + } + void move_in(RandomAccessIterator const in_begin, LengthType const len) { + assert(len > 0); + value_t *nend = base_.startp_ + len; + if (nend > base_.alloc_limitp_) { + // Our current allocation is too small so allocate a + // new array entirely + LengthType const new_cap = std::max(base_.next_capacity(), len); + destruct(); + value_t *nv; + try { + nv = base_.allocate(new_cap); + } catch (...) { + base_.startp_ = 0; + base_.endp_ = 0; + base_.alloc_limitp_ = 0; + throw; + } + base_.startp_ = nv; + base_.alloc_limitp_ = nv + new_cap; + nend = nv + len; + assert(nend <= base_.alloc_limitp_); + } + // The most correct way to copy these trivially-constructible elements + // into base_.startp_ would be to do: + // GFX_TIMSORT_MOVE_RANGE(in_begin, in_begin + len, base_.startp_); + // Since the types are default-constructible the STL will be able to + // reduce this to a call to std::memmove(). However, we know that we're + // never dealing with overlapping memory here, so it's a tiny bit + // faster to use std::memcpy() instead. + std::memcpy(base_.startp_, &*in_begin, len * sizeof(base_.startp_[0])); + base_.endp_ = nend; + } +}; +#endif + +// Dynamically-allocated stack of pending "runs" that TimSort needs to merge. 
+template class TimSortRunStack { + public: + struct run { + RandomAccessIterator base; + LengthType len; + }; + private: + struct contents : public std::allocator { + contents() : startp_(0), endp_(0), alloc_limitp_(0) { + } + struct run *startp_; + struct run *endp_; + struct run const *alloc_limitp_; + + struct run *push_back_grow() { + assert(endp_ == alloc_limitp_); + LengthType const old_cap = (endp_ - startp_); + LengthType const new_cap = (startp_ == 0) ? 16 : ((old_cap * 3) >> 1); + assert(new_cap > old_cap); + struct run *nv = this->allocate(new_cap); + if (startp_ != 0) { + std::copy(startp_, endp_, nv); + this->deallocate(startp_, endp_ - startp_); + } + startp_ = nv; + // endp_ is set by our caller + alloc_limitp_ = nv + new_cap; + return nv + old_cap + 1; + } + }; + struct contents c_; +#if GFX_TIMSORT_USE_CXX11 + TimSortRunStack(const TimSortRunStack&) = delete; + TimSortRunStack& operator=(const TimSortRunStack&) = delete; +#endif + public: + TimSortRunStack() { + } + ~TimSortRunStack() { + c_.deallocate(c_.startp_, c_.alloc_limitp_ - c_.startp_); + } + void push_back(RandomAccessIterator const runBase, LengthType const runLen) { + struct run *nend = c_.endp_ + 1; + if (nend > c_.alloc_limitp_) { + nend = c_.push_back_grow(); + } + c_.endp_ = nend; + nend[-1].base = runBase; + nend[-1].len = runLen; + } + void pop_back() { + assert(c_.endp_ > c_.startp_); + --c_.endp_; + } + LengthType size() const { + return c_.endp_ - c_.startp_; + } + struct run& operator[](LengthType const i) { + return c_.startp_[i]; + } +}; + +namespace timsort_constants { +static const int MIN_GALLOP = 7; +static const int MIN_MERGE = 32; +} // namespace + +// This holds all of the TimSort state that is invariant with respect to +// LessFunction. In other words, this template expansion can be shared +// by the compiler when sorting the same type in different orderings. 
+template class TimSortState { typedef RandomAccessIterator iter_t; typedef typename std::iterator_traits::value_type value_t; - typedef typename std::iterator_traits::reference ref_t; typedef typename std::iterator_traits::difference_type diff_t; - typedef Compare compare_t; - static const int MIN_MERGE = 32; + TimSortRunStack pending_; - compare_t comp_; - - static const int MIN_GALLOP = 7; +#if GFX_TIMSORT_USE_CXX11 + typedef TimSortMergeSpace::value> merge_space; +#else + typedef TimSortMergeSpace merge_space; +#endif + merge_space tmp_; // temp storage for merges int minGallop_; // default to MIN_GALLOP - std::vector tmp_; // temp storage for merges - typedef typename std::vector::iterator tmp_iter_t; + TimSortState() : minGallop_(timsort_constants::MIN_GALLOP) { + } + ~TimSortState() { + } - struct run { - iter_t base; - diff_t len; + static diff_t minRunLength(diff_t n) { + using namespace timsort_constants; + assert(n >= 0); - run(iter_t const b, diff_t const l) : base(b), len(l) { + diff_t r = 0; + while (n >= MIN_MERGE) { + r |= (n & 1); + n >>= 1; } - }; - std::vector pending_; + return n + r; + } + template friend class TimSort; +}; + +template class TimSort +{ + TimSortState state_; + + typedef RandomAccessIterator iter_t; + typedef typename std::iterator_traits::value_type value_t; + typedef typename std::iterator_traits::difference_type diff_t; + typedef typename std::iterator_traits::reference ref_t; + typedef typename TimSortState::merge_space::iterator tmp_iter_t; + typedef Compare compare_t; + + compare_t comp_; static void sort(iter_t const lo, iter_t const hi, compare_t c) { + using namespace timsort_constants; assert(lo <= hi); diff_t nRemaining = (hi - lo); @@ -169,7 +512,7 @@ template class TimSort { } TimSort ts(c); - diff_t const minRun = minRunLength(nRemaining); + diff_t const minRun = TimSortState::minRunLength(nRemaining); iter_t cur = lo; do { diff_t runLen = countRunAndMakeAscending(cur, hi, c); @@ -180,7 +523,7 @@ template class 
TimSort { runLen = force; } - ts.pushRun(cur, runLen); + ts.state_.pending_.push_back(cur, runLen); ts.mergeCollapse(); cur += runLen; @@ -189,10 +532,10 @@ template class TimSort { assert(cur == hi); ts.mergeForceCollapse(); - assert(ts.pending_.size() == 1); + assert(ts.state_.pending_.size() == 1); GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size() - << " pending_.size(): " << ts.pending_.size()); + << " pending_.size(): " << ts.state_.pending_.size()); } // sort() static void binarySort(iter_t const lo, iter_t const hi, iter_t start, compare_t compare) { @@ -234,35 +577,20 @@ template class TimSort { return runHi - lo; } - static diff_t minRunLength(diff_t n) { - assert(n >= 0); - - diff_t r = 0; - while (n >= MIN_MERGE) { - r |= (n & 1); - n >>= 1; - } - return n + r; - } - - TimSort(compare_t c) : comp_(c), minGallop_(MIN_GALLOP) { - } - - void pushRun(iter_t const runBase, diff_t const runLen) { - pending_.push_back(run(runBase, runLen)); + explicit TimSort(compare_t c) : comp_(c) { } void mergeCollapse() { - while (pending_.size() > 1) { - diff_t n = pending_.size() - 2; + while (state_.pending_.size() > 1) { + diff_t n = state_.pending_.size() - 2; - if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) || - (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) { - if (pending_[n - 1].len < pending_[n + 1].len) { + if ((n > 0 && state_.pending_[n - 1].len <= state_.pending_[n].len + state_.pending_[n + 1].len) || + (n > 1 && state_.pending_[n - 2].len <= state_.pending_[n - 1].len + state_.pending_[n].len)) { + if (state_.pending_[n - 1].len < state_.pending_[n + 1].len) { --n; } mergeAt(n); - } else if (pending_[n].len <= pending_[n + 1].len) { + } else if (state_.pending_[n].len <= state_.pending_[n + 1].len) { mergeAt(n); } else { break; @@ -271,10 +599,10 @@ template class TimSort { } void mergeForceCollapse() { - while (pending_.size() > 1) { - diff_t n = pending_.size() - 2; + while 
(state_.pending_.size() > 1) { + diff_t n = state_.pending_.size() - 2; - if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) { + if (n > 0 && state_.pending_[n - 1].len < state_.pending_[n + 1].len) { --n; } mergeAt(n); @@ -282,26 +610,26 @@ template class TimSort { } void mergeAt(diff_t const i) { - diff_t const stackSize = pending_.size(); + diff_t const stackSize = state_.pending_.size(); assert(stackSize >= 2); assert(i >= 0); assert(i == stackSize - 2 || i == stackSize - 3); - iter_t base1 = pending_[i].base; - diff_t len1 = pending_[i].len; - iter_t base2 = pending_[i + 1].base; - diff_t len2 = pending_[i + 1].len; + iter_t base1 = state_.pending_[i].base; + diff_t len1 = state_.pending_[i].len; + iter_t base2 = state_.pending_[i + 1].base; + diff_t len2 = state_.pending_[i + 1].len; assert(len1 > 0 && len2 > 0); assert(base1 + len1 == base2); - pending_[i].len = len1 + len2; + state_.pending_[i].len = len1 + len2; if (i == stackSize - 3) { - pending_[i + 1] = pending_[i + 2]; + state_.pending_[i + 1] = state_.pending_[i + 2]; } - pending_.pop_back(); + state_.pending_.pop_back(); diff_t const k = gallopRight(*base2, base1, len1, 0); assert(k >= 0); @@ -417,11 +745,12 @@ template class TimSort { } void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) { + using namespace timsort_constants; assert(len1 > 0 && len2 > 0 && base1 + len1 == base2); - copy_to_tmp(base1, len1); + state_.tmp_.move_in(base1, len1); - tmp_iter_t cursor1 = tmp_.begin(); + tmp_iter_t cursor1 = state_.tmp_.begin(); iter_t cursor2 = base2; iter_t dest = base1; @@ -436,7 +765,7 @@ template class TimSort { return; } - int minGallop(minGallop_); + int minGallop(state_.minGallop_); // outer: while (true) { @@ -519,7 +848,7 @@ template class TimSort { minGallop += 2; } // end of "outer" loop - minGallop_ = std::min(minGallop, 1); + state_.minGallop_ = std::min(minGallop, 1); if (len1 == 1) { assert(len2 > 0); @@ -534,17 +863,18 @@ template class TimSort { } void 
mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) { + using namespace timsort_constants; assert(len1 > 0 && len2 > 0 && base1 + len1 == base2); - copy_to_tmp(base2, len2); + state_.tmp_.move_in(base2, len2); iter_t cursor1 = base1 + (len1 - 1); - tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1); + tmp_iter_t cursor2 = state_.tmp_.begin() + (len2 - 1); iter_t dest = base2 + (len2 - 1); *(dest--) = GFX_TIMSORT_MOVE(*(cursor1--)); if (--len1 == 0) { - GFX_TIMSORT_MOVE_RANGE(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + GFX_TIMSORT_MOVE_RANGE(state_.tmp_.begin(), state_.tmp_.begin() + len2, dest - (len2 - 1)); return; } if (len2 == 1) { @@ -555,7 +885,7 @@ template class TimSort { return; } - int minGallop(minGallop_); + int minGallop(state_.minGallop_); // outer: while (true) { @@ -609,7 +939,7 @@ template class TimSort { break; } - count2 = len2 - gallopLeft(*cursor1, tmp_.begin(), len2, len2 - 1); + count2 = len2 - gallopLeft(*cursor1, state_.tmp_.begin(), len2, len2 - 1); if (count2 != 0) { dest -= count2; cursor2 -= count2; @@ -638,7 +968,7 @@ template class TimSort { minGallop += 2; } // end of "outer" loop - minGallop_ = std::min(minGallop, 1); + state_.minGallop_ = std::min(minGallop, 1); if (len2 == 1) { assert(len1 > 0); @@ -649,16 +979,10 @@ template class TimSort { assert(len2 != 0 && "Comparison function violates its general contract"); assert(len1 == 0); assert(len2 > 1); - GFX_TIMSORT_MOVE_RANGE(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + GFX_TIMSORT_MOVE_RANGE(state_.tmp_.begin(), state_.tmp_.begin() + len2, dest - (len2 - 1)); } } - void copy_to_tmp(iter_t const begin, diff_t const len) { - tmp_.clear(); - tmp_.reserve(len); - GFX_TIMSORT_MOVE_RANGE(begin, begin + len, std::back_inserter(tmp_)); - } - // the only interface is the friend timsort() function template friend void timsort(IterT first, IterT last, LessT c); };