Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove GSL in favor of stdlib #593

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ target_link_libraries(pisa
Boost::boost
mio
mio_base
GSL
spdlog
fmt::fmt
range-v3
Expand Down
3 changes: 0 additions & 3 deletions external/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ add_library(simdcomp STATIC ${CMAKE_CURRENT_SOURCE_DIR}/simdcomp/src/simdbitpack
${CMAKE_CURRENT_SOURCE_DIR}/simdcomp/src/simdcomputil.c
)

# Add GSL
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/GSL EXCLUDE_FROM_ALL)

# Add Boost
if (NOT PISA_SYSTEM_BOOST)
add_subdirectory(boost-cmake)
Expand Down
5 changes: 3 additions & 2 deletions include/pisa/codec/block_codec_registry.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#pragma once

#include <algorithm>
#include <memory>
#include <span>
#include <string_view>

#include <fmt/format.h>
#include <gsl/span>

#include "codec/block_codec.hpp"

Expand Down Expand Up @@ -44,6 +45,6 @@ struct BlockCodecRegistry {
/**
* Lists the names of all known block codecs.
*/
[[nodiscard]] constexpr auto get_block_codec_names() -> gsl::span<std::string_view const>;
[[nodiscard]] constexpr auto get_block_codec_names() -> std::span<std::string_view const>;

} // namespace pisa
1 change: 0 additions & 1 deletion include/pisa/concepts/container.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

// Copyright 2024 PISA developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
6 changes: 3 additions & 3 deletions include/pisa/invert.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#pragma once

#include <optional>
#include <span>
#include <thread>
#include <vector>

#include <gsl/span>
#include <range/v3/view/iota.hpp>
#include <tbb/blocked_range.h>

Expand All @@ -16,7 +16,7 @@ namespace pisa { namespace invert {
using PostingIterator = typename std::vector<Posting>::iterator;
using Documents = std::unordered_map<Term_Id, std::vector<Document_Id>>;
using Frequencies = std::unordered_map<Term_Id, std::vector<Frequency>>;
using DocumentRange = gsl::span<gsl::span<Term_Id const>>;
using DocumentRange = std::span<std::span<Term_Id const>>;

/// Inverted index abstraction used internally in the inverting process.
///
Expand All @@ -42,7 +42,7 @@ namespace pisa { namespace invert {

/// A single slice view over a chunk of a forward index.
struct ForwardIndexSlice {
gsl::span<gsl::span<Term_Id const>> documents;
std::span<std::span<Term_Id const>> documents;
ranges::iota_view<Document_Id, Document_Id> document_ids;
};

Expand Down
5 changes: 2 additions & 3 deletions include/pisa/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
#include <exception>
#include <filesystem>
#include <iostream>
#include <span>
#include <string>
#include <vector>

#include <gsl/span>

namespace pisa::io {

/// Indicates that a file was not found.
Expand Down Expand Up @@ -45,6 +44,6 @@ void for_each_line(std::istream& is, Function fn) {
[[nodiscard]] auto load_data(std::string const& data_file) -> std::vector<char>;

/// Writes bytes to a file.
void write_data(std::string const& data_file, gsl::span<std::byte const> bytes);
void write_data(std::string const& data_file, std::span<std::byte const> bytes);

} // namespace pisa::io
1 change: 0 additions & 1 deletion include/pisa/linear_quantizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cstdint>

#include <fmt/format.h>
#include <gsl/gsl_assert>

namespace pisa {

Expand Down
10 changes: 5 additions & 5 deletions include/pisa/memory_source.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

#include <filesystem>
#include <memory>
#include <span>
#include <vector>

#include <gsl/span>
#include <mio/mmap.hpp>

namespace pisa {
Expand All @@ -29,7 +29,7 @@ class MemorySource {
/// Constructs a memory source from a vector.
///
/// NOTE: This is non-owning source, so tread carefully!
[[nodiscard]] static auto from_span(gsl::span<char> span) -> MemorySource;
[[nodiscard]] static auto from_span(std::span<char> span) -> MemorySource;

/// Constructs a memory source using a memory mapped file.
///
Expand Down Expand Up @@ -65,13 +65,13 @@ class MemorySource {
[[nodiscard]] auto size() const -> size_type;

/// Full span over memory.
[[nodiscard]] auto span() const -> gsl::span<value_type const>;
[[nodiscard]] auto span() const -> std::span<value_type const>;

/// Subspan of memory.
///
/// \throws std::out_of_range if offset + size is out of bounds
[[nodiscard]] auto subspan(size_type offset, size_type size = gsl::dynamic_extent) const
-> gsl::span<value_type const>;
[[nodiscard]] auto subspan(size_type offset, size_type size = std::dynamic_extent) const
-> std::span<value_type const>;

/// Type erasure interface. Any type implementing it are supported as memory source.
struct Interface {
Expand Down
40 changes: 20 additions & 20 deletions include/pisa/payload_vector.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#pragma once

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <optional>
#include <span>
#include <string_view>
#include <vector>

#include <fmt/format.h>
#include <gsl/gsl_assert>
#include <gsl/span>

namespace pisa {

Expand All @@ -24,8 +24,8 @@ namespace detail {
using value_type = Payload_View;
using difference_type = std::make_signed_t<size_type>;

typename gsl::span<size_type const>::iterator offset_iter;
typename gsl::span<std::byte const>::iterator payload_iter;
typename std::span<size_type const>::iterator offset_iter;
typename std::span<std::byte const>::iterator payload_iter;

constexpr auto operator++() -> Payload_Vector_Iterator& {
++offset_iter;
Expand Down Expand Up @@ -194,7 +194,7 @@ auto encode_payload_vector(InputIterator first, InputIterator last, PayloadEncod
}

template <typename Payload, typename PayloadEncodingFn>
auto encode_payload_vector(gsl::span<Payload const> values, PayloadEncodingFn encoding_fn) {
auto encode_payload_vector(std::span<Payload const> values, PayloadEncodingFn encoding_fn) {
return encode_payload_vector(values.begin(), values.end(), encoding_fn);
}

Expand All @@ -207,13 +207,13 @@ auto encode_payload_vector(InputIterator first, InputIterator last) {
});
}

inline auto encode_payload_vector(gsl::span<std::string const> values) {
inline auto encode_payload_vector(std::span<std::string const> values) {
return encode_payload_vector(values.begin(), values.end());
}

template <typename... T>
constexpr auto unpack_head(gsl::span<std::byte const> mem)
-> std::tuple<T..., gsl::span<std::byte const>> {
constexpr auto unpack_head(std::span<std::byte const> mem)
-> std::tuple<T..., std::span<std::byte const>> {
static_assert(detail::all_pod<T...>::value);
auto offset = detail::sizeofs<T...>::value;
if (offset > mem.size()) {
Expand All @@ -223,10 +223,10 @@ constexpr auto unpack_head(gsl::span<std::byte const> mem)
}
auto tail = mem.subspan(offset);
auto head = detail::unpack<T...>(mem.data());
return std::tuple_cat(head, std::tuple<gsl::span<std::byte const>>(tail));
return std::tuple_cat(head, std::tuple<std::span<std::byte const>>(tail));
}

[[nodiscard]] inline auto split(gsl::span<std::byte const> mem, std::size_t offset) {
[[nodiscard]] inline auto split(std::span<std::byte const> mem, std::size_t offset) {
if (offset > mem.size()) {
throw std::runtime_error(
fmt::format("Cannot split span of size {} at position {}", mem.size(), offset)
Expand All @@ -236,14 +236,14 @@ constexpr auto unpack_head(gsl::span<std::byte const> mem)
}

template <typename T>
[[nodiscard]] auto cast_span(gsl::span<std::byte const> mem) -> gsl::span<T const> {
[[nodiscard]] auto cast_span(std::span<std::byte const> mem) -> std::span<T const> {
auto type_size = sizeof(T);
if (mem.size() % type_size != 0) {
throw std::runtime_error(
fmt::format("Failed to cast byte-span to span of T of size {}", type_size)
);
}
return gsl::make_span(reinterpret_cast<T const*>(mem.data()), mem.size() / type_size);
return std::span(reinterpret_cast<T const*>(mem.data()), mem.size() / type_size);
}

template <typename Payload_View = std::string_view>
Expand All @@ -257,17 +257,17 @@ class Payload_Vector {
explicit Payload_Vector(Payload_Vector_Buffer const& container)
: offsets_(container.offsets), payloads_(container.payloads) {}

Payload_Vector(gsl::span<size_type const> offsets, gsl::span<std::byte const> payloads)
Payload_Vector(std::span<size_type const> offsets, std::span<std::byte const> payloads)
: offsets_(offsets), payloads_(payloads) {}

template <typename ContiguousContainer>
[[nodiscard]] constexpr static auto from(ContiguousContainer&& mem) -> Payload_Vector {
return from(gsl::make_span(reinterpret_cast<std::byte const*>(mem.data()), mem.size()));
return from(std::span(reinterpret_cast<std::byte const*>(mem.data()), mem.size()));
}

[[nodiscard]] static auto from(gsl::span<std::byte const> mem) -> Payload_Vector {
[[nodiscard]] static auto from(std::span<std::byte const> mem) -> Payload_Vector {
size_type length;
gsl::span<std::byte const> tail;
std::span<std::byte const> tail;
try {
std::tie(length, tail) = unpack_head<size_type>(mem);
} catch (std::runtime_error const& err) {
Expand All @@ -276,7 +276,7 @@ class Payload_Vector {
);
}

gsl::span<std::byte const> offsets, payloads;
std::span<std::byte const> offsets, payloads;
try {
std::tie(offsets, payloads) = split(tail, (length + 1U) * sizeof(size_type));
} catch (std::runtime_error const& err) {
Expand Down Expand Up @@ -314,8 +314,8 @@ class Payload_Vector {
}

private:
gsl::span<size_type const> offsets_;
gsl::span<std::byte const> payloads_;
std::span<size_type const> offsets_;
std::span<std::byte const> payloads_;
};

/// Find the position of `value` in a sorted range.
Expand All @@ -339,7 +339,7 @@ auto binary_search(Iter begin, Iter end, T value, Compare cmp = std::less<>{})
/// It calls the function overload that takes iterators. See that overload's documentation for more
/// information.
template <typename T, typename Compare = std::less<T>>
auto binary_search(gsl::span<std::add_const_t<T>> range, T value, Compare cmp = std::less<T>{})
auto binary_search(std::span<std::add_const_t<T>> range, T value, Compare cmp = std::less<T>{})
-> std::optional<std::ptrdiff_t> {
return pisa::binary_search(range.begin(), range.end(), value, cmp);
}
Expand Down
22 changes: 13 additions & 9 deletions include/pisa/reorder_docids.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
#include <cstdint>
#include <fstream>
#include <random>
#include <span>
#include <string>
#include <vector>

#include <gsl/span>
#include <spdlog/spdlog.h>

#include "binary_freq_collection.hpp"
#include "payload_vector.hpp"
#include "recursive_graph_bisection.hpp"
#include "span.hpp"
#include "util/inverted_index_utils.hpp"
#include "util/progress.hpp"

Expand Down Expand Up @@ -80,8 +81,8 @@ namespace detail {
forward_index fwd = options.input_fwd
? forward_index::read(*options.input_fwd)
: forward_index::from_inverted_index(
options.input_basename, options.min_length, options.compress_fwd
);
options.input_basename, options.min_length, options.compress_fwd
);

if (options.output_fwd) {
forward_index::write(fwd, *options.output_fwd);
Expand Down Expand Up @@ -137,7 +138,7 @@ struct ReorderOptions {
inline auto reorder_postings(
binary_freq_collection const& input,
std::string_view output_basename,
gsl::span<std::uint32_t const> mapping
std::span<std::uint32_t const> mapping
) {
pisa::progress progress("Reassigning IDs in posting lists", input.size());

Expand All @@ -149,7 +150,7 @@ inline auto reorder_postings(
std::vector<std::pair<std::uint32_t, std::uint32_t>> posting_list;
for (const auto& seq: input) {
for (size_t i = 0; i < seq.docs.size(); ++i) {
posting_list.emplace_back(mapping[seq.docs.begin()[i]], seq.freqs.begin()[i]);
posting_list.emplace_back(pisa::at(mapping, seq.docs.begin()[i]), seq.freqs.begin()[i]);
}

std::sort(posting_list.begin(), posting_list.end());
Expand All @@ -169,7 +170,7 @@ inline auto reorder_postings(
inline auto reorder_lexicon(
std::string const& input_lexicon,
std::string const& output_lexicon,
gsl::span<std::uint32_t const> mapping
std::span<std::uint32_t const> mapping
)

{
Expand All @@ -187,16 +188,19 @@ inline auto reorder_lexicon(
inline auto reorder_sizes(
binary_collection const& input_sizes,
std::uint64_t num_docs,
gsl::span<std::uint32_t const> mapping,
std::span<std::uint32_t const> mapping,
std::string_view output_basename
) {
pisa::progress progress("Reordering document sizes", num_docs);
auto sizes = *input_sizes.begin();
if (sizes.size() != num_docs) {
throw std::invalid_argument("Invalid sizes file");
}
if (mapping.size() != num_docs) {
throw std::invalid_argument("Invalid mapping size");
}

auto size_sequence = gsl::span(sizes.begin(), sizes.size());
auto size_sequence = std::span(sizes.begin(), sizes.size());
std::vector<std::uint32_t> new_sizes(num_docs);
for (size_t i = 0; i < num_docs; ++i) {
new_sizes[mapping[i]] = size_sequence[i];
Expand All @@ -212,7 +216,7 @@ inline void reorder_from_mapping(
binary_freq_collection const& input_collection,
binary_collection const& input_sizes,
ReorderOptions const& options,
gsl::span<std::uint32_t const> mapping
std::span<std::uint32_t const> mapping
) {
auto num_docs = input_collection.num_docs();
reorder_sizes(input_sizes, num_docs, mapping, options.output_basename);
Expand Down
7 changes: 3 additions & 4 deletions include/pisa/sharding.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#pragma once

#include <optional>
#include <span>

#include <gsl/span>
#include <spdlog/spdlog.h>

#include "io.hpp"
#include "type_safe.hpp"
#include "vec_map.hpp"

Expand All @@ -18,10 +17,10 @@ format_shard(std::string_view basename, Shard_Id shard, std::string_view suffix

auto resolve_shards(std::string_view basename, std::string_view suffix = {}) -> std::vector<Shard_Id>;

auto mapping_from_files(std::istream* full_titles, gsl::span<std::istream*> shard_titles)
auto mapping_from_files(std::istream* full_titles, std::span<std::istream*> shard_titles)
-> VecMap<Document_Id, Shard_Id>;

auto mapping_from_files(std::string const& full_titles, gsl::span<std::string const> shard_titles)
auto mapping_from_files(std::string const& full_titles, std::span<std::string const> shard_titles)
-> VecMap<Document_Id, Shard_Id>;

auto create_random_mapping(
Expand Down
Loading