diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..319a0a1 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,45 @@ +name: Build + +on: + push: + pull_request: + +jobs: + build: + strategy: + matrix: + compiler: + - { name: gcc, version: 11} + - { name: gcc, version: 12} + - { name: gcc, version: 13} + - { name: gcc, version: 14} + - { name: clang, version: 16} + - { name: clang, version: 17} + - { name: clang, version: 18} + name: Build (${{ matrix.compiler.name }} ${{ matrix.compiler.version }}) + runs-on: ubuntu-24.04 + steps: + - name: Install dependencies + run: | + sudo add-apt-repository universe + sudo apt-get update + sudo apt-get install --assume-yes --no-install-recommends ca-certificates cmake git + - name: Install GCC + if: ${{ matrix.compiler.name == 'gcc' }} + run: | + sudo apt-get install --assume-yes --no-install-recommends gcc-${{ matrix.compiler.version }} g++-${{ matrix.compiler.version }} + echo "CC=/usr/bin/gcc-${{ matrix.compiler.version }}" >> $GITHUB_ENV + echo "CXX=/usr/bin/g++-${{ matrix.compiler.version }}" >> $GITHUB_ENV + - name: Install Clang + if: ${{ matrix.compiler.name == 'clang' }} + run: | + sudo apt-get install --assume-yes --no-install-recommends clang-${{ matrix.compiler.version }} + echo "CC=/usr/bin/clang-${{ matrix.compiler.version }}" >> $GITHUB_ENV + echo "CXX=/usr/bin/clang++-${{ matrix.compiler.version }}" >> $GITHUB_ENV + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Build + run: | + cmake -B ./build + cmake --build ./build --parallel diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 8f618a2..4a21326 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -2,12 +2,7 @@ name: "CodeQL" on: push: - branches: [ "master" ] pull_request: - # The branches below must be a subset of the branches above - branches: [ "master" ] - # schedule: - # - cron: '28 20 * * 4' jobs: analyze: @@ -18,26 +13,16 @@ jobs: contents: read security-events: write - strategy: - fail-fast: false - matrix: - language: [ 'cpp' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 + with: + submodules: recursive - # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: - languages: ${{ matrix.language }} - - # Compile - - name: Checkout submodules - run: git submodule update --init --recursive + languages: 'cpp' - name: Creating build directory run: cmake -E make_directory ./build @@ -54,4 +39,4 @@ jobs: - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 with: - category: "/language:${{matrix.language}}" + category: "/language:cpp" diff --git a/.gitignore b/.gitignore index 22699d3..d1aa56d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ build/ old/ .DS_Store +cmake-build* +.idea diff --git a/CMakeLists.txt b/CMakeLists.txt index 3cb1862..d979b6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,12 +11,8 @@ if(NOT TARGET PTHASH) target_include_directories(PTHASH INTERFACE .) target_compile_features(PTHASH INTERFACE cxx_std_17) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - target_compile_options(PTHASH INTERFACE -stdlib=libc++) - endif() MESSAGE(STATUS "Compiling for processor: " ${CMAKE_HOST_SYSTEM_PROCESSOR}) - if (UNIX AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")) MESSAGE(STATUS "Compiling with flags: -march=native -mbmi2 -msse4.2") target_compile_options(PTHASH INTERFACE -march=native) @@ -57,6 +53,8 @@ endif() # Only add benchmarks and tests when compiling PTHash itself, not when added as a dependency if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + target_compile_options(PTHASH INTERFACE -Werror) + add_executable(build src/build.cpp) target_link_libraries(build PRIVATE PTHASH) add_executable(example src/example.cpp) diff --git a/external/bits b/external/bits index 7aa6eee..8f810e4 160000 --- a/external/bits +++ b/external/bits @@ -1 +1 @@ -Subproject commit 7aa6eee43951190d258cb7f66349a87dccb70b3c +Subproject commit 8f810e48f93ba70884bd6cc37112e22896eb1397 diff --git a/include/builders/internal_memory_builder_single_phf.hpp b/include/builders/internal_memory_builder_single_phf.hpp index 4dfd9e5..435438b 100644 --- a/include/builders/internal_memory_builder_single_phf.hpp +++ b/include/builders/internal_memory_builder_single_phf.hpp @@ -178,7 +178,7 @@ struct internal_memory_builder_single_phf { std::swap(m_num_keys, other.m_num_keys); std::swap(m_num_buckets, other.m_num_buckets); std::swap(m_table_size, other.m_table_size); - std::swap(m_bucketer, other.m_bucketer); + m_bucketer.swap(other.m_bucketer); m_pilots.swap(other.m_pilots); m_free_slots.swap(other.m_free_slots); } diff --git a/include/utils/bucketers.hpp b/include/utils/bucketers.hpp index 3c541b7..504fe70 100644 --- a/include/utils/bucketers.hpp +++ b/include/utils/bucketers.hpp @@ -8,7 +8,7 @@ namespace pthash { template struct table_bucketer { - table_bucketer() : base(Bucketer()) {} + table_bucketer() : base(Bucketer()), fulcrums() {} void init(const uint64_t num_buckets, const double lambda, const uint64_t table_size, const double alpha) { @@ -41,6 +41,11 @@ struct table_bucketer { return base.num_buckets() + fulcrums.size() * 64; } + void swap(table_bucketer& other) { + base.swap(other.base); + std::swap(fulcrums, other.fulcrums); + } + template void visit(Visitor& visitor) const { visit_impl(visitor, *this); @@ -64,7 +69,7 @@ struct table_bucketer { }; struct opt_bucketer { - opt_bucketer() {} + opt_bucketer() : c(0), m_num_buckets(0), m_alpha(0), m_alpha_factor(0) {} inline double baseFunc(const double normalized_hash) const { return (normalized_hash + (1 - normalized_hash) * std::log(1 - normalized_hash)) * @@ -106,6 +111,13 @@ struct opt_bucketer { 8 * sizeof(m_alpha_factor); } + void swap(opt_bucketer& other) { + std::swap(c, other.c); + std::swap(m_num_buckets, other.m_num_buckets); + std::swap(m_alpha, other.m_alpha); + std::swap(m_alpha_factor, other.m_alpha_factor); + } + template void visit(Visitor& visitor) const { visit_impl(visitor, *this); @@ -148,7 +160,7 @@ struct skew_bucketer { } inline uint64_t bucket(uint64_t hash) const { - static const uint64_t T = constants::a * UINT64_MAX; + static const uint64_t T = constants::a * static_cast(UINT64_MAX); return (hash < T) ? fastmod::fastmod_u64(hash, m_M_num_dense_buckets, m_num_dense_buckets) : m_num_dense_buckets + fastmod::fastmod_u64(hash, m_M_num_sparse_buckets, m_num_sparse_buckets); @@ -194,7 +206,7 @@ struct skew_bucketer { }; struct range_bucketer { - range_bucketer() {} + range_bucketer() : m_num_buckets(0), m_M_num_buckets(0) {} void init(const uint64_t num_buckets) { m_num_buckets = num_buckets; @@ -212,6 +224,11 @@ struct range_bucketer { return 8 * (sizeof(m_num_buckets) + sizeof(m_M_num_buckets)); } + void swap(range_bucketer& other) { + std::swap(m_num_buckets, other.m_num_buckets); + std::swap(m_M_num_buckets, other.m_M_num_buckets); + } + template void visit(Visitor& visitor) const { visit_impl(visitor, *this); @@ -229,8 +246,8 @@ struct range_bucketer { visitor.visit(t.m_M_num_buckets); } - uint64_t m_num_buckets{}; - __uint128_t m_M_num_buckets{}; + uint64_t m_num_buckets; + __uint128_t m_M_num_buckets; }; struct uniform_bucketer { @@ -254,6 +271,11 @@ struct uniform_bucketer { return 8 * (sizeof(m_num_buckets) + sizeof(m_M_num_buckets)); } + void swap(uniform_bucketer& other) { + std::swap(m_num_buckets, other.m_num_buckets); + std::swap(m_M_num_buckets, other.m_M_num_buckets); + } + template void visit(Visitor& visitor) const { visit_impl(visitor, *this); diff --git a/include/utils/hasher.hpp b/include/utils/hasher.hpp index 093b8fa..1185db5 100644 --- a/include/utils/hasher.hpp +++ b/include/utils/hasher.hpp @@ -2,7 +2,6 @@ // See also https://github.com/jermp/bench_hash_functions -#include #include namespace pthash { @@ -171,7 +170,7 @@ struct murmurhash2_64 { typedef hash64 hash_type; // generic range of bytes - static inline hash64 hash(byte_range range, uint64_t seed) { + static inline hash64 hash(byte_range const& range, uint64_t seed) { return MurmurHash2_64(range.begin, range.end - range.begin, seed); } @@ -181,7 +180,7 @@ struct murmurhash2_64 { } // specialization for uint64_t - static inline hash64 hash(uint64_t val, uint64_t seed) { + static inline hash64 hash(uint64_t const& val, uint64_t seed) { return MurmurHash2_64(reinterpret_cast(&val), sizeof(val), seed); } }; @@ -190,7 +189,7 @@ struct murmurhash2_128 { typedef hash128 hash_type; // generic range of bytes - static inline hash128 hash(byte_range range, uint64_t seed) { + static inline hash128 hash(byte_range const& range, uint64_t seed) { return {MurmurHash2_64(range.begin, range.end - range.begin, seed), MurmurHash2_64(range.begin, range.end - range.begin, ~seed)}; } @@ -202,7 +201,7 @@ struct murmurhash2_128 { } // specialization for uint64_t - static inline hash128 hash(uint64_t val, uint64_t seed) { + static inline hash128 hash(uint64_t const& val, uint64_t seed) { return {MurmurHash2_64(reinterpret_cast(&val), sizeof(val), seed), MurmurHash2_64(reinterpret_cast(&val), sizeof(val), ~seed)}; } @@ -217,12 +216,12 @@ struct xxhash128 { } // specialization for uint64_t - static inline hash_type hash(uint64_t val, uint64_t seed) { + static inline hash_type hash(uint64_t const& val, uint64_t seed) { return XXH128(&val, sizeof(val), seed); } // specialization for std::pair - static inline hash_type hash(std::pair val, uint64_t seed) { + static inline hash_type hash(std::pair const& val, uint64_t seed) { return XXH128(&val, sizeof(val), seed); } }; diff --git a/src/example.cpp b/src/example.cpp index 34bcd28..9a72419 100644 --- a/src/example.cpp +++ b/src/example.cpp @@ -41,7 +41,7 @@ int main() { // pthash_type; typedef dense_partitioned_phf, // bucketer inter_R, // encoder type true, // minimal pthash_search_type::add_displacement // additive displacement