diff --git a/include/info.cpp b/include/info.cpp index 15f27b3..963bcf3 100644 --- a/include/info.cpp +++ b/include/info.cpp @@ -44,12 +44,20 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ /* summing (M-1) provides an upper bound to the num. of super-kmers */ double Z = (2.0 * n) / (k - m + 2) + (M - 1); - double num_bits = 2 * N + Z * (5.0 + std::ceil(std::log2(N))) + - M * (2.0 + std::ceil(std::log2(static_cast(N) / M))); + /* A cache line is 64 B = 512 bits --> + max window_size that fits in a cache line is 512/2 = 256 + assuming a 2-bit encoded stream. */ + const uint64_t window_size = 1; /* 256; */ + + double num_bits = + 2 * N + Z * (5.0 + std::ceil(std::log2(std::ceil(static_cast(N) / window_size)))) + + M * (2.0 + std::ceil(std::log2(static_cast(N) / M))); return num_bits / n; } +double perc(uint64_t amount, uint64_t total) { return (amount * 100.0) / total; } + void dictionary::print_space_breakdown() const { const uint64_t num_bytes = (num_bits() + 7) / 8; std::cout << "total index size: " << num_bytes << " [B] -- " @@ -58,20 +66,21 @@ void dictionary::print_space_breakdown() const { std::cout << " minimizers: " << static_cast(m_minimizers.num_bits()) / size() << " [bits/kmer] (" << static_cast(m_minimizers.num_bits()) / m_minimizers.size() - << " [bits/key])\n"; + << " [bits/key]) -- " << perc(m_minimizers.num_bits(), num_bits()) << "%\n"; std::cout << " pieces: " << static_cast(m_buckets.pieces.num_bits()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " << perc(m_buckets.pieces.num_bits(), num_bits()) << "%\n"; std::cout << " num_super_kmers_before_bucket: " << static_cast(m_buckets.num_super_kmers_before_bucket.num_bits()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " + << perc(m_buckets.num_super_kmers_before_bucket.num_bits(), num_bits()) << "%\n"; std::cout << " offsets: " << static_cast(8 * m_buckets.offsets.bytes()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " << perc(8 * m_buckets.offsets.bytes(), num_bits()) << "%\n"; std::cout << " strings: " << static_cast(8 * m_buckets.strings.bytes()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " << perc(8 * m_buckets.strings.bytes(), num_bits()) << "%\n"; std::cout << " skew_index: " << static_cast(m_skew_index.num_bits()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " << perc(m_skew_index.num_bits(), num_bits()) << "%\n"; std::cout << " weights: " << static_cast(m_weights.num_bits()) / size() - << " [bits/kmer]\n"; + << " [bits/kmer] -- " << perc(m_weights.num_bits(), num_bits()) << "%\n"; m_weights.print_space_breakdown(size()); std::cout << " --------------\n"; std::cout << " total: " << static_cast(num_bits()) / size() << " [bits/kmer]"