diff --git a/README.md b/README.md
index cc38a16..b6a7092 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,10 @@ two basic queries are supported:
 - i = Lookup(g), where i is in [0,n) if the k-mer g is found in the dictionary or i = -1 otherwise;
 - g = Access(i), where g is the k-mer associated to the identifier i.
 
+If also the abundances of the k-mers (their frequency counts) are stored in the dictionary, then the dictionary is said to be *weighted* and it also supports:
+
+- c = Abundance(i), where i is a given k-mer identifier.
+
 A membership query (determine if a given k-mer is present in the dictionary or not) is, therefore, supported by means of the lookup query.
 The dictionary can also stream through all k-mers of a given DNA file
 (.fasta or .fastq formats) to determine their membership to the dictionary.
@@ -91,44 +95,47 @@ where the code was compiled (see the section [Compiling the Code](#compiling-the
 
 to show the usage of the driver program (reported below for convenience).
 
-	Usage: ./build [-h,--help] input_filename k m [-s seed] [-l l] [-c c] [--canonical-parsing] [-o output_filename] [--check] [--bench] [--verbose]
-	
+	Usage: ./build [-h,--help] input_filename k m [-s seed] [-l l] [-c c] [--canonical-parsing] [--abundances] [-o output_filename] [--check] [--bench] [--verbose]
+
 	 input_filename
 		Must be a FASTA file (.fa/fasta extension) compressed with gzip (.gz) or not:
 		- without duplicate nor invalid kmers
 		- one DNA sequence per line.
 		For example, it could be the de Bruijn graph topology output by BCALM.
-	
+
 	 k
 		K-mer length (must be <= 31).
-	
+
 	 m
 		Minimizer length (must be < k).
-	
+
 	 [-s seed]
 		Seed for construction (default is 1).
-	
+
 	 [-l l]
 		A (integer) constant that controls the space/time trade-off of the dictionary. A reasonable values lies between 2 and 12 (default is 6).
-	
+
 	 [-c c]
 		A (floating point) constant that trades construction speed for space effectiveness of minimal perfect hashing. A reasonable value lies between 3.0 and 10.0 (default is 3.000000).
-	
+
 	 [--canonical-parsing]
 		Canonical parsing of k-mers. This option changes the parsing and results in a trade-off between index space and lookup time.
-	
+
+	 [--abundances]
+		Also store the abundances in compressed format.
+
 	 [-o output_filename]
 		Output file name where the data structure will be serialized.
-	
+
 	 [--check]
 		Check correctness after construction.
-	
+
 	 [--bench]
 		Run benchmark after construction.
-	
+
 	 [--verbose]
 		Verbose output during construction.
-	
+
 	 [-h,--help]
 		Print this help text and silently exits.
 		
@@ -154,6 +161,10 @@ To run a performance benchmark after construction of the index,
 use:
 
 	./bench salmonella_enterica.index
+
+To also store the abundances, use the option `--abundances`:
+
+	./build ../data/unitigs_stitched/with_abundances/salmonella_enterica_k31_ust.abundances.fa.gz 31 13 --abundances --check --verbose
 	
 ### Example 2
 
diff --git a/data/unitigs_stitched/with_abundances/ecoli1_k31_ust.abundances.fa.gz b/data/unitigs_stitched/with_abundances/ecoli1_k31_ust.abundances.fa.gz
new file mode 100644
index 0000000..e5e0b03
Binary files /dev/null and b/data/unitigs_stitched/with_abundances/ecoli1_k31_ust.abundances.fa.gz differ
diff --git a/data/unitigs_stitched/with_abundances/salmonella_enterica_k31_ust.abundances.fa.gz b/data/unitigs_stitched/with_abundances/salmonella_enterica_k31_ust.abundances.fa.gz
new file mode 100644
index 0000000..1199845
Binary files /dev/null and b/data/unitigs_stitched/with_abundances/salmonella_enterica_k31_ust.abundances.fa.gz differ
diff --git a/include/abundances.hpp b/include/abundances.hpp
new file mode 100644
index 0000000..f4a1a58
--- /dev/null
+++ b/include/abundances.hpp
@@ -0,0 +1,242 @@
+#pragma once
+
+#include <vector>
+#include <unordered_map>  // count the distinct abundances
+#include "ef_sequence.hpp"
+
+namespace sshash {
+
+struct abundances {
+    struct builder {
+        builder() : m_most_frequent_abundance(0) {}
+
+        void init(uint64_t most_frequent_abundance) {
+            m_most_frequent_abundance = most_frequent_abundance;
+            m_kmer_id_interval_lengths.push_back(0);
+            m_abundance_interval_lengths.push_back(0);
+        }
+
+        void eat(uint64_t abundance) {
+            assert(abundance > 0);
+            auto it = m_abundances_map.find(abundance);
+            if (it != m_abundances_map.cend()) {  // found
+                (*it).second += 1;
+            } else {
+                m_abundances_map[abundance] = 1;
+            }
+        }
+
+        void push_kmer_id_interval(uint64_t value, uint64_t length) {
+            m_kmer_id_interval_values.push_back(value);
+            m_kmer_id_interval_lengths.push_back(m_kmer_id_interval_lengths.back() + length);
+        }
+
+        void push_abundance_interval(uint64_t value, uint64_t length) {
+            m_abundance_interval_values.push_back(value);
+            m_abundance_interval_lengths.push_back(m_abundance_interval_lengths.back() + length);
+        }
+
+        uint64_t num_kmer_id_intervals() const { return m_kmer_id_interval_values.size(); }
+        uint64_t num_abundance_intervals() const { return m_abundance_interval_values.size(); }
+
+        void finalize(uint64_t num_kmers) {
+            assert(
+                std::is_sorted(m_kmer_id_interval_values.begin(), m_kmer_id_interval_values.end()));
+            assert(std::is_sorted(m_kmer_id_interval_lengths.begin(),
+                                  m_kmer_id_interval_lengths.end()));
+            assert(std::is_sorted(m_abundance_interval_lengths.begin(),
+                                  m_abundance_interval_lengths.end()));
+
+            std::cout << "num_kmer_id_intervals " << num_kmer_id_intervals() << std::endl;
+            std::cout << "num_abundance_intervals " << num_abundance_intervals() << std::endl;
+
+            uint64_t num_distinct_abundances = m_abundances_map.size();
+
+            std::cout << "found " << num_distinct_abundances << " distint abundances (ceil(log2("
+                      << num_distinct_abundances
+                      << ")) = " << std::ceil(std::log2(num_distinct_abundances)) << ")"
+                      << std::endl;
+
+            m_abundances.reserve(num_distinct_abundances);
+            uint64_t n = 0;
+            uint64_t largest_ab = 0;
+            for (auto p : m_abundances_map) {
+                if (p.first > largest_ab) largest_ab = p.first;
+                n += p.second;
+                m_abundances.push_back(p);
+            }
+            assert(largest_ab > 0);
+
+            std::cout << "largest_ab+1 = " << largest_ab + 1 << " (ceil(log2(" << largest_ab + 1
+                      << ")) = " << std::ceil(std::log2(largest_ab + 1)) << ")" << std::endl;
+
+            if (n != num_kmers) {
+                std::cout << "ERROR: expected " << num_kmers << " kmers but got " << n << std::endl;
+                throw std::runtime_error("file is malformed");
+            }
+
+            std::sort(m_abundances.begin(), m_abundances.end(), [](auto const& x, auto const& y) {
+                if (x.second != y.second) return x.second > y.second;
+                return x.first < y.first;
+            });
+
+            /* If this tests fails, then we need to change the value of
+                constants::most_frequent_abundance */
+            if (m_most_frequent_abundance != m_abundances.front().first) {
+                throw std::runtime_error("the most frequent abundance is not " +
+                                         std::to_string(constants::most_frequent_abundance));
+            }
+
+            uint64_t rest = num_kmers - m_abundances.front().second;
+            std::cout << "kmers that do not have the most frequent ab: " << rest << " ("
+                      << (rest * 100.0) / num_kmers << "%)" << std::endl;
+            std::cout << "cumulative_kmer_id_interval_lengths " << m_kmer_id_interval_lengths.back()
+                      << '/' << rest << std::endl;
+            std::cout << "cumulative_abundance_interval_lengths "
+                      << m_abundance_interval_lengths.back() << '/' << rest << std::endl;
+
+            m_abundance_dictionary_builder.resize(num_distinct_abundances,
+                                                  std::ceil(std::log2(largest_ab + 1)));
+            for (uint64_t id = 0; id != num_distinct_abundances; ++id) {
+                uint64_t ab_value = m_abundances[id].first;
+                m_abundance_dictionary_builder.set(id, ab_value);
+                m_abundances_map[ab_value] = id;
+            }
+        }
+
+        void build(abundances& index) {
+            std::swap(index.m_most_frequent_abundance, m_most_frequent_abundance);
+
+            index.m_kmer_id_interval_values.encode(m_kmer_id_interval_values.begin(),
+                                                   m_kmer_id_interval_values.size());
+            index.m_kmer_id_interval_lengths.encode(m_kmer_id_interval_lengths.begin(),
+                                                    m_kmer_id_interval_lengths.size());
+
+            uint64_t num_distinct_abundances = m_abundance_dictionary_builder.size();
+            pthash::compact_vector::builder abundance_interval_values;
+            abundance_interval_values.resize(
+                m_abundance_interval_values.size(),
+                num_distinct_abundances == 1 ? 1 : std::ceil(std::log2(num_distinct_abundances)));
+            for (uint64_t i = 0; i != m_abundance_interval_values.size(); ++i) {
+                uint64_t abundance = m_abundance_interval_values[i];
+                uint64_t id = m_abundances_map[abundance];
+                assert(id < num_distinct_abundances);
+                abundance_interval_values.set(i, id);
+            }
+            abundance_interval_values.build(index.m_abundance_interval_values);
+            index.m_abundance_interval_lengths.encode(m_abundance_interval_lengths.begin(),
+                                                      m_abundance_interval_lengths.size());
+
+            m_abundance_dictionary_builder.build(index.m_abundance_dictionary);
+        }
+
+        // return the average empirical entropy per abundance
+        double print_info(uint64_t num_kmers) {
+            assert(!m_abundances.empty());
+            double expected_ab_value = 0.0;
+            double entropy_ab = 0.0;
+            uint64_t print = 0;
+            for (auto p : m_abundances) {
+                double prob = static_cast<double>(p.second) / num_kmers;
+                expected_ab_value += p.first * prob;
+                entropy_ab += prob * std::log2(1.0 / prob);
+                print += 1;
+                if (print <= 10) {
+                    std::cout << "ab:" << p.first << " freq:" << p.second << " ("
+                              << (p.second * 100.0) / num_kmers << "%)" << std::endl;
+                }
+            }
+            std::cout << "expected_ab_value " << expected_ab_value << std::endl;
+            std::cout << "entropy_ab " << entropy_ab << " [bits/kmer]" << std::endl;
+            return entropy_ab;
+        }
+
+    private:
+        uint64_t m_most_frequent_abundance;
+
+        /* (abundance,frequency) pairs during construction, then (abundance,id) after sorting */
+        std::unordered_map<uint64_t, uint64_t> m_abundances_map;
+        std::vector<std::pair<uint64_t, uint64_t>> m_abundances;  // (abundance,frequency)
+
+        std::vector<uint64_t> m_kmer_id_interval_values;
+        std::vector<uint64_t> m_kmer_id_interval_lengths;
+
+        std::vector<uint64_t> m_abundance_interval_values;
+        std::vector<uint64_t> m_abundance_interval_lengths;
+
+        pthash::compact_vector::builder m_abundance_dictionary_builder;
+    };
+
+    bool empty() const { return m_abundance_dictionary.size() == 0; }
+
+    uint64_t abundance(uint64_t kmer_id) const {
+        bool is_present = false;
+        uint64_t rank = 0;
+
+        auto [pos, val] = m_kmer_id_interval_values.next_geq(kmer_id);
+        if (val == kmer_id) {
+            is_present = true;
+            rank = m_kmer_id_interval_lengths.access(pos);
+        } else {
+            if (pos > 0) {
+                --pos;
+                val = m_kmer_id_interval_values.access(pos);
+                rank = m_kmer_id_interval_lengths.access(pos);
+                uint64_t length = m_kmer_id_interval_lengths.access(pos + 1) - rank;
+                if (kmer_id < val + length) {
+                    is_present = true;
+                    assert(kmer_id >= val);
+                    rank += kmer_id - val;
+                }
+            }
+        }
+
+        if (!is_present) return m_most_frequent_abundance;
+
+        uint64_t i = m_abundance_interval_lengths.prev_leq(rank);
+        uint64_t id = m_abundance_interval_values.access(i);
+        uint64_t abundance = m_abundance_dictionary.access(id);
+
+        return abundance;
+    }
+
+    uint64_t num_bits() const {
+        return sizeof(m_most_frequent_abundance) * 8 + m_kmer_id_interval_values.num_bits() +
+               m_kmer_id_interval_lengths.num_bits() + m_abundance_interval_values.bytes() * 8 +
+               m_abundance_interval_lengths.num_bits() + m_abundance_dictionary.bytes() * 8;
+    }
+
+    template <typename Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_most_frequent_abundance);
+        visitor.visit(m_kmer_id_interval_values);
+        visitor.visit(m_kmer_id_interval_lengths);
+        visitor.visit(m_abundance_interval_values);
+        visitor.visit(m_abundance_interval_lengths);
+        visitor.visit(m_abundance_dictionary);
+    }
+
+private:
+    uint64_t m_most_frequent_abundance;
+
+    /*****
+       We model abundances as two lists of intervals.
+       Each interval is a pair (value,length).
+       - First list: kmer_ids list. In this case, a pair (value,length)
+       represents all kmer_ids = value, value+1, value+2, ..., value+length-1.
+       - Second list: abundance list. In this case, a pair (value,length)
+       represents that the abundance [value] repeats for [length] times.
+       */
+
+    ef_sequence<true> m_kmer_id_interval_values;
+    ef_sequence<false> m_kmer_id_interval_lengths;
+
+    pthash::compact_vector m_abundance_interval_values;
+    ef_sequence<true> m_abundance_interval_lengths;
+    /***/
+
+    /* Abundance dictionary, listing all distinct abundances sorted by decreasing frequency. */
+    pthash::compact_vector m_abundance_dictionary;
+};
+
+}  // namespace sshash
\ No newline at end of file
diff --git a/include/builder/build.cpp b/include/builder/build.cpp
index fcff19a..fc95df6 100644
--- a/include/builder/build.cpp
+++ b/include/builder/build.cpp
@@ -3,7 +3,6 @@
 #include "../util.hpp"
 
 #include "../dictionary.hpp"
-#include "../buckets.hpp"
 #include "../util.hpp"
 #include "build_util_types.hpp"
 
@@ -16,11 +15,19 @@
 
 namespace sshash {
 
+void expect(char got, char expected) {
+    if (got != expected) {
+        std::cout << "got '" << got << "' but expected '" << expected << "'" << std::endl;
+        throw parse_runtime_error();
+    }
+}
+
 struct parse_data {
     parse_data() : num_kmers(0) {}
     uint64_t num_kmers;
     minimizers_tuples minimizers;
     compact_string_pool strings;
+    abundances::builder abundances_builder;
 };
 
 void parse_file(std::istream& is, parse_data& data, build_configuration const& build_config) {
@@ -81,10 +88,98 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
         return true;
     };
 
+    uint64_t seq_len = 0;
+    data.abundances_builder.init(constants::most_frequent_abundance);
+
+    /* intervals of kmer_ids */
+    uint64_t kmer_id_value = constants::invalid;
+    uint64_t kmer_id_length = 1;
+
+    /* intervals of abundances */
+    uint64_t ab_value = constants::invalid;
+    uint64_t ab_length = 1;
+
+    auto parse_header = [&]() {
+        if (line.empty()) return;
+
+        /*
+            Heder format:
+            >[seq_id] LN:i:[seq_len] ab:Z:[ab_seq]
+            where [ab_seq] is a space-separated sequence of integer counters (the abundances),
+            whose length is equal to [seq_len]-k+1
+        */
+
+        // example header: '>12 LN:i:41 ab:Z:2 2 2 2 2 2 2 2 2 2 2'
+
+        expect(line[0], '>');
+        uint64_t i = 0;
+        i = line.find_first_of(' ', i);
+        if (i == std::string::npos) throw parse_runtime_error();
+
+        i += 1;
+        expect(line[i + 0], 'L');
+        expect(line[i + 1], 'N');
+        expect(line[i + 2], ':');
+        expect(line[i + 3], 'i');
+        expect(line[i + 4], ':');
+        i += 5;
+        uint64_t j = line.find_first_of(' ', i);
+        if (j == std::string::npos) throw parse_runtime_error();
+
+        char* end;
+        seq_len = std::strtoull(line.data() + i, &end, 10);
+        i = j + 1;
+        expect(line[i + 0], 'a');
+        expect(line[i + 1], 'b');
+        expect(line[i + 2], ':');
+        expect(line[i + 3], 'Z');
+        expect(line[i + 4], ':');
+        i += 5;
+
+        kmer_id_value = constants::invalid;
+        kmer_id_length = 1;
+        for (uint64_t j = 0, num_kmers = data.num_kmers; j != seq_len - k + 1; ++j, ++num_kmers) {
+            uint64_t ab = std::strtoull(line.data() + i, &end, 10);
+            i = line.find_first_of(' ', i) + 1;
+
+            data.abundances_builder.eat(ab);
+
+            if (ab != constants::most_frequent_abundance) {
+                if (kmer_id_value == constants::invalid) {
+                    kmer_id_value = num_kmers;
+                    kmer_id_length = 1;
+                } else {
+                    if (num_kmers == kmer_id_value + kmer_id_length) kmer_id_length += 1;
+                }
+
+                if (ab == ab_value) {
+                    ab_length += 1;
+                } else {
+                    if (ab_value != constants::invalid) {
+                        data.abundances_builder.push_abundance_interval(ab_value, ab_length);
+                    }
+                    ab_value = ab;
+                    ab_length = 1;
+                }
+
+            } else {
+                if (kmer_id_value != constants::invalid) {
+                    data.abundances_builder.push_kmer_id_interval(kmer_id_value, kmer_id_length);
+                }
+                kmer_id_value = constants::invalid;
+            }
+        }
+
+        if (kmer_id_value != constants::invalid) {
+            data.abundances_builder.push_kmer_id_interval(kmer_id_value, kmer_id_length);
+        }
+    };
+
     while (!is.eof()) {
-        std::getline(is, line);  // skip header line
-        std::getline(is, line);
+        std::getline(is, line);  // header line
+        if (build_config.store_abundances) parse_header();
 
+        std::getline(is, line);  // DNA sequence
         if (line.size() < k) continue;
 
         if (++num_read_lines % 100000 == 0) {
@@ -98,6 +193,12 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
         prev_minimizer = constants::invalid;
         num_read_bases += line.size();
 
+        if (build_config.store_abundances and seq_len != line.size()) {
+            std::cout << "ERROR: expected a sequence of length " << seq_len
+                      << " but got one of length " << line.size() << std::endl;
+            throw std::runtime_error("file is malformed");
+        }
+
         while (end != line.size() - k + 1) {
             char const* kmer = line.data() + end;
             assert(util::is_valid(kmer, k));
@@ -129,6 +230,11 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
     builder.finalize();
     builder.build(data.strings);
 
+    if (build_config.store_abundances) {
+        data.abundances_builder.push_abundance_interval(ab_value, ab_length);
+        data.abundances_builder.finalize(data.num_kmers);
+    }
+
     std::cout << "read " << num_read_lines << " lines, " << num_read_bases << " bases, "
               << data.num_kmers << " kmers" << std::endl;
     std::cout << "num_kmers " << data.num_kmers << std::endl;
@@ -209,11 +315,6 @@ buckets_statistics build_index(parse_data& data, minimizers const& m_minimizers,
     return buckets_stats;
 }
 
-struct empty_bucket_runtime_error : public std::runtime_error {
-    empty_bucket_runtime_error()
-        : std::runtime_error("try a different choice of l or change seed") {}
-};
-
 void build_skew_index(skew_index& m_skew_index, parse_data& data, buckets const& m_buckets,
                       build_configuration const& build_config,
                       buckets_statistics const& buckets_stats) {
@@ -449,6 +550,24 @@ void dictionary::build(std::string const& filename, build_configuration const& b
     timer.reset();
     /******/
 
+    if (build_config.store_abundances) {
+        /* step 1.1: compress abundances ***/
+        timer.start();
+        data.abundances_builder.build(m_abundances);
+        timer.stop();
+        timings.push_back(timer.elapsed());
+        print_time(timings.back(), data.num_kmers, "step 1.1.: 'build_abundances'");
+        timer.reset();
+        /******/
+        if (build_config.verbose) {
+            double entropy_ab = data.abundances_builder.print_info(data.num_kmers);
+            double avg_bits_per_ab = static_cast<double>(m_abundances.num_bits()) / data.num_kmers;
+            std::cout << "abundances: " << avg_bits_per_ab << " [bits/kmer]" << std::endl;
+            std::cout << "  (" << entropy_ab / avg_bits_per_ab
+                      << "x smaller than the empirical entropy)" << std::endl;
+        }
+    }
+
     /* step 2: sort minimizers and build MPHF ***/
     timer.start();
     data.minimizers.sort();
diff --git a/include/builder/build_util_types.hpp b/include/builder/build_util_types.hpp
index d73732b..03370dc 100644
--- a/include/builder/build_util_types.hpp
+++ b/include/builder/build_util_types.hpp
@@ -2,6 +2,16 @@
 
 namespace sshash {
 
+struct empty_bucket_runtime_error : public std::runtime_error {
+    empty_bucket_runtime_error()
+        : std::runtime_error("try a different choice of l or change seed") {}
+};
+
+struct parse_runtime_error : public std::runtime_error {
+    parse_runtime_error()
+        : std::runtime_error("did you provide an input file with abundance counts?") {}
+};
+
 struct compact_string_pool {
     compact_string_pool() {}
 
diff --git a/include/dictionary.hpp b/include/dictionary.hpp
index 4c1743b..41b478d 100644
--- a/include/dictionary.hpp
+++ b/include/dictionary.hpp
@@ -4,6 +4,7 @@
 #include "minimizers.hpp"
 #include "buckets.hpp"
 #include "skew_index.hpp"
+#include "abundances.hpp"
 
 namespace sshash {
 
@@ -17,6 +18,7 @@ struct dictionary {
     uint64_t k() const { return m_k; }
     uint64_t m() const { return m_m; }
     bool canonicalized() const { return m_canonical_parsing; }
+    bool weighted() const { return !m_abundances.empty(); }
 
     uint64_t lookup_uint64(uint64_t uint64_kmer) const {
         uint64_t minimizer = util::compute_minimizer(uint64_kmer, m_k, m_m, m_seed);
@@ -87,6 +89,11 @@ struct dictionary {
         m_buckets.access(kmer_id, string_kmer, m_k);
     }
 
+    uint64_t abundance(uint64_t kmer_id) const {
+        assert(kmer_id < size());
+        return m_abundances.abundance(kmer_id);
+    }
+
     bool is_member(char const* string_kmer, bool check_reverse_complement_too = true) const {
         return lookup(string_kmer, check_reverse_complement_too) != constants::invalid;
     }
@@ -128,7 +135,8 @@ struct dictionary {
     uint64_t num_bits() const {
         return 8 * (sizeof(m_size) + sizeof(m_seed) + sizeof(m_k) + sizeof(m_m) +
                     sizeof(m_canonical_parsing)) +
-               m_minimizers.num_bits() + m_buckets.num_bits() + m_skew_index.num_bits();
+               m_minimizers.num_bits() + m_buckets.num_bits() + m_skew_index.num_bits() +
+               m_abundances.num_bits();
     }
 
     void print_info() const;
@@ -144,6 +152,7 @@ struct dictionary {
         visitor.visit(m_minimizers);
         visitor.visit(m_buckets);
         visitor.visit(m_skew_index);
+        visitor.visit(m_abundances);
     }
 
 private:
@@ -155,6 +164,7 @@ struct dictionary {
     minimizers m_minimizers;
     buckets m_buckets;
     skew_index m_skew_index;
+    abundances m_abundances;
 };
 
 }  // namespace sshash
\ No newline at end of file
diff --git a/include/ef_sequence.hpp b/include/ef_sequence.hpp
index 86c71e0..6b27a2c 100644
--- a/include/ef_sequence.hpp
+++ b/include/ef_sequence.hpp
@@ -150,9 +150,7 @@ struct ef_sequence {
     }
 
     inline bool contains(uint64_t x) const { return next_geq(x).second == x; }
-
     inline uint64_t back() const { return m_universe; }
-
     inline uint64_t size() const { return m_low_bits.size(); }
 
     uint64_t num_bits() const {
diff --git a/include/print_info.cpp b/include/print_info.cpp
index 8208f0a..44769fd 100644
--- a/include/print_info.cpp
+++ b/include/print_info.cpp
@@ -41,6 +41,8 @@ void dictionary::print_space_breakdown() const {
               << " [bits/kmer]\n";
     std::cout << "  skew_index: " << static_cast<double>(m_skew_index.num_bits()) / size()
               << " [bits/kmer]\n";
+    std::cout << "  abundances: " << static_cast<double>(m_abundances.num_bits()) / size()
+              << " [bits/kmer]\n";
     std::cout << "  --------------\n";
     std::cout << "  total: " << static_cast<double>(num_bits()) / size() << " [bits/kmer]"
               << std::endl;
@@ -52,7 +54,8 @@ void dictionary::print_info() const {
     std::cout << "k = " << k() << '\n';
     std::cout << "num_minimizers = " << m_minimizers.size() << std::endl;
     std::cout << "m = " << m() << '\n';
-    std::cout << "canonical_parsing = " << (canonicalized() ? "true" : "false") << '\n';
+    std::cout << "canonicalized = " << (canonicalized() ? "true" : "false") << '\n';
+    std::cout << "weighted = " << (weighted() ? "true" : "false") << '\n';
 
     std::cout << "num_strings = " << m_buckets.offsets.size() << '\n';
     std::cout << "num_pieces = " << m_buckets.pieces.size() << " (+"
diff --git a/include/util.hpp b/include/util.hpp
index 088fbb2..c33172c 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -17,6 +17,7 @@ constexpr uint64_t hashcode_bits = 64;
 constexpr double c = 3.0;  // for PThash
 constexpr uint64_t min_l = 6;
 constexpr uint64_t max_l = 12;
+constexpr uint64_t most_frequent_abundance = 1;  // usual value
 }  // namespace constants
 
 typedef pthash::murmurhash2_64 base_hasher_type;
@@ -65,6 +66,7 @@ struct build_configuration {
         , c(constants::c)
 
         , canonical_parsing(false)
+        , store_abundances(false)
         , verbose(true) {}
 
     uint64_t k;  // kmer size
@@ -75,13 +77,14 @@ struct build_configuration {
     double c;    // drive PTHash trade-off
 
     bool canonical_parsing;
+    bool store_abundances;
     bool verbose;
 
     void print() const {
         std::cout << "k = " << k << ", m = " << m << ", seed = " << seed << ", l = " << l
                   << ", c = " << c
                   << ", canonical_parsing = " << (canonical_parsing ? "true" : "false")
-                  << std::endl;
+                  << ", store_abundances = " << (store_abundances ? "true" : "false") << std::endl;
     }
 };
 
diff --git a/src/build.cpp b/src/build.cpp
index b8acc64..d7b7be5 100644
--- a/src/build.cpp
+++ b/src/build.cpp
@@ -39,6 +39,8 @@ int main(int argc, char** argv) {
                "Canonical parsing of k-mers. This option changes the parsing and results in a "
                "trade-off between index space and lookup time.",
                "--canonical-parsing", true);
+    parser.add("store_abundances", "Also store the abundances in compressed format.",
+               "--abundances", true);
     parser.add("output_filename", "Output file name where the data structure will be serialized.",
                "-o", false);
     parser.add("check", "Check correctness after construction.", "--check", true);
@@ -61,6 +63,7 @@ int main(int argc, char** argv) {
     if (parser.parsed("l")) build_config.l = parser.get<double>("l");
     if (parser.parsed("c")) build_config.c = parser.get<double>("c");
     build_config.canonical_parsing = parser.get<bool>("canonical_parsing");
+    build_config.store_abundances = parser.get<bool>("store_abundances");
     build_config.verbose = parser.get<bool>("verbose");
     build_config.print();
 
@@ -70,6 +73,7 @@ int main(int argc, char** argv) {
     bool check = parser.get<bool>("check");
     if (check) {
         check_correctness_lookup_access(dict, input_filename);
+        if (build_config.store_abundances) check_correctness_abundances(dict, input_filename);
         check_correctness_iterator(dict);
     }
     bool bench = parser.get<bool>("bench");
diff --git a/src/check_utils.hpp b/src/check_utils.hpp
index 222007a..e99f688 100644
--- a/src/check_utils.hpp
+++ b/src/check_utils.hpp
@@ -16,8 +16,6 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& dict) {
     std::string expected_kmer_str(k, 0);
 
     std::cout << "checking correctness of access and positive lookup..." << std::endl;
-    pthash::bit_vector_builder got(n, 0);
-    __uint128_t sum = 0;
 
     while (appendline(is, line)) {
         if (line.size() == pos || line[pos] == '>' || line[pos] == ';') {
@@ -38,22 +36,17 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& dict) {
             }
             util::uint64_to_string_no_reverse(uint64_kmer, expected_kmer_str.data(), k);
             uint64_t id = dict.lookup(expected_kmer_str.c_str());
+
+            /*
+                Since we assume that we stream through the file from which the index was built,
+                ids are assigned sequentially to kmers, so it must be id == num_kmers.
+            */
+            if (id != num_kmers) std::cout << "wrong id assigned" << std::endl;
+
             if (id == constants::invalid) {
                 std::cout << "kmer '" << expected_kmer_str << "' not found!" << std::endl;
             }
             assert(id != constants::invalid);
-            if (id >= n) {
-                std::cout << "ERROR: id out of range " << id << "/" << n << std::endl;
-                return false;
-            }
-
-            if (got.get(id) == true) {
-                std::cout << "id " << id << " was already assigned!" << std::endl;
-                return false;
-            }
-
-            got.set(id);
-            sum += id;
 
             // check access
             dict.access(id, got_kmer_str.data());
@@ -76,16 +69,6 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& dict) {
     }
     std::cout << "checked " << num_kmers << " kmers" << std::endl;
 
-    if (n != num_kmers) {
-        std::cout << "expected " << n << " kmers but checked " << num_kmers << std::endl;
-        return false;
-    }
-
-    if (sum != (n * (n - 1)) / 2) {
-        std::cout << "ERROR: index contains duplicates" << std::endl;
-        return false;
-    }
-
     std::cout << "EVERYTHING OK!" << std::endl;
 
     std::cout << "checking correctness of negative lookup with random kmers..." << std::endl;
@@ -106,6 +89,57 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& dict) {
     return true;
 }
 
+bool check_correctness_abundances(std::istream& is, dictionary const& dict) {
+    uint64_t k = dict.k();
+    std::string line;
+    uint64_t kmer_id = 0;
+
+    if (!dict.weighted()) {
+        std::cerr << "ERROR: the dictionary does not store any abundance" << std::endl;
+        return false;
+    }
+
+    std::cout << "checking correctness of abundances..." << std::endl;
+
+    while (!is.eof()) {
+        std::getline(is, line);  // header line
+        if (line.empty()) break;
+
+        uint64_t i = 0;
+        i = line.find_first_of(' ', i);
+        assert(i != std::string::npos);
+
+        i += 1;
+        i += 5;  // skip "LN:i:"
+        uint64_t j = line.find_first_of(' ', i);
+        assert(j != std::string::npos);
+
+        char* end;
+        uint64_t seq_len = std::strtoull(line.data() + i, &end, 10);
+        i = j + 1;
+        i += 5;  // skip "ab:Z:"
+
+        for (uint64_t j = 0; j != seq_len - k + 1; ++j, ++kmer_id) {
+            uint64_t expected_ab = std::strtoull(line.data() + i, &end, 10);
+            i = line.find_first_of(' ', i) + 1;
+            uint64_t got_ab = dict.abundance(kmer_id);
+            if (expected_ab != got_ab) {
+                std::cout << "ERROR for kmer_id " << kmer_id << ": expected_ab " << expected_ab
+                          << " but got_ab " << got_ab << std::endl;
+            }
+            if (kmer_id != 0 and kmer_id % 5000000 == 0) {
+                std::cout << "checked " << kmer_id << " abundances" << std::endl;
+            }
+        }
+
+        std::getline(is, line);  // skip DNA sequence
+    }
+
+    std::cout << "checked " << kmer_id << " abundances" << std::endl;
+    std::cout << "EVERYTHING OK!" << std::endl;
+    return true;
+}
+
 /*
    The input file must be the one the index was built from.
    Throughout the code, we assume the input does not contain any duplicate.
@@ -124,6 +158,23 @@ bool check_correctness_lookup_access(dictionary const& dict, std::string const&
     return good;
 }
 
+/*
+   The input file must be the one the index was built from.
+*/
+bool check_correctness_abundances(dictionary const& dict, std::string const& filename) {
+    std::ifstream is(filename.c_str());
+    if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'");
+    bool good = true;
+    if (util::ends_with(filename, ".gz")) {
+        zip_istream zis(is);
+        good = check_correctness_abundances(zis, dict);
+    } else {
+        good = check_correctness_abundances(is, dict);
+    }
+    is.close();
+    return good;
+}
+
 bool check_dictionary(dictionary const& dict) {
     uint64_t k = dict.k();
     uint64_t n = dict.size();