From 9ff3ce62b2a4ba2f61c11d040fab74c89fbde7a9 Mon Sep 17 00:00:00 2001
From: jermp <jeis90@gmail.com>
Date: Tue, 1 Mar 2022 16:43:31 +0100
Subject: [PATCH] README updated

---
 README.md | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index bdba58f..b6a7092 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,10 @@ two basic queries are supported:
 - i = Lookup(g), where i is in [0,n) if the k-mer g is found in the dictionary or i = -1 otherwise;
 - g = Access(i), where g is the k-mer associated to the identifier i.
 
+If the abundances of the k-mers (their frequency counts) are also stored in the dictionary, then the dictionary is said to be *weighted* and it additionally supports:
+
+- c = Abundance(i), where c is the abundance of the k-mer associated to the identifier i.
+
 A membership query (determine if a given k-mer is present in the dictionary or not) is, therefore, supported by means of the lookup query.
 The dictionary can also stream through all k-mers of a given DNA file
 (.fasta or .fastq formats) to determine their membership to the dictionary.
@@ -91,47 +95,47 @@ where the code was compiled (see the section [Compiling the Code](#compiling-the
 
 to show the usage of the driver program (reported below for convenience).
 
-	Usage: ./build [-h,--help] input_filename k m [-s seed] [-n max_num_kmers] [-l l] [-c c] [--canonical-parsing] [-o output_filename] [--check] [--bench] [--verbose]
-	
+	Usage: ./build [-h,--help] input_filename k m [-s seed] [-l l] [-c c] [--canonical-parsing] [--abundances] [-o output_filename] [--check] [--bench] [--verbose]
+
 	 input_filename
 		Must be a FASTA file (.fa/fasta extension) compressed with gzip (.gz) or not:
 		- without duplicate nor invalid kmers
 		- one DNA sequence per line.
 		For example, it could be the de Bruijn graph topology output by BCALM.
-	
+
 	 k
 		K-mer length (must be <= 31).
-	
+
 	 m
 		Minimizer length (must be < k).
-	
+
 	 [-s seed]
 		Seed for construction (default is 1).
-	
-	 [-n max_num_kmers]
-		Build the dictionary from at most this number of k-mers.
-	
+
 	 [-l l]
 		A (integer) constant that controls the space/time trade-off of the dictionary. A reasonable values lies between 2 and 12 (default is 6).
-	
+
 	 [-c c]
 		A (floating point) constant that trades construction speed for space effectiveness of minimal perfect hashing. A reasonable value lies between 3.0 and 10.0 (default is 3.000000).
-	
+
 	 [--canonical-parsing]
 		Canonical parsing of k-mers. This option changes the parsing and results in a trade-off between index space and lookup time.
-	
+
+	 [--abundances]
+		Also store the abundances in compressed format.
+
 	 [-o output_filename]
 		Output file name where the data structure will be serialized.
-	
+
 	 [--check]
 		Check correctness after construction.
-	
+
 	 [--bench]
 		Run benchmark after construction.
-	
+
 	 [--verbose]
 		Verbose output during construction.
-	
+
 	 [-h,--help]
 		Print this help text and silently exits.
 		
@@ -157,6 +161,10 @@ To run a performance benchmark after construction of the index,
 use:
 
 	./bench salmonella_enterica.index
+
+To also store the abundances, use the option `--abundances`:
+
+	./build ../data/unitigs_stitched/with_abundances/salmonella_enterica_k31_ust.abundances.fa.gz 31 13 --abundances --check --verbose
 	
 ### Example 2