Various changes

* cleaned up examples directory * Removed unnecessary *.cmake files
hmusta · Apr 5, 2013 · 63cb47c · 63cb47c
1 parent d254245
commit 63cb47c
Show file tree

Hide file tree

Showing 42 changed files with 521 additions and 674 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -34,7 +34,6 @@ endif()
 add_subdirectory(external)
 add_subdirectory(include)
 add_subdirectory(lib)
-add_subdirectory(test)
-add_subdirectory(examples)
-add_subdirectory(benchmark)	
 
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/Make.helper.cmake" 
+		       "${CMAKE_CURRENT_SOURCE_DIR}/Make.helper" @ONLY)	
diff --git a/Make.helper.cmake b/Make.helper.cmake
@@ -0,0 +1,2 @@
+LIB_DIR = @CMAKE_INSTALL_PREFIX@/lib
+INC_DIR = @CMAKE_INSTALL_PREFIX@/include
diff --git a/README.md b/README.md
@@ -73,7 +73,7 @@ Details are in our [comprehensive experimental study][SPE].
       [wt_rlg](./include/sdsl/wt_rlg.hpp),
       [wt_rlg8](./include/sdsl/wt_rlg8.hpp))
  * Compressed Suffix Arrays (CSA) (all immutable)
-   * [csa_bitcompressed][./include/sdsl/csa_bitcompressed.hpp] is based on the bitcompressed SA and inverse SA.
+   * [csa_bitcompressed](./include/sdsl/csa_bitcompressed.hpp) is based on the bitcompressed SA and inverse SA.
    * [csa_wt](./include/sdsl/csa_wt.hpp) is based on a WT of the BWT.
    * [csa_sada](./include/sdsl/csa_sada.hpp) is based on the compressed
       ![\Psi](http://latex.codecogs.com/gif.latex?%5CPsi)-function
@@ -226,9 +226,13 @@ how esay it is to use succinct data structures.
 
 ## Construction of Suffix Arrays
 
-The current version includes Yuta Mori's incredible fast suffix array
-construction library [libdivsufsort](http://code.google.com/p/libdivsufsort/)
-version 2.0.1.
+We have included the code of two excellent suffix array
+construction algorithms.
+
+* Yuta Mori's incredibly fast suffix array construction
+  algorithm [libdivsufsort][DIVSUF] (version 2.0.1) for byte-alphabets.
+* An adapted version of Jesper Larsson's implementation of the
+  algorithm of [Larsson and Sadakane][LS] for integer-alphabets.
 
 
 ## Contributors
@@ -253,3 +257,5 @@ Bug reports:
 [gcc]: http://gcc.gnu.org/ "GNU Compiler Collection"
 [DBLPCSTRES]: http://people.eng.unimelb.edu.au/sgog/sdsl_explore/dblp.xml.100MB_cst_sada_wt_rlmn_lcp_tree2.html "CST visualization"
 [SPE]: http://people.eng.unimelb.edu.au/sgog/optimized.pdf "Preprint SP&E article"
+[DIVSUF]: http://code.google.com/p/libdivsufsort/ "libdivsufsort"
+[LS]: http://www.sciencedirect.com/science/article/pii/S0304397507005257 "Larsson & Sadakane Algorithm"
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
diff --git a/benchmark/Make.helper.cmake b/benchmark/Make.helper.cmake
diff --git a/benchmark/indexing_count/Makefile b/benchmark/indexing_count/Makefile
@@ -26,18 +26,22 @@ INFO_FILES  = $(foreach IDX_ID,$(IDX_IDS),\
 TIME_FILES  = $(foreach IDX_ID,$(IDX_IDS),\
 		         $(foreach TC_ID,$(TC_IDS),\
 					 $(foreach COMPILE_ID,$(COMPILE_IDS),results/$(TC_ID).$(IDX_ID).$(COMPILE_ID))))
-
-all: $(BUILD_EXECS) $(QUERY_EXECS) info pattern
+COMP_FILES  = $(addsuffix .z.info,$(TC_PATHS))
+
+all: $(BUILD_EXECS) $(QUERY_EXECS) pattern
 
 info: $(INFO_EXECS) $(INFO_FILES) 
+	cd ../../examples; make json2html.x
 
 indexes: $(INDEXES)
 
 input: $(TC_PATHS)
 
 pattern: input $(PATTERNS) $(BIN_DIR)/genpatterns
 
-timing: input $(INDEXES) pattern $(TIME_FILES)
+compression: input $(COMP_FILES)
+
+timing: input $(INDEXES) pattern $(TIME_FILES) compression info
 	@cat $(TIME_FILES) > $(RESULT_FILE)
 	@cd visualize; make
 
@@ -70,6 +74,7 @@ info/%.json: $(INDEXES)
 	$(eval IDX_ID:=$(call dim,2,$*)) 
 	@echo "Generating info for $(IDX_ID) on $(TC_ID)"
 	@$(BIN_DIR)/info_$(IDX_ID) indexes/$(TC_ID).$(IDX_ID) > $@ 
+	@../../examples/json2html.x $@ > $@.html
 
 $(PAT_DIR)/%.pattern: $(BIN_DIR)/genpatterns
 	@echo $*
@@ -113,16 +118,16 @@ $(BIN_DIR)/info_%: $(SRC_DIR)/info.cpp index.config
 include ../Make.download
 
 clean:
-	@echo "Remove executables"
+	@echo "Remove executables and indexes"
 	@rm -f $(QUERY_EXECS) $(LOCATE_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \
-		   $(BIN_DIR)/genpatterns
+		   $(INFO_FILES) $(INDEXES) $(BIN_DIR)/genpatterns
 
 cleanresults: 
 	@echo "Remove result files"
-	@rm -f $(TIME_FILES) $(RESULT_FILE)
+	@rm -f $(TIME_FILES) $(RESULT_FILE) $(INFO_FILES)
+	@rm -f $(PATTERNS)
 
 cleanall: clean cleanresults
 	@echo "Remove all generated files."
-	@rm -f $(INDEXES) $(INFO_FILES) $(PATTERNS)
 	@rm -f $(TMP_DIR)/* 
 	@rm -f $(PAT_DIR)/*
diff --git a/benchmark/indexing_count/src/info.cpp b/benchmark/indexing_count/src/info.cpp
@@ -12,11 +12,11 @@ using namespace std;
 int main(int argc, char* argv[])
 {
     char* filename;
-    if (argc < 2)	{
+    if (argc < 2)  {
         cout << "./" << argv[0] << " index_file " << endl;
         return 1;
     }
     CSA_TYPE csa;
-    load_from_file(csa, string(argv[1]) + "." + string(SUF));
+    load_from_file(csa, argv[1]);
     write_structure<JSON_FORMAT>(csa, cout);
 }
diff --git a/benchmark/indexing_count/test_case.config b/benchmark/indexing_count/test_case.config
@@ -8,3 +8,9 @@ DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/text
 DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz
 PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz
 SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz
+#INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz
+#EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz
+#EINSTEIN-en;../data/einstein.en.txt;einstein-en;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz
+#PARA;../data/para;para;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz
+#WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz
+#E-COLI;../data/Escherichia_Coli;E.coli;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz
diff --git a/benchmark/indexing_count/visualize/count.R b/benchmark/indexing_count/visualize/count.R
@@ -1,4 +1,5 @@
 library(xtable) # if not installed call install.packages("xtable")
+library(plyr)
 
 source("../../basic_functions.R")
 
@@ -7,13 +8,17 @@ idx_config <- readConfig("../index.config",c("IDX_ID","SDSL_TYPE","LATEX-NAME"))
 tc_config <- readConfig("../test_case.config",c("TC_ID","PATH","LATEX-NAME","URL"))
 compile_config <- readConfig("../compile_options.config",c("COMPILE_ID","OPTIONS"))
 
+# Create data frame which maps test cases names to their index in the list
+tc_ord <- data.frame("ord"=seq(1,nrow(tc_config)),"LATEX-NAME")
+rownames(tc_ord) <- tc_config[["TC_ID"]]
 
 # Load report information
 
 config <- readConfig("index-filter.config",c("IDX_ID"))
 
 # Load data
 raw <- data_frame_from_key_value_pairs( "../results/all.txt" )
+#
 # Filter indexes
 raw               <- raw[raw[["IDX_ID"]]%in%config[["IDX_ID"]],] 
 raw[["IDX_ID"]] <- factor(raw[["IDX_ID"]])
@@ -29,23 +34,26 @@ raw <- raw[order(raw[["TC_ID"]]),]
 
 data <- split(raw, raw[["COMPILE_ID"]])
 
+
+
 form_table <- function(d, order=NA){
+# calculate the mean time per IDX_ID,TC_ID
     d <- aggregate(d[c('Time','Space')], 
                    by=list(IDX_ID=d[['IDX_ID']],
-                           TC_ID=d[['TC_ID']],
-                           COMPILE_ID=d[['COMPILE_ID']]),
+                           TC_ID=d[['TC_ID']]),
                    FUN=mean,na.rm=TRUE)
-    dByProgram <- split(d, d[["IDX_ID"]])
-    table <- data.frame(dByProgram[[1]]["TC_ID"])
+	d <- d[ order(tc_ord[as.character(d[["TC_ID"]]),"ord"]), ]
+    dd <- split(d, d[["IDX_ID"]])
+    table <- data.frame(dd[[1]]["TC_ID"])
     names(table) <- c("TC_ID")
     table[["TC_ID"]] <- paste("\\textsc{", tc_config[table[['TC_ID']], "LATEX-NAME"],"}") 
     names(table) <- c(" ")
-    prog_name <- names(dByProgram)
+    prog_name <- names(dd)
     if( !is.na(order) ){
         prog_name <- order
     }
     for( prog in prog_name ){
-        sel <- dByProgram[[prog]]
+        sel <- dd[[prog]]
         table <- cbind(table, " "=rep("", length(sel["Time"])))
         table <- cbind(table, round(sel["Time"],3))
         table <- cbind(table, sel["Space"]*100)

diff --git a/benchmark/indexing_count/visualize/tbl-index-info.tex b/benchmark/indexing_count/visualize/tbl-index-info.tex
diff --git a/benchmark/indexing_extract/Makefile b/benchmark/indexing_extract/Makefile
@@ -32,7 +32,7 @@ TIME_FILES  = $(foreach IDX_ID,$(IDX_IDS),\
 				  $(foreach SAMPLE_ID,$(SAMPLE_IDS),results/$(TC_ID).$(IDX_ID).$(SAMPLE_ID))))
 COMP_FILES  = $(addsuffix .z.info,$(TC_PATHS))
 
-all: $(BUILD_EXECS) $(QUERY_EXECS) info intervals
+all: $(BUILD_EXECS) $(QUERY_EXECS) intervals
 
 info: $(INFO_EXECS) $(INFO_FILES) 
 
@@ -44,7 +44,7 @@ input: $(TC_PATHS)
 
 compression: input $(COMP_FILES)
 
-timing: input $(INDEXES) intervals $(TIME_FILES) compression
+timing: input $(INDEXES) intervals $(TIME_FILES) compression info
 	@cat $(TIME_FILES) > $(RESULT_FILE)
 	@cd visualize; make
 
@@ -153,4 +153,4 @@ cleanall: clean cleanresults
 	@echo "Remove all generated files."
 	@rm -f $(INDEXES) $(INFO_FILES) $(PATTERNS)
 	@rm -f $(TMP_DIR)/* 
-	@rm -f $(IVL_DIR)/*
+	@rm -f $(INTERVALS)
diff --git a/benchmark/indexing_locate/Makefile b/benchmark/indexing_locate/Makefile
@@ -32,7 +32,7 @@ TIME_FILES  = $(foreach IDX_ID,$(IDX_IDS),\
 				  $(foreach SAMPLE_ID,$(SAMPLE_IDS),results/$(TC_ID).$(IDX_ID).$(SAMPLE_ID))))
 COMP_FILES  = $(addsuffix .z.info,$(TC_PATHS))
 
-all: $(BUILD_EXECS) $(QUERY_EXECS) info pattern
+all: $(BUILD_EXECS) $(QUERY_EXECS) pattern
 
 info: $(INFO_EXECS) $(INFO_FILES) 
 
@@ -44,7 +44,7 @@ pattern: input $(PATTERNS) $(BIN_DIR)/pattern_random
 
 compression: input $(COMP_FILES)
 
-timing: input $(INDEXES) pattern $(TIME_FILES) compression
+timing: input $(INDEXES) pattern $(TIME_FILES) compression info
 	@cat $(TIME_FILES) > $(RESULT_FILE)
 	@cd visualize; make
 

diff --git a/benchmark/indexing_locate/src/info.cpp b/benchmark/indexing_locate/src/info.cpp
@@ -12,7 +12,7 @@ using namespace std;
 int main(int argc, char* argv[])
 {
     char* filename;
-    if (argc < 2)	{
+    if (argc < 2)  {
         cout << "./" << argv[0] << " index_file " << endl;
         return 1;
     }

diff --git a/examples/64bit_array2int_vector.cpp b/examples/64bit_array2int_vector.cpp
@@ -14,6 +14,10 @@ using namespace std;
 
 int main(int argc, char* argv[])
 {
+    if (argc < 2) {
+        cout << "Usage: " << argv[0] << " int_file" << endl;
+        return 1;
+    }
     size_t x = util::file_size(argv[1]);
     const int BPI=8;
     cout<<"file size in bytes = "<<x<<endl;

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
diff --git a/examples/bit_vector.cpp b/examples/bit_vector.cpp
@@ -1,11 +1,10 @@
-#include <sdsl/int_vector.hpp>
-#include <sdsl/suffix_trees.hpp>
+#include <sdsl/bit_vectors.hpp>
 #include <iostream>
 
 using namespace std;
 using namespace sdsl;
 
-int main(int argc, char* argv[])
+int main()
 {
 
     bit_vector b(10000000, 0);
@@ -44,16 +43,4 @@ int main(int argc, char* argv[])
 
     write_structure<JSON_FORMAT>(rrrb, cout);
     cout<<endl;
-
-    typedef cst_sada<> tCst;
-    tCst cst;
-
-    construct(cst, argv[1], 1);
-
-    for (tCst::const_iterator it = cst.begin(); it!=cst.end(); ++it) {
-        if (it.visit() == 1) {
-            cout << cst.depth(*it) << "-["<< cst.lb(*it)<<","<<cst.rb(*it)<<"]" << endl;
-
-        }
-    }
 }
diff --git a/examples/csa_alphabet_strategy.cpp b/examples/csa_alphabet_strategy.cpp
@@ -1,34 +1,37 @@
 /* This example shows how the representation of the alphabet dependent
  * part of a CST can be altered by using policy classes.
- *
- * Author: Simon Gog
  */
-
 #include <sdsl/suffix_arrays.hpp>
 #include <iostream>
 #include <string>
 
 using namespace sdsl;
 using namespace std;
 
-template<class Csa>
-void csa_info(Csa& csa, const char* file, bool json)
+template<class csa_t>
+void csa_info(csa_t& csa, const char* file, bool json)
 {
-    cout << "file: " << file << endl;
+    cout << "file          : " << file << endl;
     construct(csa, file, 1);
-    cout << "csa of type " << util::demangle(typeid(csa).name()) << endl;
+    cout << "csa of type   : " << util::demangle(typeid(csa).name()) << endl;
     cout << "size in bytes : " << size_in_bytes(csa) << endl;
     if (json) {
+        cout << "---------------" << endl;
         cout << "json output: " << endl;
         write_structure<JSON_FORMAT>(csa, cout);
         cout << endl;
     }
+    cout << "---------------" << endl;
 }
 
 int main(int argc, char* argv[])
 {
     if (argc < 2) {
-        cout << "Usage: " << argv[0] << " file [json]" << endl;
+        cout << "Usage: " << argv[0] << " file [JSON]" << endl;
+        cout << " (1) Constructs CSAs 2 csa_sada and 2 csa_wt, with" << endl;
+        cout << "     alphabet strategies." << endl;
+        cout << " (2) Outputs type and size. If JSON is specified," << endl;
+        cout << "     also the structure in JSON-format." << endl;
         return 1;
     }
     bool json = false;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		LIB_DIR = @CMAKE_INSTALL_PREFIX@/lib
		INC_DIR = @CMAKE_INSTALL_PREFIX@/include