Skip to content

Commit

Permalink
Various changes
Browse files Browse the repository at this point in the history
* cleaned up examples directory
* Removed unnecessary *.cmake files
  • Loading branch information
simongog committed Apr 5, 2013
1 parent d254245 commit 63cb47c
Show file tree
Hide file tree
Showing 42 changed files with 521 additions and 674 deletions.
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ endif()
add_subdirectory(external)
add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(test)
add_subdirectory(examples)
add_subdirectory(benchmark)

configure_file("${CMAKE_CURRENT_SOURCE_DIR}/Make.helper.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/Make.helper" @ONLY)
2 changes: 2 additions & 0 deletions Make.helper.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
LIB_DIR = @CMAKE_INSTALL_PREFIX@/lib
INC_DIR = @CMAKE_INSTALL_PREFIX@/include
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Details are in our [comprehensive experimental study][SPE].
[wt_rlg](./include/sdsl/wt_rlg.hpp),
[wt_rlg8](./include/sdsl/wt_rlg8.hpp))
* Compressed Suffix Arrays (CSA) (all immutable)
* [csa_bitcompressed][./include/sdsl/csa_bitcompressed.hpp] is based on the bitcompressed SA and inverse SA.
* [csa_bitcompressed](./include/sdsl/csa_bitcompressed.hpp) is based on the bitcompressed SA and inverse SA.
* [csa_wt](./include/sdsl/csa_wt.hpp) is based on a WT of the BWT.
* [csa_sada](./include/sdsl/csa_sada.hpp) is based on the compressed
![\Psi](http://latex.codecogs.com/gif.latex?%5CPsi)-function
Expand Down Expand Up @@ -226,9 +226,13 @@ how easy it is to use succinct data structures.

## Construction of Suffix Arrays

The current version includes Yuta Mori's incredible fast suffix array
construction library [libdivsufsort](http://code.google.com/p/libdivsufsort/)
version 2.0.1.
We have included the code of two excellent suffix array
construction algorithms.

* Yuta Mori's incredibly fast suffix array construction library
[libdivsufsort][DIVSUF] (version 2.0.1) for byte-alphabets.
* An adapted version of Jesper Larsson's implementation of the
algorithm of [Larsson and Sadakane][LS] for integer-alphabets.


## Contributors
Expand All @@ -253,3 +257,5 @@ Bug reports:
[gcc]: http://gcc.gnu.org/ "GNU Compiler Collection"
[DBLPCSTRES]: http://people.eng.unimelb.edu.au/sgog/sdsl_explore/dblp.xml.100MB_cst_sada_wt_rlmn_lcp_tree2.html "CST visualization"
[SPE]: http://people.eng.unimelb.edu.au/sgog/optimized.pdf "Preprint SP&E article"
[DIVSUF]: http://code.google.com/p/libdivsufsort/ "libdivsufsort"
[LS]: http://www.sciencedirect.com/science/article/pii/S0304397507005257 "Larsson & Sadakane Algorithm"
2 changes: 0 additions & 2 deletions benchmark/CMakeLists.txt

This file was deleted.

39 changes: 0 additions & 39 deletions benchmark/Make.helper.cmake

This file was deleted.

19 changes: 12 additions & 7 deletions benchmark/indexing_count/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,22 @@ INFO_FILES = $(foreach IDX_ID,$(IDX_IDS),\
TIME_FILES = $(foreach IDX_ID,$(IDX_IDS),\
$(foreach TC_ID,$(TC_IDS),\
$(foreach COMPILE_ID,$(COMPILE_IDS),results/$(TC_ID).$(IDX_ID).$(COMPILE_ID))))

all: $(BUILD_EXECS) $(QUERY_EXECS) info pattern
COMP_FILES = $(addsuffix .z.info,$(TC_PATHS))

all: $(BUILD_EXECS) $(QUERY_EXECS) pattern

info: $(INFO_EXECS) $(INFO_FILES)
cd ../../examples; make json2html.x

indexes: $(INDEXES)

input: $(TC_PATHS)

pattern: input $(PATTERNS) $(BIN_DIR)/genpatterns

timing: input $(INDEXES) pattern $(TIME_FILES)
compression: input $(COMP_FILES)

timing: input $(INDEXES) pattern $(TIME_FILES) compression info
@cat $(TIME_FILES) > $(RESULT_FILE)
@cd visualize; make

Expand Down Expand Up @@ -70,6 +74,7 @@ info/%.json: $(INDEXES)
$(eval IDX_ID:=$(call dim,2,$*))
@echo "Generating info for $(IDX_ID) on $(TC_ID)"
@$(BIN_DIR)/info_$(IDX_ID) indexes/$(TC_ID).$(IDX_ID) > $@
@../../examples/json2html.x $@ > $@.html

$(PAT_DIR)/%.pattern: $(BIN_DIR)/genpatterns
@echo $*
Expand Down Expand Up @@ -113,16 +118,16 @@ $(BIN_DIR)/info_%: $(SRC_DIR)/info.cpp index.config
include ../Make.download

clean:
@echo "Remove executables"
@echo "Remove executables and indexes"
@rm -f $(QUERY_EXECS) $(LOCATE_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \
$(BIN_DIR)/genpatterns
$(INFO_FILES) $(INDEXES) $(BIN_DIR)/genpatterns

cleanresults:
@echo "Remove result files"
@rm -f $(TIME_FILES) $(RESULT_FILE)
@rm -f $(TIME_FILES) $(RESULT_FILE) $(INFO_FILES)
@rm -f $(PATTERNS)

cleanall: clean cleanresults
@echo "Remove all generated files."
@rm -f $(INDEXES) $(INFO_FILES) $(PATTERNS)
@rm -f $(TMP_DIR)/*
@rm -f $(PAT_DIR)/*
4 changes: 2 additions & 2 deletions benchmark/indexing_count/src/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ using namespace std;
int main(int argc, char* argv[])
{
char* filename;
if (argc < 2) {
if (argc < 2) {
cout << "./" << argv[0] << " index_file " << endl;
return 1;
}
CSA_TYPE csa;
load_from_file(csa, string(argv[1]) + "." + string(SUF));
load_from_file(csa, argv[1]);
write_structure<JSON_FORMAT>(csa, cout);
}
6 changes: 6 additions & 0 deletions benchmark/indexing_count/test_case.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/text
DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz
PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz
SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz
#INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz
#EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz
#EINSTEIN-en;../data/einstein.en.txt;einstein-en;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz
#PARA;../data/para;para;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz
#WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz
#E-COLI;../data/Escherichia_Coli;E.coli;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz
20 changes: 14 additions & 6 deletions benchmark/indexing_count/visualize/count.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(xtable) # if not installed call install.packages("xtable")
library(plyr)

source("../../basic_functions.R")

Expand All @@ -7,13 +8,17 @@ idx_config <- readConfig("../index.config",c("IDX_ID","SDSL_TYPE","LATEX-NAME"))
tc_config <- readConfig("../test_case.config",c("TC_ID","PATH","LATEX-NAME","URL"))
compile_config <- readConfig("../compile_options.config",c("COMPILE_ID","OPTIONS"))

# Create a data frame which maps test case names to their index in the list
tc_ord <- data.frame("ord"=seq(1,nrow(tc_config)),"LATEX-NAME")
rownames(tc_ord) <- tc_config[["TC_ID"]]

# Load report information

config <- readConfig("index-filter.config",c("IDX_ID"))

# Load data
raw <- data_frame_from_key_value_pairs( "../results/all.txt" )
#
# Filter indexes
raw <- raw[raw[["IDX_ID"]]%in%config[["IDX_ID"]],]
raw[["IDX_ID"]] <- factor(raw[["IDX_ID"]])
Expand All @@ -29,23 +34,26 @@ raw <- raw[order(raw[["TC_ID"]]),]

data <- split(raw, raw[["COMPILE_ID"]])



form_table <- function(d, order=NA){
# calculate the mean time per IDX_ID,TC_ID
d <- aggregate(d[c('Time','Space')],
by=list(IDX_ID=d[['IDX_ID']],
TC_ID=d[['TC_ID']],
COMPILE_ID=d[['COMPILE_ID']]),
TC_ID=d[['TC_ID']]),
FUN=mean,na.rm=TRUE)
dByProgram <- split(d, d[["IDX_ID"]])
table <- data.frame(dByProgram[[1]]["TC_ID"])
d <- d[ order(tc_ord[as.character(d[["TC_ID"]]),"ord"]), ]
dd <- split(d, d[["IDX_ID"]])
table <- data.frame(dd[[1]]["TC_ID"])
names(table) <- c("TC_ID")
table[["TC_ID"]] <- paste("\\textsc{", tc_config[table[['TC_ID']], "LATEX-NAME"],"}")
names(table) <- c(" ")
prog_name <- names(dByProgram)
prog_name <- names(dd)
if( !is.na(order) ){
prog_name <- order
}
for( prog in prog_name ){
sel <- dByProgram[[prog]]
sel <- dd[[prog]]
table <- cbind(table, " "=rep("", length(sel["Time"])))
table <- cbind(table, round(sel["Time"],3))
table <- cbind(table, sel["Space"]*100)
Expand Down
11 changes: 0 additions & 11 deletions benchmark/indexing_count/visualize/tbl-index-info.tex

This file was deleted.

6 changes: 3 additions & 3 deletions benchmark/indexing_extract/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ TIME_FILES = $(foreach IDX_ID,$(IDX_IDS),\
$(foreach SAMPLE_ID,$(SAMPLE_IDS),results/$(TC_ID).$(IDX_ID).$(SAMPLE_ID))))
COMP_FILES = $(addsuffix .z.info,$(TC_PATHS))

all: $(BUILD_EXECS) $(QUERY_EXECS) info intervals
all: $(BUILD_EXECS) $(QUERY_EXECS) intervals

info: $(INFO_EXECS) $(INFO_FILES)

Expand All @@ -44,7 +44,7 @@ input: $(TC_PATHS)

compression: input $(COMP_FILES)

timing: input $(INDEXES) intervals $(TIME_FILES) compression
timing: input $(INDEXES) intervals $(TIME_FILES) compression info
@cat $(TIME_FILES) > $(RESULT_FILE)
@cd visualize; make

Expand Down Expand Up @@ -153,4 +153,4 @@ cleanall: clean cleanresults
@echo "Remove all generated files."
@rm -f $(INDEXES) $(INFO_FILES) $(PATTERNS)
@rm -f $(TMP_DIR)/*
@rm -f $(IVL_DIR)/*
@rm -f $(INTERVALS)
4 changes: 2 additions & 2 deletions benchmark/indexing_locate/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ TIME_FILES = $(foreach IDX_ID,$(IDX_IDS),\
$(foreach SAMPLE_ID,$(SAMPLE_IDS),results/$(TC_ID).$(IDX_ID).$(SAMPLE_ID))))
COMP_FILES = $(addsuffix .z.info,$(TC_PATHS))

all: $(BUILD_EXECS) $(QUERY_EXECS) info pattern
all: $(BUILD_EXECS) $(QUERY_EXECS) pattern

info: $(INFO_EXECS) $(INFO_FILES)

Expand All @@ -44,7 +44,7 @@ pattern: input $(PATTERNS) $(BIN_DIR)/pattern_random

compression: input $(COMP_FILES)

timing: input $(INDEXES) pattern $(TIME_FILES) compression
timing: input $(INDEXES) pattern $(TIME_FILES) compression info
@cat $(TIME_FILES) > $(RESULT_FILE)
@cd visualize; make

Expand Down
2 changes: 1 addition & 1 deletion benchmark/indexing_locate/src/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using namespace std;
int main(int argc, char* argv[])
{
char* filename;
if (argc < 2) {
if (argc < 2) {
cout << "./" << argv[0] << " index_file " << endl;
return 1;
}
Expand Down
4 changes: 4 additions & 0 deletions examples/64bit_array2int_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ using namespace std;

int main(int argc, char* argv[])
{
if (argc < 2) {
cout << "Usage: " << argv[0] << " int_file" << endl;
return 1;
}
size_t x = util::file_size(argv[1]);
const int BPI=8;
cout<<"file size in bytes = "<<x<<endl;
Expand Down
2 changes: 0 additions & 2 deletions examples/CMakeLists.txt

This file was deleted.

17 changes: 2 additions & 15 deletions examples/bit_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#include <sdsl/int_vector.hpp>
#include <sdsl/suffix_trees.hpp>
#include <sdsl/bit_vectors.hpp>
#include <iostream>

using namespace std;
using namespace sdsl;

int main(int argc, char* argv[])
int main()
{

bit_vector b(10000000, 0);
Expand Down Expand Up @@ -44,16 +43,4 @@ int main(int argc, char* argv[])

write_structure<JSON_FORMAT>(rrrb, cout);
cout<<endl;

typedef cst_sada<> tCst;
tCst cst;

construct(cst, argv[1], 1);

for (tCst::const_iterator it = cst.begin(); it!=cst.end(); ++it) {
if (it.visit() == 1) {
cout << cst.depth(*it) << "-["<< cst.lb(*it)<<","<<cst.rb(*it)<<"]" << endl;

}
}
}
19 changes: 11 additions & 8 deletions examples/csa_alphabet_strategy.cpp
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
/* This example shows how the representation of the alphabet dependent
* part of a CST can be altered by using policy classes.
*
* Author: Simon Gog
*/

#include <sdsl/suffix_arrays.hpp>
#include <iostream>
#include <string>

using namespace sdsl;
using namespace std;

template<class Csa>
void csa_info(Csa& csa, const char* file, bool json)
template<class csa_t>
void csa_info(csa_t& csa, const char* file, bool json)
{
cout << "file: " << file << endl;
cout << "file : " << file << endl;
construct(csa, file, 1);
cout << "csa of type " << util::demangle(typeid(csa).name()) << endl;
cout << "csa of type : " << util::demangle(typeid(csa).name()) << endl;
cout << "size in bytes : " << size_in_bytes(csa) << endl;
if (json) {
cout << "---------------" << endl;
cout << "json output: " << endl;
write_structure<JSON_FORMAT>(csa, cout);
cout << endl;
}
cout << "---------------" << endl;
}

int main(int argc, char* argv[])
{
if (argc < 2) {
cout << "Usage: " << argv[0] << " file [json]" << endl;
cout << "Usage: " << argv[0] << " file [JSON]" << endl;
cout << " (1) Constructs CSAs 2 csa_sada and 2 csa_wt, with" << endl;
cout << " alphabet strategies." << endl;
cout << " (2) Outputs type and size. If JSON is specified," << endl;
cout << " also the structure in JSON-format." << endl;
return 1;
}
bool json = false;
Expand Down
Loading

0 comments on commit 63cb47c

Please sign in to comment.