Skip to content

Commit

Permalink
Merge pull request #154 from openvenues/setup_datadir_functions
Browse files Browse the repository at this point in the history
Setup datadir functions
  • Loading branch information
albarrentine authored Jan 9, 2017
2 parents a2b84a0 + bbc9172 commit 577f26e
Show file tree
Hide file tree
Showing 12 changed files with 124 additions and 30 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.

AC_INIT([libpostal], [0.3])
AC_INIT([libpostal], [0.3.3])

AM_INIT_AUTOMAKE([foreign subdir-objects])
AC_CONFIG_SRCDIR([src])
Expand Down
3 changes: 2 additions & 1 deletion src/address_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

#define ALL_LANGUAGES "all"

#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
/* File name of the address dictionary data file, relative to its subdirectory. */
#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
/* Full path of that file under the compile-time LIBPOSTAL_DATA_DIR, assembled from the pieces by macro concatenation. */
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE

#define NULL_CANONICAL_INDEX -1

Expand Down
2 changes: 1 addition & 1 deletion src/features.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ void feature_array_add(cstring_array *features, size_t count, ...) {
cstring_array_start_token(features);

bool strip_separator = true;
char_array_append_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
char_array_add_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
va_end(args);
}

Expand Down
15 changes: 15 additions & 0 deletions src/file_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
}

/*
 * Build a path from the n components held in args, joined with
 * PATH_SEPARATOR (the join is asked to strip the separator from the
 * components it is given).
 *
 * Returns NULL when the working char_array cannot be created; otherwise
 * returns whatever char_array_to_string() produces for the joined buffer.
 * Callers in libpostal.c release the result with free().
 * NOTE(review): components are presumably char * strings - confirm against
 * char_array_add_vjoined().
 */
char *path_vjoin(int n, va_list args) {
    char_array *joined = char_array_new();

    if (joined != NULL) {
        bool strip_separator = true;
        char_array_add_vjoined(joined, PATH_SEPARATOR, strip_separator, n, args);
        return char_array_to_string(joined);
    }

    return NULL;
}

/*
 * Variadic front end for path_vjoin(): collects the n components that
 * follow `n` into a va_list and returns path_vjoin()'s result unchanged
 * (NULL included).
 */
char *path_join(int n, ...) {
    va_list components;
    char *joined;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}

inline uint64_t file_deserialize_uint64(unsigned char *buf) {
return ((uint64_t)buf[0] << 56) |
((uint64_t)buf[1] << 48) |
Expand Down
4 changes: 4 additions & 0 deletions src/file_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <sys/types.h>

#include "libpostal_config.h"
#include "string_utils.h"

#ifdef HAVE_DIRENT_H
#include <dirent.h>
Expand Down Expand Up @@ -55,6 +56,9 @@ char *file_getline(FILE * f);

bool is_relative_path(struct dirent *ent);

/*
 * Build a path from n components joined with PATH_SEPARATOR.
 * path_join takes the components as variadic arguments; path_vjoin takes
 * them as an already-started va_list. Both return NULL if the working
 * buffer cannot be created. Callers in libpostal.c release the result
 * with free().
 */
char *path_join(int n, ...);
char *path_vjoin(int n, va_list args);

uint64_t file_deserialize_uint64(unsigned char *buf);
bool file_read_uint64(FILE *file, uint64_t *value);
bool file_write_uint64(FILE *file, uint64_t value);
Expand Down
91 changes: 76 additions & 15 deletions src/libpostal.c
Original file line number Diff line number Diff line change
Expand Up @@ -1054,47 +1054,108 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
return parsed;
}

bool libpostal_setup(void) {
if (!transliteration_module_setup(NULL)) {
log_error("Error loading transliteration module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
bool libpostal_setup_datadir(char *datadir) {
char *transliteration_path = NULL;
char *numex_path = NULL;
char *address_dictionary_path = NULL;

if (datadir != NULL) {
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
}

if (!transliteration_module_setup(transliteration_path)) {
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
return false;
}

if (!numex_module_setup(NULL)) {
log_error("Error loading numex module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
if (!numex_module_setup(numex_path)) {
log_error("Error loading numex module, dir=%s\n", numex_path);
return false;
}

if (!address_dictionary_module_setup(NULL)) {
log_error("Error loading dictionary module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
if (!address_dictionary_module_setup(address_dictionary_path)) {
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
return false;
}

if (transliteration_path != NULL) {
free(transliteration_path);
}

if (numex_path != NULL) {
free(numex_path);
}

if (address_dictionary_path != NULL) {
free(address_dictionary_path);
}

return true;
}

bool libpostal_setup_language_classifier(void) {
if (!language_classifier_module_setup(NULL)) {
log_error("Error loading language classifier, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
/* Set up the core modules with a NULL datadir (no directory override) via libpostal_setup_datadir(). */
bool libpostal_setup(void) {
    char *no_datadir = NULL;
    return libpostal_setup_datadir(no_datadir);
}

bool libpostal_setup_language_classifier_datadir(char *datadir) {
char *language_classifier_dir = NULL;

if (datadir != NULL) {
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
}

if (!language_classifier_module_setup(language_classifier_dir)) {
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
return false;
}

if (language_classifier_dir != NULL) {
free(language_classifier_dir);
}

return true;
}

bool libpostal_setup_parser(void) {
if (!geodb_module_setup(NULL)) {
log_error("Error loading geodb module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
/* Set up the language classifier with a NULL datadir (no directory override) via libpostal_setup_language_classifier_datadir(). */
bool libpostal_setup_language_classifier(void) {
    char *no_datadir = NULL;
    return libpostal_setup_language_classifier_datadir(no_datadir);
}

bool libpostal_setup_parser_datadir(char *datadir) {
char *parser_dir = NULL;
char *geodb_dir = NULL;

if (datadir != NULL) {
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
}

if (!geodb_module_setup(geodb_dir)) {
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
return false;
}

if (!address_parser_module_setup(NULL)) {
log_error("Error loading address parser module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
if (!address_parser_module_setup(parser_dir)) {
log_error("Error loading address parser module, dir=%s\n", parser_dir);
return false;
}

if (parser_dir != NULL) {
free(parser_dir);
}

if (geodb_dir != NULL) {
free(geodb_dir);
}

return true;
}

/* Set up the parser modules with a NULL datadir (no directory override) via libpostal_setup_parser_datadir(). */
bool libpostal_setup_parser(void) {
    char *no_datadir = NULL;
    return libpostal_setup_parser_datadir(no_datadir);
}

void libpostal_teardown(void) {
transliteration_module_teardown();

Expand Down
3 changes: 3 additions & 0 deletions src/libpostal.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
// Setup/teardown methods

// Core modules (transliteration, numex, address dictionary).
// libpostal_setup() is libpostal_setup_datadir(NULL); the _datadir variant
// loads each module's data file from under the given directory.
// Both return false if any module fails to load.
bool libpostal_setup(void);
bool libpostal_setup_datadir(char *datadir);
void libpostal_teardown(void);

// Address parser (geodb and address parser modules).
// libpostal_setup_parser() is libpostal_setup_parser_datadir(NULL).
bool libpostal_setup_parser(void);
bool libpostal_setup_parser_datadir(char *datadir);
void libpostal_teardown_parser(void);

// Language classifier.
// libpostal_setup_language_classifier() is
// libpostal_setup_language_classifier_datadir(NULL).
bool libpostal_setup_language_classifier(void);
bool libpostal_setup_language_classifier_datadir(char *datadir);
void libpostal_teardown_language_classifier(void);

#ifdef __cplusplus
Expand Down
20 changes: 14 additions & 6 deletions src/libpostal_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,20 @@
#error LIBPOSTAL_DATA_DIR not defined!
#endif

#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser"
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries"
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames"
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb"
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier"
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration"
/*
 * Each data component has two macros:
 *   *_SUBDIR - the subdirectory name alone, for joining onto a data
 *              directory chosen at runtime;
 *   *_DIR    - that subdirectory under the compile-time LIBPOSTAL_DATA_DIR.
 */
#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
#define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR

#define GEODB_BLOOM_FILTER_SIZE 100000000
#define GEODB_BLOOM_FILTER_ERROR 0.001
Expand Down
3 changes: 2 additions & 1 deletion src/numex.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#include "trie.h"
#include "trie_search.h"

#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat"
/* File name of the numex data file, relative to its subdirectory. */
#define NUMEX_DATA_FILE "numex.dat"
/* Full default path, built from LIBPOSTAL_NUMEX_DIR so the "numex" subdirectory name is spelled in one place only (same shape as DEFAULT_TRANSLITERATION_PATH). */
#define DEFAULT_NUMEX_PATH LIBPOSTAL_NUMEX_DIR PATH_SEPARATOR NUMEX_DATA_FILE

#define LATIN_LANGUAGE_CODE "la"

Expand Down
6 changes: 3 additions & 3 deletions src/string_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ inline void char_array_add_len(char_array *array, char *str, size_t len) {
}


void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
if (count <= 0) {
return;
}
Expand Down Expand Up @@ -625,15 +625,15 @@ void char_array_append_vjoined(char_array *array, char *separator, bool strip_se
/*
 * Variadic front end for char_array_add_vjoined(): gathers the `count`
 * arguments that follow `count` into a va_list and forwards them, together
 * with `separator` and `strip_separator`, in a single call.
 *
 * The va_list is handed to the callee exactly once; it must not be reused
 * after a callee has read from it without an intervening va_end/va_start.
 */
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
    va_list args;
    va_start(args, count);
    char_array_add_vjoined(array, separator, strip_separator, count, args);
    va_end(args);
}

/*
 * Like char_array_add_joined(), but first calls
 * char_array_strip_nul_byte() on the array before forwarding the `count`
 * variadic arguments to char_array_add_vjoined().
 *
 * The va_list is handed to the callee exactly once; it must not be reused
 * after a callee has read from it without an intervening va_end/va_start.
 */
inline void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
    char_array_strip_nul_byte(array);
    va_list args;
    va_start(args, count);
    char_array_add_vjoined(array, separator, strip_separator, count, args);
    va_end(args);
}

Expand Down
2 changes: 1 addition & 1 deletion src/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args);
void char_array_cat_printf(char_array *array, char *format, ...);

// Mainly for paths or delimited strings
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);

Expand Down
3 changes: 2 additions & 1 deletion src/transliterate.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

#define LATIN_ASCII "latin-ascii"

#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat"
/* File name of the transliteration data file, relative to its subdirectory. */
#define TRANSLITERATION_DATA_FILE "transliteration.dat"
/* Full path of that file under the compile-time LIBPOSTAL_TRANSLITERATION_DIR. */
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE

#define MAX_TRANS_NAME_LEN 100

Expand Down

0 comments on commit 577f26e

Please sign in to comment.