Skip to content

Commit

Permalink
Change parser to full-ragel
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Jun 24, 2024
1 parent fc836e7 commit 6f500cb
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 30 deletions.
178 changes: 176 additions & 2 deletions src/spss/readstat_sav_parse_mr_name.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,183 @@ readstat_error_t extract_mr_data(const char *line, mr_set_t *result) {


/**
 * Parse one multiple-response-set definition into `result`.
 *
 * `result` is zero-initialised before parsing, so any field that
 * extract_mr_data() does not assign is left as zero/NULL.
 *
 * @param line   definition text, handed to extract_mr_data() unchanged
 * @param result destination record; overwritten unconditionally
 * @return the status reported by extract_mr_data()
 */
readstat_error_t parse_mr_line(const char *line, mr_set_t *result) {
    *result = (mr_set_t){0};
    return extract_mr_data(line, result);
}


#line 292 "./src/spss/readstat_sav_parse_mr_name.c"
/*
 * Transition tables for the `mr_parser` state machine, read by the
 * table-driven loop in parse_mr_string().
 * NOTE(review): the surrounding `#line` directives point at
 * readstat_sav_parse_mr_name.rl, so these tables look generated from that
 * file -- presumably changes belong there, not here; confirm with the build.
 */

/* Action lists: a count followed by that many action ids. A transition's
 * entry in _mr_parser_trans_actions is an offset into this array. */
static const char _mr_parser_actions[] = {
0, 1, 0
};

/* Per state: offset of the state's first key in _mr_parser_trans_keys. */
static const char _mr_parser_key_offsets[] = {
0, 0, 1, 2, 4
};

/* Key bytes compared against *p (36 is '$', 10 is '\n', 0 is NUL). */
static const char _mr_parser_trans_keys[] = {
36, 10, 0, 10, 10, 0
};

/* Per state: number of single-byte keys searched for that state. */
static const char _mr_parser_single_lengths[] = {
0, 1, 1, 2, 1
};

/* Per state: number of range keys; all zero, so the range search in
 * parse_mr_string() is never entered for these tables. */
static const char _mr_parser_range_lengths[] = {
0, 0, 0, 0, 0
};

/* Per state: base offset into _mr_parser_indicies. */
static const char _mr_parser_index_offsets[] = {
0, 0, 2, 4, 7
};

/* Maps (state base + matched key slot, or the slot after the keys when
 * nothing matched) to a transition id. */
static const char _mr_parser_indicies[] = {
0, 1, 2, 0, 3, 2, 0, 2,
0, 0
};

/* Per transition: the state entered. parse_mr_string() stops when this
 * yields state 0. */
static const char _mr_parser_trans_targs[] = {
2, 0, 3, 4
};

/* Per transition: offset into _mr_parser_actions; 0 means no action. */
static const char _mr_parser_trans_actions[] = {
0, 0, 1, 0
};

/* Initial value assigned to `cs` before scanning. */
static const int mr_parser_start = 1;

/* Only referenced through a `(void)` cast in parse_mr_string(). */
static const int mr_parser_en_main = 1;


#line 157 "./src/spss/readstat_sav_parse_mr_name.rl"


/**
 * Scan a multiple-response-set string and parse every record it contains.
 *
 * The `mr_parser` tables drive a byte-by-byte scan of `line`, including its
 * terminating NUL. Each time the scan reaches the '\n' that closes a record,
 * the text between the record's first byte (the '$' marker for the first
 * record) and that '\n' is copied into a temporary buffer, parsed with
 * parse_mr_line() and appended to the `*mr_sets` array.
 *
 * NOTE(review): this mirrors the `mr_line` action and host code in
 * readstat_sav_parse_mr_name.rl; keep the two in sync.
 *
 * @param line       NUL-terminated string to scan (strlen() is applied to it)
 * @param mr_sets    out: reset to NULL on entry, then grown with realloc() by
 *                   one element per record. This function never frees it,
 *                   not even on error, so the caller has to release it.
 * @param n_mr_lines out: reset to 0 on entry; counts successfully parsed
 *                   records
 * @return READSTAT_OK on success;
 *         READSTAT_ERROR_MALLOC if an allocation fails;
 *         READSTAT_ERROR_BAD_MR_STRING if a record is empty or the scan does
 *         not end in the accepting state with all input consumed;
 *         otherwise the error reported by parse_mr_line().
 */
readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) {
    readstat_error_t retval = READSTAT_OK;
    int cs = 0;
    const char *p = line;
    const char *start = p;
    /* +1: the terminating NUL is part of the input the machine must see. */
    const char *pe = p + strlen(p) + 1;
    *mr_sets = NULL;
    *n_mr_lines = 0;


#line 348 "./src/spss/readstat_sav_parse_mr_name.c"
    {
        cs = mr_parser_start;
    }

#line 169 "./src/spss/readstat_sav_parse_mr_name.rl"

#line 355 "./src/spss/readstat_sav_parse_mr_name.c"
    {
        int _klen;
        unsigned int _trans;
        const char *_acts;
        unsigned int _nacts;
        const char *_keys;

        if ( p == pe )
            goto _test_eof;
        if ( cs == 0 )
            goto _out;
_resume:
        _keys = _mr_parser_trans_keys + _mr_parser_key_offsets[cs];
        _trans = _mr_parser_index_offsets[cs];

        /* Binary search over the state's single-byte keys. */
        _klen = _mr_parser_single_lengths[cs];
        if ( _klen > 0 ) {
            const char *_lower = _keys;
            const char *_mid;
            const char *_upper = _keys + _klen - 1;
            while (1) {
                if ( _upper < _lower )
                    break;

                _mid = _lower + ((_upper-_lower) >> 1);
                if ( (*p) < *_mid )
                    _upper = _mid - 1;
                else if ( (*p) > *_mid )
                    _lower = _mid + 1;
                else {
                    _trans += (unsigned int)(_mid - _keys);
                    goto _match;
                }
            }
            _keys += _klen;
            _trans += _klen;
        }

        /* Binary search over the state's [low, high] range keys. */
        _klen = _mr_parser_range_lengths[cs];
        if ( _klen > 0 ) {
            const char *_lower = _keys;
            const char *_mid;
            const char *_upper = _keys + (_klen<<1) - 2;
            while (1) {
                if ( _upper < _lower )
                    break;

                _mid = _lower + (((_upper-_lower) >> 1) & ~1);
                if ( (*p) < _mid[0] )
                    _upper = _mid - 2;
                else if ( (*p) > _mid[1] )
                    _lower = _mid + 2;
                else {
                    _trans += (unsigned int)((_mid - _keys)>>1);
                    goto _match;
                }
            }
            _trans += _klen;
        }

_match:
        _trans = _mr_parser_indicies[_trans];
        cs = _mr_parser_trans_targs[_trans];

        if ( _mr_parser_trans_actions[_trans] == 0 )
            goto _again;

        _acts = _mr_parser_actions + _mr_parser_trans_actions[_trans];
        _nacts = (unsigned int) *_acts++;
        while ( _nacts-- > 0 )
        {
            switch ( *_acts++ )
            {
            case 0:
#line 140 "./src/spss/readstat_sav_parse_mr_name.rl"
            {
                /* `start` is the record's first byte, `p` its closing '\n';
                 * `span` is the text after the first byte plus one NUL. */
                size_t span = (size_t)(p - start);
                char *mln = NULL;
                mr_set_t *grown = NULL;

                if (span == 0) {
                    /* The tables also take this action for a '\n' that
                     * directly follows another '\n'. There is no record to
                     * copy then, and writing the terminator at span - 1
                     * would land before the buffer. */
                    retval = READSTAT_ERROR_BAD_MR_STRING;
                    goto cleanup;
                }

                mln = malloc(span);
                if (mln == NULL) {
                    retval = READSTAT_ERROR_MALLOC;
                    goto cleanup;
                }
                memcpy(mln, start + 1, span - 1);
                mln[span - 1] = '\0';

                /* Grow through a temporary so the existing array is not lost
                 * when realloc() fails. */
                grown = realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t));
                if (grown == NULL) {
                    free(mln);
                    retval = READSTAT_ERROR_MALLOC;
                    goto cleanup;
                }
                *mr_sets = grown;

                retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]);
                /* NOTE(review): assumes parse_mr_line() keeps no pointer into
                 * its input -- the previous caller freed the tokenised buffer
                 * right after parsing; confirm in extract_mr_data(). */
                free(mln);
                if (retval != READSTAT_OK) goto cleanup;
                (*n_mr_lines)++;
                start = p + 1;
            }
            break;
#line 442 "./src/spss/readstat_sav_parse_mr_name.c"
            }
        }

_again:
        if ( cs == 0 )
            goto _out;
        if ( ++p != pe )
            goto _resume;
_test_eof: {}
_out: {}
    }

#line 170 "./src/spss/readstat_sav_parse_mr_name.rl"

    /* State 4 is the only accepting state; anything else, or unread input,
     * means the string did not match the grammar. */
    if (cs < 4 || p != pe) {
        retval = READSTAT_ERROR_BAD_MR_STRING;
        goto cleanup;
    }

    (void)mr_parser_en_main;

cleanup:
    return retval;
}
2 changes: 1 addition & 1 deletion src/spss/readstat_sav_parse_mr_name.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#include "../readstat.h"
#include "../readstat_malloc.h"

/* Zero-initialises *result, then fills it from `line` via extract_mr_data()
 * and returns that call's status. */
readstat_error_t parse_mr_line(const char *line, mr_set_t *result);
/* Runs the mr_parser machine over `line`. *mr_sets and *n_mr_lines are reset
 * to NULL / 0 on entry and gain one element per record handed to
 * parse_mr_line(). Returns READSTAT_ERROR_BAD_MR_STRING when the machine does
 * not finish in its accepting state with all input consumed. */
readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines);

#endif // READSTAT_PARSE_MR_NAME_H
52 changes: 47 additions & 5 deletions src/spss/readstat_sav_parse_mr_name.rl
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@
mr_subvariables[mr_subvar_count++] = subvar;
}

name = (alnum | '_')+ '=' > extract_mr_name;
nc = (alnum | '_'); # name character
name = nc+ '=' > extract_mr_name;
type = ('C' | 'D'){1} > extract_mr_type;
counted_value = digit* ' ' > extract_counted_value;
label = digit+ ' '+ > extract_label;

nc = (alnum | '_'); # name character
end = (space | '\0'); # token terminator
end = (space | '\0'); # subvar token terminator
subvariable = (nc+ end >extract_subvar);

main := name type counted_value label subvariable+;
Expand Down Expand Up @@ -130,9 +130,51 @@ cleanup:


/**
 * Parse one multiple-response-set definition into `result`.
 *
 * `result` is zero-initialised before parsing, so any field that
 * extract_mr_data() does not assign is left as zero/NULL.
 *
 * @param line   definition text, handed to extract_mr_data() unchanged
 * @param result destination record; overwritten unconditionally
 * @return the status reported by extract_mr_data()
 */
readstat_error_t parse_mr_line(const char *line, mr_set_t *result) {
    *result = (mr_set_t){0};
    return extract_mr_data(line, result);
}

%%{
    machine mr_parser;

    # Runs on the newline that closes a record. The host function provides
    # start (first byte of the record), p (the closing newline), retval,
    # mr_sets, n_mr_lines and a cleanup label.
    action mr_line {
        /* span counts the text after the first byte plus one NUL. */
        size_t span = (size_t)(p - start);
        char *mln = NULL;
        mr_set_t *grown = NULL;

        if (span == 0) {
            /* Reached when a newline directly follows another newline.
             * There is no record to copy, and writing the terminator at
             * span - 1 would land before the buffer. */
            retval = READSTAT_ERROR_BAD_MR_STRING;
            goto cleanup;
        }

        mln = malloc(span);
        if (mln == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        memcpy(mln, start + 1, span - 1);
        mln[span - 1] = 0;

        /* Grow through a temporary so the existing array is not lost when
         * realloc() fails. */
        grown = realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t));
        if (grown == NULL) {
            free(mln);
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        *mr_sets = grown;

        retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]);
        /* NOTE(review): assumes parse_mr_line() keeps no pointer into its
         * input; the previous caller freed the tokenised buffer right after
         * parsing. Confirm in extract_mr_data(). */
        free(mln);
        if (retval != READSTAT_OK) goto cleanup;
        (*n_mr_lines)++;
        start = p + 1;
    }
    line_start = '$';
    line_end = '\n';
    # NOTE(review): postfix + is repetition, so the parenthesised expression
    # is "one or more line_end followed by line_start", not a union. No single
    # character is removed from any, which would make line_char match every
    # byte. If a union was intended this should use | instead; confirm before
    # changing, since labels may legitimately contain the start marker.
    line_char = any - (line_end + line_start);
    mr_line = line_start line_char* line_end > mr_line;
    main := mr_line+ '\0';

    write data nofinal noerror;
}%%

/**
 * Scan a multiple-response-set string with the mr_parser machine.
 *
 * The machine is run over `line` including its terminating NUL; its mr_line
 * action uses the locals `p`, `start` and `retval`, the out-parameters, and
 * the `cleanup` label declared here.
 *
 * @param line       NUL-terminated string to scan (strlen() is applied to it)
 * @param mr_sets    out: reset to NULL on entry; filled by the mr_line action
 * @param n_mr_lines out: reset to 0 on entry; advanced by the mr_line action
 * @return READSTAT_OK on success; READSTAT_ERROR_BAD_MR_STRING when the
 *         machine stops before its first final state or leaves input unread;
 *         otherwise whatever the mr_line action stored in `retval`.
 */
readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) {
    readstat_error_t retval = READSTAT_OK;
    int cs = 0;
    const char *p = line;
    const char *start = p;
    /* +1: the terminating NUL is part of the input the machine must see. */
    const char *pe = p + strlen(p) + 1;
    *mr_sets = NULL;
    *n_mr_lines = 0;

    %% write init;
    %% write exec;

    if (cs < %%{ write first_final; }%% || p != pe) {
        retval = READSTAT_ERROR_BAD_MR_STRING;
        goto cleanup;
    }

    (void)mr_parser_en_main;

cleanup:
    return retval;
}
23 changes: 1 addition & 22 deletions src/spss/readstat_sav_read.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <stdint.h>
#include <math.h>
Expand Down Expand Up @@ -30,10 +29,6 @@
#define DATA_BUFFER_SIZE 65536
#define VERY_LONG_STRING_MAX_LENGTH INT_MAX

// #ifdef _WIN32
// #define strtok_r(s,d,p) strtok_s(s,d,p)
// #endif

/* Others defined in table below */

/* See http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx */
Expand Down Expand Up @@ -172,23 +167,7 @@ static readstat_error_t sav_read_multiple_response_sets(size_t data_len, sav_ctx
goto cleanup;
}

// char *saveptr;
// char *token = strtok_r(mr_string, "$\n", &saveptr);
char *token = strtok(mr_string, "$\n");

int num_lines = 0;
while (token != NULL) {
if ((ctx->mr_sets = readstat_realloc(ctx->mr_sets, (num_lines + 1) * sizeof(mr_set_t))) == NULL) {
retval = READSTAT_ERROR_MALLOC;
goto cleanup;
}
retval = parse_mr_line(token, &ctx->mr_sets[num_lines]);
if (retval != READSTAT_OK) goto cleanup;
num_lines++;
// token = strtok_r(NULL, "$\n", &saveptr);
token = strtok(NULL, "$\n");
}
ctx->multiple_response_sets_length = num_lines;
retval = parse_mr_string(mr_string, &ctx->mr_sets, &ctx->multiple_response_sets_length);

cleanup:
free(mr_string);
Expand Down

0 comments on commit 6f500cb

Please sign in to comment.