Skip to content

Commit

Permalink
Implement a handler for file header. This allows clients to learn the…
Browse files Browse the repository at this point in the history
… file version, compression, writer version, etc. Before, this information was impossible to extract from the library.
  • Loading branch information
rafal-c committed Oct 2, 2023
1 parent 4a80bf3 commit 6b1a981
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 12 deletions.
27 changes: 27 additions & 0 deletions src/rdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,22 @@ typedef enum rdata_file_format_e {
RDATA_SINGLE_OBJECT
} rdata_file_format_t;

/* Compression scheme reported to an rdata_header_handler callback.
 * The numeric values are spelled out so they are stable and visible at a glance. */
typedef enum rdata_compression_e {
    RDATA_COMPRESSION_NONE  = 0, /* default: no decompression stream is active */
    RDATA_COMPRESSION_GZIP  = 1, /* reported when the zlib stream is in use */
    RDATA_COMPRESSION_BZIP2 = 2, /* reported when the bzip2 stream is in use */
    RDATA_COMPRESSION_LZMA  = 3  /* reported when an LZMA stream is in use */
} rdata_compression_t;

/* File header record exposed to clients through rdata_header_handler.
 * The struct is declared under 1-byte packing, so no padding is inserted
 * between the 2-byte `header` field and the 32-bit fields that follow it. */
#pragma pack(push, 1)
typedef struct rdata_header_s {
/* two leading bytes of the record; the writer stores "X\n" here */
char header[2];
/* file format version (the writer emits 2) */
uint32_t format_version;
/* version of R that wrote the file */
uint32_t writer_version;
/* minimal version of R required to read the file back */
uint32_t reader_version;
} rdata_header_t;
#pragma pack(pop)

const char *rdata_error_message(rdata_error_t error_code);

typedef int (*rdata_column_handler)(const char *name, rdata_type_t type,
Expand All @@ -52,6 +68,15 @@ typedef int (*rdata_column_name_handler)(const char *value, int index, void *ctx
/* Callback that receives an error message string together with the user context. */
typedef void (*rdata_error_handler)(const char *error_message, void *ctx);
/* Callback that receives a progress value together with the user context.
 * NOTE(review): the range of `progress` and the meaning of the int return value
 * are not visible in this part of the file - confirm against the parser. */
typedef int (*rdata_progress_handler)(double progress, void *ctx);

/* Callback type for passing meta-information from the file header to the caller.
 * Arguments:
 * - compression - compression algorithm used on the file,
 * - header_line - first 5 bytes of the file containing information about file type
 *   (RData vs RDS, ascii vs binary, etc.). This is a raw 5-byte buffer and is NOT
 *   NUL-terminated: read at most 5 bytes from it and do not pass it to functions
 *   that expect a C string,
 * - header - structure with information about file version, writer R version and
 *   minimal R version required to read the file back,
 * - ctx - user context, same as in all other handlers.
 * Return value: 0 to continue parsing; any non-zero value makes rdata_parse() stop
 * and return RDATA_ERROR_USER_ABORT.
 * header_line and header must not be freed by the user. Both point at storage local
 * to rdata_parse(), so copy whatever is needed instead of keeping the pointers after
 * the callback returns.
 */
typedef int (*rdata_header_handler)(rdata_compression_t compression, const char *header_line, const rdata_header_t* header, void *ctx);

#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
Expand Down Expand Up @@ -96,6 +121,7 @@ typedef struct rdata_parser_s {
rdata_column_handler dim_handler;
rdata_text_value_handler dim_name_handler;
rdata_error_handler error_handler;
rdata_header_handler header_handler;
rdata_io_t *io;
} rdata_parser_t;

Expand All @@ -111,6 +137,7 @@ rdata_error_t rdata_set_value_label_handler(rdata_parser_t *parser, rdata_text_v
rdata_error_t rdata_set_dim_handler(rdata_parser_t *parser, rdata_column_handler dim_handler);
rdata_error_t rdata_set_dim_name_handler(rdata_parser_t *parser, rdata_text_value_handler dim_name_handler);
rdata_error_t rdata_set_error_handler(rdata_parser_t *parser, rdata_error_handler error_handler);
/* Registers the callback that receives file header meta-information (see rdata_header_handler). */
rdata_error_t rdata_set_header_handler(rdata_parser_t *parser, rdata_header_handler header_handler);
rdata_error_t rdata_set_open_handler(rdata_parser_t *parser, rdata_open_handler open_handler);
rdata_error_t rdata_set_close_handler(rdata_parser_t *parser, rdata_close_handler close_handler);
rdata_error_t rdata_set_seek_handler(rdata_parser_t *parser, rdata_seek_handler seek_handler);
Expand Down
10 changes: 3 additions & 7 deletions src/rdata_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,6 @@

#pragma pack(push, 1)

/* Internal header record: a 2-byte marker followed by three 32-bit version fields.
 * Declared inside the surrounding 1-byte pack region, so there is no padding
 * after `header`. */
typedef struct rdata_v2_header_s {
/* two leading bytes of the record */
char header[2];
/* file format version */
uint32_t format_version;
/* NOTE(review): presumably the version of R that wrote the file - confirm */
uint32_t writer_version;
/* NOTE(review): presumably the minimal R version able to read the file - confirm */
uint32_t reader_version;
} rdata_v2_header_t;

typedef struct rdata_sexptype_header_s {
unsigned int type:8;
unsigned int object:1;
Expand Down Expand Up @@ -78,3 +71,6 @@ typedef struct rdata_sexptype_info_s {
#define RDATA_SEXPTYPE_LANGUAGE_OBJECT_ATTR 240
#define RDATA_SEXPTYPE_PAIRLIST_ATTR 239
#define RDATA_PSEUDO_SXP_ALTREP 238

/* we read this many characters from the beginning of the file to determine file format */
#define RDATA_HEADER_LENGTH 5
5 changes: 5 additions & 0 deletions src/rdata_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ rdata_error_t rdata_set_error_handler(rdata_parser_t *parser, rdata_error_handle
return RDATA_OK;
}

/* Installs the callback that will receive file header meta-information.
 * Stores `header_handler` on `parser` and always reports RDATA_OK.
 * `parser` is dereferenced unconditionally, so it must not be NULL. */
rdata_error_t rdata_set_header_handler(rdata_parser_t *parser, rdata_header_handler header_handler) {
    rdata_header_handler *slot = &parser->header_handler;

    *slot = header_handler;
    return RDATA_OK;
}

rdata_error_t rdata_set_open_handler(rdata_parser_t *parser, rdata_open_handler open_handler) {
parser->io->open = open_handler;
return RDATA_OK;
Expand Down
38 changes: 34 additions & 4 deletions src/rdata_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ typedef struct rdata_ctx_s {
rdata_text_value_handler value_label_handler;
rdata_column_handler dim_handler;
rdata_text_value_handler dim_name_handler;
rdata_error_handler error_handler;
rdata_error_handler error_handler;
rdata_header_handler header_handler;
void *user_ctx;
#if HAVE_BZIP2
bz_stream *bz_strm;
Expand Down Expand Up @@ -656,9 +657,10 @@ void free_rdata_ctx(rdata_ctx_t *ctx) {
rdata_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *user_ctx) {
int is_rdata = 0;
rdata_error_t retval = RDATA_OK;
rdata_v2_header_t v2_header;
rdata_header_t v2_header;
rdata_ctx_t *ctx = rdata_ctx_init(parser->io, filename);
char *encoding = NULL;
rdata_compression_t compression = RDATA_COMPRESSION_NONE;

if (ctx == NULL) {
retval = RDATA_ERROR_OPEN;
Expand All @@ -675,15 +677,16 @@ rdata_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *us
ctx->dim_handler = parser->dim_handler;
ctx->dim_name_handler = parser->dim_name_handler;
ctx->error_handler = parser->error_handler;
ctx->header_handler = parser->header_handler;

ctx->is_dimnames = false;

if ((retval = init_stream(ctx)) != RDATA_OK) {
goto cleanup;
}

char header_line[5];
if (read_st(ctx, &header_line, sizeof(header_line)) != sizeof(header_line)) {
char header_line[RDATA_HEADER_LENGTH] = "";
if (read_st(ctx, &header_line, RDATA_HEADER_LENGTH) != RDATA_HEADER_LENGTH) {
retval = RDATA_ERROR_READ;
goto cleanup;
}
Expand All @@ -704,6 +707,33 @@ rdata_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *us
v2_header.reader_version = byteswap4(v2_header.reader_version);
}

if (ctx->header_handler) {
#if HAVE_BZIP2
if (ctx->bz_strm) {
compression = RDATA_COMPRESSION_BZIP2;
}
#endif
#if HAVE_APPLE_COMPRESSION
if (ctx->compression_strm) {
compression = RDATA_COMPRESSION_LZMA;
}
#endif
#if HAVE_ZLIB
if (ctx->z_strm) {
compression = RDATA_COMPRESSION_GZIP;
}
#endif
#if HAVE_LZMA
if (ctx->lzma_strm) {
compression = RDATA_COMPRESSION_LZMA;
}
#endif
if(ctx->header_handler(compression, header_line, &v2_header, ctx->user_ctx)) {
retval = RDATA_ERROR_USER_ABORT;
goto cleanup;
}
}

if (is_rdata && v2_header.format_version != header_line[3] - '0') {
retval = RDATA_ERROR_PARSE;
goto cleanup;
Expand Down
2 changes: 1 addition & 1 deletion src/rdata_write.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ rdata_error_t rdata_begin_file(rdata_writer_t *writer, void *user_ctx) {
goto cleanup;
}

rdata_v2_header_t v2_header;
rdata_header_t v2_header;
memcpy(v2_header.header, "X\n", sizeof("X\n")-1);
v2_header.format_version = 2;
v2_header.reader_version = 131840;
Expand Down

0 comments on commit 6b1a981

Please sign in to comment.