Skip to content

Commit

Permalink
Create a native C library from Glue Schema Registry Java library
Browse files Browse the repository at this point in the history
  • Loading branch information
blacktooth committed Jul 23, 2022
1 parent 8b3a416 commit 939148f
Show file tree
Hide file tree
Showing 21 changed files with 1,125 additions and 0 deletions.
16 changes: 16 additions & 0 deletions native-schema-registry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Native Schema Registry

This module provides a native shared library (`.so`, `.dylib`, `.dll`) version of the AWS Glue Schema Registry SerDes.
It uses GraalVM to generate the shared library.

## Build

Requires GraalVM (21.0+) with native-image support.

The C data types module needs to be built before building the Java module.

```bash
cd c && cmake -S. -Bbuild
cd build && cmake --build . --target native_schema_registry_c_data_types
cd ../../ && mvn package -P native-image
```
12 changes: 12 additions & 0 deletions native-schema-registry/c/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# clang-tidy configuration for the C sources of the native schema registry.

# Enabled check groups come first; entries prefixed with '-' switch individual checks off.
Checks: 'clang-diagnostic-*,clang-analyzer-*,readability-*,modernize-*,bugprone-*,misc-*,google-runtime-int,llvm-header-guard,fuchsia-restrict-system-includes,-clang-analyzer-valist.Uninitialized,-clang-analyzer-security.insecureAPI.rand,-clang-analyzer-alpha.*,-readability-magic-numbers,-readability-non-const-parameter,-readability-avoid-const-params-in-decls,-readability-else-after-return,-readability-isolate-declaration,-readability-uppercase-literal-suffix,-bugprone-sizeof-expression'
# Every enabled warning is promoted to an error.
WarningsAsErrors: '*'
# Only report diagnostics from headers whose path ends in ".h".
HeaderFilterRegex: '.*\.[h]$'
# Take the formatting style for fixes from the project's .clang-format file.
FormatStyle: 'file'
# Per-check options. Each "value" must be nested under its "key" entry for the list to parse.
CheckOptions:
- key: readability-braces-around-statements.ShortStatementLines
  value: '1'
- key: google-runtime-int.TypeSuffix
  value: '_t'
- key: fuchsia-restrict-system-includes.Includes
  value: '*,-stdint.h,-stdbool.h,-assert.h'
35 changes: 35 additions & 0 deletions native-schema-registry/c/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Top-level build script for the C API of the native Glue Schema Registry.
# It defines the names and paths shared by the "src" and "test" subdirectories.
cmake_minimum_required(VERSION 3.19)

# The default compiler on macOS is Apple's. Default to a separately installed gcc,
# but only when the user has not already chosen a compiler (-DCMAKE_C_COMPILER=...).
# This must run before project(), which is where the compiler gets detected.
if(APPLE AND NOT DEFINED CMAKE_C_COMPILER)
  set(CMAKE_C_COMPILER gcc)
endif()

project(native_schema_registry_c LANGUAGES C)

# Build as C99 and fail at configure time if the compiler cannot provide it,
# instead of silently decaying to an older standard.
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)

## Global variables
# Path to the GraalVM generated shared library
set(LIB_NATIVE_SCHEMA_REGISTRY_PATH "${PROJECT_SOURCE_DIR}/../target")
set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX libnativeschemaregistry)

## OS specific variables
# File name of the GraalVM shared library per platform. The import library
# name (.lib) is only defined on Windows.
if(WIN32)
  set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.dll)
  set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_OBJ_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.lib)
elseif(APPLE)
  set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.dylib)
else()
  set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.so)
endif()

# Module / target names made available to the subdirectories.
set(DATA_TYPES_MODULE_NAME native_schema_registry_c_data_types)
set(SERDE_MODULE_NAME native_schema_registry_c)
set(AWS_COMMON_MEMALLOC aws_common_memalloc)
set(NATIVE_SCHEMA_REGISTRY_MODULE_NAME libnativeschemaregistry)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# Directory-scoped on purpose: the targets are declared in the subdirectories
# below and inherit these include paths (public headers + GraalVM generated headers).
include_directories("include")
include_directories("${LIB_NATIVE_SCHEMA_REGISTRY_PATH}")

add_subdirectory("src")

# include(CTest) defines the BUILD_TESTING option (default ON) and calls
# enable_testing() itself when the option is ON, so tests can be skipped with
# -DBUILD_TESTING=OFF.
include(CTest)
if(BUILD_TESTING)
  add_subdirectory("test")
endif()
46 changes: 46 additions & 0 deletions native-schema-registry/c/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Native Schema Registry in C

This module provides a C language based API for the schema registry serializer / de-serializers.

## Build
We use CMake to build the targets in this module.

### Compile
```bash
# Run in the c directory
cmake -S. -Bbuild
cd build
cmake --build .
# Clean
cmake --build . --target clean
```
### Testing
```bash
ctest .
#Re-run failed tests with verbose output
ctest --rerun-failed --output-on-failure
```

### Code Analysis
Code is statically analyzed using clang-tidy.

### Coverage
Code coverage is checked using gcov and lcov; the check fails if the coverage is below the threshold.

#### Installation
You might have to install these modules using your OS package manager.

### Sanitizers
We use the address and leak sanitizers to detect memory leaks and any other potential issues during the build. As of now, they only work on Linux.

### Platform Support

TBD

## License

**Project License** [Apache License Version 2.0](https://github.com/awslabs/aws-glue-schema-registry/blob/master/LICENSE.txt)

N.B.: Although this repository is released under the Apache-2.0 license, its build dependencies include the third party Swig project. The Swig project's licensing includes the GPL-3.0 license.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef GLUE_SCHEMA_REGISTRY_DESERIALIZER_H
#define GLUE_SCHEMA_REGISTRY_DESERIALIZER_H

#include "glue_schema_registry_schema.h"
#include "glue_schema_registry_error.h"
#include "mutable_byte_array.h"
#include "read_only_byte_array.h"
#include <stdbool.h>

/**
 * Handle type for a deserializer. It is the first argument of the
 * glue_schema_registry_deserializer_* functions declared in this header.
 */
typedef struct glue_schema_registry_deserializer {
//Untyped pointer storing the instance context. Currently used for managing the GraalVM instance.
void *instance_context;
} glue_schema_registry_deserializer;

/**
 * Constructor for a glue_schema_registry_deserializer.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written on failure — confirm against the implementation.
 * @return Pointer to the deserializer instance. NOTE(review): presumably NULL on failure — confirm.
 */
glue_schema_registry_deserializer *new_glue_schema_registry_deserializer(glue_schema_registry_error **p_err);

/**
 * Destructor counterpart of new_glue_schema_registry_deserializer.
 * NOTE(review): presumably also releases the instance_context — confirm against the implementation.
 * @param deserializer Deserializer instance to delete.
 */
void delete_glue_schema_registry_deserializer(glue_schema_registry_deserializer *deserializer);

/**
 * Decode entry point of the deserializer.
 * NOTE(review): presumably decodes the bytes in `array` and returns the result as a
 * mutable_byte_array the caller must delete — confirm against the implementation.
 * @param deserializer Deserializer instance to use.
 * @param array Read-only view over the input bytes.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written on failure — confirm.
 */
mutable_byte_array *glue_schema_registry_deserializer_decode(glue_schema_registry_deserializer *deserializer,
read_only_byte_array *array,
glue_schema_registry_error **p_err);

/**
 * Returns a glue_schema_registry_schema for the given input bytes.
 * NOTE(review): presumably the schema the bytes in `array` were encoded with, and
 * presumably owned by the caller — confirm against the implementation.
 * @param deserializer Deserializer instance to use.
 * @param array Read-only view over the input bytes.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written on failure — confirm.
 */
glue_schema_registry_schema *
glue_schema_registry_deserializer_decode_schema(glue_schema_registry_deserializer *deserializer,
read_only_byte_array *array,
glue_schema_registry_error **p_err);

/**
 * Boolean check on the given input bytes.
 * NOTE(review): presumably reports whether `array` holds data this deserializer is able
 * to decode — confirm against the implementation.
 * @param deserializer Deserializer instance to use.
 * @param array Read-only view over the input bytes.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written on failure — confirm.
 */
bool glue_schema_registry_deserializer_can_decode(glue_schema_registry_deserializer *deserializer,
read_only_byte_array *array,
glue_schema_registry_error **p_err);

#endif //GLUE_SCHEMA_REGISTRY_DESERIALIZER_H
54 changes: 54 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_error.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef NATIVE_SCHEMA_REGISTRY_GLUE_SCHEMA_REGISTRY_ERROR_H
#define NATIVE_SCHEMA_REGISTRY_GLUE_SCHEMA_REGISTRY_ERROR_H

#include <stdio.h>

//Error codes. The numbering starts at 5000; the base value is arbitrary and carries no meaning.
//NOTE(review): presumably these are the values stored in glue_schema_registry_error.code — confirm.
#define ERR_CODE_INVALID_STATE 5000
#define ERR_CODE_NULL_PARAMETERS 5001
#define ERR_CODE_GRAALVM_INIT_EXCEPTION 5002
#define ERR_CODE_GRAALVM_TEARDOWN_EXCEPTION 5003
#define ERR_CODE_INVALID_PARAMETERS 5004
#define ERR_CODE_RUNTIME_ERROR 5005

//Writes "WARN: <msg>, Code: <code>" followed by a newline to stderr.
//msg is formatted with %s (a C string) and code with %d (an int).
//TODO: Improve error reporting to respect logging levels.
#define log_warn(msg, code) fprintf(stderr, "WARN: %s, Code: %d\n", msg, code)

//NOTE(review): presumably the upper bound on the length of an error message; whether it
//includes the terminating NUL is not visible in this header — confirm.
#define MAX_ERROR_MSG_LEN 10000

/**
 * Defines the glue_schema_registry_error structure for holding error messages and codes
 * resulting from function executions.
 */
typedef struct glue_schema_registry_error {
//Error message.
char * msg;
//Error code.
int code;
} glue_schema_registry_error;

/**
 * Constructor for a glue_schema_registry_error.
 * @param err_msg Error message. NOTE(review): presumably copied into the new instance — confirm.
 * @param err_code Error code.
 * @return Pointer to the error instance. NOTE(review): presumably NULL on failure — confirm.
 */
glue_schema_registry_error * new_glue_schema_registry_error(const char * err_msg, int err_code);

/**
 * Destructor counterpart of new_glue_schema_registry_error.
 * NOTE(review): presumably also releases the message string — confirm against the implementation.
 * @param error Error instance to delete.
 */
void delete_glue_schema_registry_error(glue_schema_registry_error *error);

/**
 * Copies the given error's msg into the dst array, trimming it to fit as necessary.
 * @param error Error instance to read the message from.
 * @param dst Destination array provided by the caller.
 * @param len NOTE(review): presumably the capacity of dst; whether the copied text is always
 *            NUL-terminated is not visible in this header — confirm.
 */
void glue_schema_registry_error_get_msg(glue_schema_registry_error *error, char *dst, size_t len);

/**
 * Creates an instance of glue_schema_registry_error and writes it to the given
 * glue_schema_registry_error pointer holder (*p_err). The caller is expected to
 * have initialized *p_err.
 * Despite the name, this is an ordinary C function: the declaration returns void
 * and the error is handed back through p_err.
 * @param p_err Initialized glue_schema_registry_error pointer holder.
 * @param msg Error message to write.
 * @param code Non-zero error code.
 */
void throw_error(glue_schema_registry_error **p_err, const char *msg, int code);

/**
 * Creates a pointer to hold an instance of glue_schema_registry_error.
 * NOTE(review): presumably released with delete_glue_schema_registry_error_holder — confirm.
 * @return The pointer holder, suitable for passing as a `p_err` argument.
 */
glue_schema_registry_error **new_glue_schema_registry_error_holder(void);

/**
 * Deletes the pointer holder of glue_schema_registry_error and its content.
 * @param p_err Pointer holder to delete.
 */
void delete_glue_schema_registry_error_holder(glue_schema_registry_error **p_err);

#endif //NATIVE_SCHEMA_REGISTRY_GLUE_SCHEMA_REGISTRY_ERROR_H
41 changes: 41 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_schema.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef GLUE_SCHEMA_REGISTRY_SCHEMA_H
#define GLUE_SCHEMA_REGISTRY_SCHEMA_H

#include "glue_schema_registry_error.h"

/*
 * Glue Schema Registry Schema structure that represents the
 * schema object required by the Glue Schema Registry Serializers / De-serializers.
 * All three attributes are held as C strings.
 */
typedef struct glue_schema_registry_schema {
//Name of the schema, as a string
char * schema_name;

//Complete definition of the schema, as a string
char * schema_def;

//Data format name (JSON, AVRO or PROTOBUF), as a string
char * data_format;

} glue_schema_registry_schema;

//Creates a new instance of glue_schema_registry_schema.
//schema_name: name of the schema.
//schema_def:  complete definition of the schema.
//data_format: data format name (JSON, AVRO, PROTOBUF).
//p_err:       pointer holder for a glue_schema_registry_error.
//NOTE(review): presumably the strings are copied into the instance and p_err is written on failure — confirm.
glue_schema_registry_schema *new_glue_schema_registry_schema(
const char * schema_name,
const char * schema_def,
const char * data_format,
glue_schema_registry_error ** p_err
);

//Deletes the given glue_schema_registry_schema instance.
//NOTE(review): presumably also releases the strings held by the instance — confirm.
void delete_glue_schema_registry_schema(glue_schema_registry_schema * schema);

//Getters for the individual attributes of a glue_schema_registry_schema instance.
//These getter methods are translated into "Getter" methods in target languages.
//NOTE(review): the returned pointers presumably alias the strings held by the schema and must not
//be freed by the caller — confirm.
const char * glue_schema_registry_schema_get_schema_name(glue_schema_registry_schema * schema);

const char * glue_schema_registry_schema_get_schema_def(glue_schema_registry_schema * schema);

const char * glue_schema_registry_schema_get_data_format(glue_schema_registry_schema * schema);

#endif //GLUE_SCHEMA_REGISTRY_SCHEMA_H
25 changes: 25 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_serializer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef GLUE_SCHEMA_REGISTRY_SERIALIZER_H
#define GLUE_SCHEMA_REGISTRY_SERIALIZER_H

#include "glue_schema_registry_schema.h"
#include "glue_schema_registry_error.h"
#include "mutable_byte_array.h"
#include "read_only_byte_array.h"

/**
 * Handle type for a serializer. It is the first argument of
 * glue_schema_registry_serializer_encode declared in this header.
 */
typedef struct glue_schema_registry_serializer {
//Untyped pointer storing the instance context. Currently used for managing the GraalVM instance.
void *instance_context;
} glue_schema_registry_serializer;

/**
 * Constructor for a glue_schema_registry_serializer.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written on failure — confirm against the implementation.
 * @return Pointer to the serializer instance. NOTE(review): presumably NULL on failure — confirm.
 */
glue_schema_registry_serializer *new_glue_schema_registry_serializer(glue_schema_registry_error **p_err);

/**
 * Destructor counterpart of new_glue_schema_registry_serializer.
 * NOTE(review): presumably also releases the instance_context — confirm against the implementation.
 * @param serializer Serializer instance to delete.
 */
void delete_glue_schema_registry_serializer(glue_schema_registry_serializer *serializer);

//Encodes the GSR Schema with a byte array.
//serializer:     serializer instance to use.
//array:          read-only view over the input bytes.
//transport_name: NOTE(review): meaning is not evident from this header — confirm against the implementation.
//gsr_schema:     schema to encode together with the bytes.
//p_err:          pointer holder for a glue_schema_registry_error; NOTE(review): presumably written on failure — confirm.
//Returns the encoded result as a mutable_byte_array.
//NOTE(review): presumably owned by the caller and NULL on failure — confirm.
mutable_byte_array *glue_schema_registry_serializer_encode(glue_schema_registry_serializer *serializer,
read_only_byte_array * array,
const char * transport_name,
glue_schema_registry_schema *gsr_schema,
glue_schema_registry_error **p_err);

#endif //GLUE_SCHEMA_REGISTRY_SERIALIZER_H
16 changes: 16 additions & 0 deletions native-schema-registry/c/include/memory_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef NATIVE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H
#define NATIVE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H

#include <stdlib.h>

/*
 * Wrapper over AWS SDK Common memory allocator.
 * The three signatures mirror malloc / calloc / free from <stdlib.h>.
 * NOTE(review): presumably the semantics mirror them as well, and memory obtained from
 * aws_common_malloc / aws_common_calloc must be released with aws_common_free — confirm.
 */

//Allocation taking a single size argument (malloc-style signature).
void *aws_common_malloc(size_t size);

//Allocation taking an element count and an element size (calloc-style signature).
void *aws_common_calloc(size_t count, size_t size);

//Release function taking the pointer to free (free-style signature).
void aws_common_free(void *ptr);

#endif //NATIVE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H
45 changes: 45 additions & 0 deletions native-schema-registry/c/include/mutable_byte_array.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef MUTABLE_BYTE_ARRAY_H
#define MUTABLE_BYTE_ARRAY_H
#include <stdlib.h>
#include "glue_schema_registry_error.h"

//Equal to Integer.MAX_VALUE in Java (2^31 - 1).
//This gives a limit of ~2.1 GB on a record.
#define MAX_BYTES_LIMIT 2147483647
/**
 * A mutable byte array that allows writing / updating bytes in a fixed array of size `max_len`.
 */
typedef struct mutable_byte_array {
//Pointer to the array contents.
unsigned char * data;
//Fixed size of the array.
size_t max_len;
} mutable_byte_array;

/**
 * Initializes a mutable byte array of size `len`.
 * The data is initially set to '0'
 * (NOTE(review): presumably zero-valued bytes rather than the character '0' — confirm).
 * Caller can optionally provide a pointer holder to glue_schema_registry_error to read error messages.
 * NOTE(review): `len` is presumably validated against MAX_BYTES_LIMIT — confirm against the implementation.
 */
mutable_byte_array * new_mutable_byte_array(size_t len, glue_schema_registry_error **p_err);

/**
 * Frees the data and the pointer to the mutable byte array.
 * @param array Array instance to delete; its data is freed together with it.
 */
void delete_mutable_byte_array(mutable_byte_array * array);

/**
 * Gets the reference to the array contents.
 * The return type is a non-const pointer, so the bytes can be modified through it.
 */
unsigned char * mutable_byte_array_get_data(mutable_byte_array * array);

/**
 * Writes a single byte at the given index in the byte array.
 * @param array Array to write into.
 * @param index Position of the byte to write.
 * @param byte Value to write.
 * @param p_err Pointer holder for a glue_schema_registry_error.
 *              NOTE(review): presumably written when `index` is outside the array — confirm.
 */
void mutable_byte_array_write(mutable_byte_array * array, size_t index, unsigned char byte,
glue_schema_registry_error **p_err);

/**
 * Returns the length of the byte array (its fixed size, `max_len`).
 */
size_t mutable_byte_array_get_max_len(mutable_byte_array * array);

#endif //MUTABLE_BYTE_ARRAY_H
35 changes: 35 additions & 0 deletions native-schema-registry/c/include/read_only_byte_array.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef READ_ONLY_BYTE_ARRAY_H
#define READ_ONLY_BYTE_ARRAY_H
#include <stdlib.h>
#include "glue_schema_registry_error.h"

/**
 * A view over `len` bytes of memory starting at `data`.
 */
typedef struct read_only_byte_array {
//Pointer to the memory being viewed.
unsigned char * data;
//Length of the data being pointed to.
size_t len;
} read_only_byte_array;

/**
 * Creates a read-only byte array that points to the given memory location to
 * provide a view over the data. Attempts to modify the data can result in
 * unintended consequences or crashes.
 * The caller must guarantee that the memory is valid and is exactly `len` long.
 * Caller can optionally provide a pointer holder to glue_schema_registry_error to read error messages.
 */
read_only_byte_array * new_read_only_byte_array(unsigned char *data, size_t len, glue_schema_registry_error **p_err);

/**
 * Deletes the byte array instance but not the underlying data.
 * The memory the array points to remains the caller's responsibility.
 */
void delete_read_only_byte_array(read_only_byte_array * array);

/**
 * Gets the reference to the data pointed to by this array.
 * The return type is a non-const pointer; the data is still meant to be treated as read-only.
 */
unsigned char * read_only_byte_array_get_data(read_only_byte_array * array);

/**
 * Gets the length of the data pointed to by this array.
 */
size_t read_only_byte_array_get_len(read_only_byte_array * array);

#endif //READ_ONLY_BYTE_ARRAY_H
Loading

0 comments on commit 939148f

Please sign in to comment.