From d7d94cf458fd64f29537e59b9e652ff94d9065a6 Mon Sep 17 00:00:00 2001 From: Chris Mikkelson Date: Tue, 12 Oct 2021 14:52:47 -0500 Subject: [PATCH 01/24] input_nmsg.c: use more specific result codes --- nmsg/input_nmsg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nmsg/input_nmsg.c b/nmsg/input_nmsg.c index 4d4f9630d..16e3724a2 100644 --- a/nmsg/input_nmsg.c +++ b/nmsg/input_nmsg.c @@ -249,11 +249,11 @@ _input_nmsg_unpack_container2(const uint8_t *buf, size_t buf_len, *nmsg = nmsg__nmsg__unpack(NULL, u_len, u_buf); free(u_buf); if (*nmsg == NULL) - return (nmsg_res_failure); + return (nmsg_res_parse_error); } else { *nmsg = nmsg__nmsg__unpack(NULL, buf_len, buf); if (*nmsg == NULL) - return (nmsg_res_failure); + return (nmsg_res_parse_error); } return (nmsg_res_success); @@ -375,7 +375,7 @@ _input_nmsg_read_container_zmq(nmsg_input_t input, Nmsg__Nmsg **nmsg) { /* read the NMSG container */ if (zmq_recvmsg(input->stream->zmq, &zmsg, 0) == -1) { - res = nmsg_res_failure; + res = nmsg_res_read_failure; goto out; } nmsg_timespec_get(&input->stream->now); @@ -555,7 +555,7 @@ do_read_file(nmsg_input_t input, ssize_t bytes_needed, ssize_t bytes_max) { while (bytes_needed > 0) { bytes_read = read(buf->fd, buf->end, bytes_max); if (bytes_read < 0) - return (nmsg_res_failure); + return (nmsg_res_read_failure); if (bytes_read == 0) return (nmsg_res_eof); buf->end += bytes_read; From 50139327d0c7481ef7682eaf8fc27cb318d79ade Mon Sep 17 00:00:00 2001 From: Chris Mikkelson Date: Tue, 12 Oct 2021 14:55:14 -0500 Subject: [PATCH 02/24] Add debugging output to more input failures. --- nmsg/input_nmsg.c | 16 ++++++++++++---- nmsg/zbuf.c | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/nmsg/input_nmsg.c b/nmsg/input_nmsg.c index 16e3724a2..e8ae705d1 100644 --- a/nmsg/input_nmsg.c +++ b/nmsg/input_nmsg.c @@ -213,12 +213,16 @@ _input_nmsg_unpack_container(nmsg_input_t input, Nmsg__Nmsg **nmsg, return (res); *nmsg = nmsg__nmsg__unpack(NULL, u_len, u_buf); free(u_buf); - if (*nmsg == NULL) + if (*nmsg == NULL) { + _nmsg_dprintf(1, "%s: failed to unpack container\n", __func__); return (nmsg_res_parse_error); + } } else { *nmsg = nmsg__nmsg__unpack(NULL, buf_len, buf); - if (*nmsg == NULL) + if (*nmsg == NULL) { + _nmsg_dprintf(1, "%s: failed to unpack container\n", __func__); return (nmsg_res_parse_error); + } } return (res); @@ -384,7 +388,9 @@ _input_nmsg_read_container_zmq(nmsg_input_t input, Nmsg__Nmsg **nmsg) { buf = zmq_msg_data(&zmsg); buf_len = zmq_msg_size(&zmsg); if (buf_len < NMSG_HDRLSZ_V2) { - res = nmsg_res_failure; + _nmsg_dprintf(1, "%s: received truncated message (%zu bytes)", + __func__, buf_len); + res = nmsg_res_parse_error; goto out; } @@ -423,8 +429,10 @@ _input_nmsg_deserialize_header(const uint8_t *buf, size_t buf_len, static const char magic[] = NMSG_MAGIC; uint16_t version; - if (buf_len < NMSG_LENHDRSZ_V2) + if (buf_len < NMSG_LENHDRSZ_V2) { + _nmsg_dprintf(1, "%s: failed to deserialize header\n", __func__); return (nmsg_res_failure); + } /* check magic */ if (memcmp(buf, magic, sizeof(magic)) != 0) diff --git a/nmsg/zbuf.c b/nmsg/zbuf.c index 330f42405..65f131b77 100644 --- a/nmsg/zbuf.c +++ b/nmsg/zbuf.c @@ -139,6 +139,7 @@ nmsg_zbuf_inflate(nmsg_zbuf_t zb, size_t z_len, u_char *z_buf, zret = inflate(&zb->zs, Z_NO_FLUSH); if (zret != Z_STREAM_END || zb->zs.avail_out != 0) { + _nmsg_dprintf(1, "%s: inflate() failed\n", __func__); free(*u_buf); return (nmsg_res_failure); } From e53285f7e1a7310e087d71f9a0113cd772630210 Mon Sep 17 00:00:00 
2001 From: Stuart Kemp <126508436+skempdt@users.noreply.github.com> Date: Tue, 20 Feb 2024 13:56:12 -0600 Subject: [PATCH 03/24] Use stdatomic functions for thread-safe operations. (#130) * Use stdatomic functions instead of an explicit mutex to protect operations. --- nmsg/base/dnsqr.c | 17 +++++------------ nmsg/io.c | 21 +++++++++++---------- nmsg/output_nmsg.c | 8 ++------ nmsg/private.h | 3 ++- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/nmsg/base/dnsqr.c b/nmsg/base/dnsqr.c index 4c09b221d..74e0b328b 100644 --- a/nmsg/base/dnsqr.c +++ b/nmsg/base/dnsqr.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -78,7 +79,7 @@ typedef struct { size_t len_table; - bool stop; + atomic_bool stop; int capture_qr; int capture_rd; bool zero_resolver_address; @@ -1838,7 +1839,7 @@ dnsqr_trim(dnsqr_ctx_t *ctx) { assert(he->dnsqr->n_query_time_sec > 0); assert(he->dnsqr->n_query_time_nsec > 0); if (ctx->count > ctx->max_values || - ctx->stop == true || + atomic_load_explicit(&ctx->stop, memory_order_relaxed) || ctx->now.tv_sec - he->dnsqr->query_time_sec[0] > ctx->query_timeout) { dnsqr = he->dnsqr; @@ -2660,13 +2661,7 @@ dnsqr_pkt_to_payload(void *clos, nmsg_pcap_t pcap, nmsg_message_t *m) { return (nmsg_res_success); } } else { - bool stop; - - pthread_mutex_lock(&ctx->lock); - stop = ctx->stop; - pthread_mutex_unlock(&ctx->lock); - - if (stop == true) + if (atomic_load_explicit(&ctx->stop, memory_order_relaxed)) return (nmsg_res_eof); } @@ -2674,9 +2669,7 @@ dnsqr_pkt_to_payload(void *clos, nmsg_pcap_t pcap, nmsg_message_t *m) { if (res == nmsg_res_success) { return (do_packet(ctx, pcap, m, pkt_data, pkt_hdr, &ts)); } else if (res == nmsg_res_eof) { - pthread_mutex_lock(&ctx->lock); - ctx->stop = true; - pthread_mutex_unlock(&ctx->lock); + atomic_store_explicit(&ctx->stop, true, memory_order_relaxed); return (nmsg_res_again); } diff --git a/nmsg/io.c b/nmsg/io.c index 5ed7e9526..3118c44c8 100644 --- a/nmsg/io.c +++ b/nmsg/io.c @@ -74,7 +74,7 @@ struct nmsg_io { nmsg_io_close_fp close_fp; nmsg_io_output_mode output_mode; pthread_mutex_t lock; - uint64_t count_nmsg_payload_out; + atomic_uint_fast64_t io_count_nmsg_payload_out; unsigned count, interval, interval_offset; bool interval_randomized; volatile bool stop; @@ -167,7 +167,7 @@ nmsg_io_get_stats(nmsg_io_t io, uint64_t *sum_in, uint64_t *sum_out, *container_recvs += recvs; } - *sum_out = io->count_nmsg_payload_out; + *sum_out = atomic_load_explicit(&io->io_count_nmsg_payload_out, memory_order_relaxed); return nmsg_res_success; } @@ -315,11 +315,14 @@ nmsg_io_destroy(nmsg_io_t *io) { nmsg_io_filter_vec_destroy(&(*io)->filters); /* print statistics */ - if ((*io)->debug >= 2 && (*io)->count_nmsg_payload_out > 0) - _nmsg_dprintfv((*io)->debug, 2, "nmsg_io: io=%p" - " count_nmsg_payload_out=%" PRIu64 "\n", - (void *)(*io), - (*io)->count_nmsg_payload_out); + if ((*io)->debug >= 2) { + uint64_t pl_out = atomic_load_explicit(&(*io)->io_count_nmsg_payload_out, memory_order_relaxed); + + if (pl_out > 0) + _nmsg_dprintfv((*io)->debug, 2, "nmsg_io: io=%p" + " count_nmsg_payload_out=%" PRIu64 "\n", + (void *)(*io), pl_out); + } free(*io); *io = NULL; } @@ -725,9 +728,7 @@ io_write(struct nmsg_io_thr *iothr, struct nmsg_io_output *io_output, if (res != nmsg_res_success) return (res); - pthread_mutex_lock(&io->lock); - io->count_nmsg_payload_out += 1; - pthread_mutex_unlock(&io->lock); + atomic_fetch_add_explicit(&io->io_count_nmsg_payload_out, 1, memory_order_relaxed); return (res); } diff --git 
a/nmsg/output_nmsg.c b/nmsg/output_nmsg.c index 9086bc6c2..0211231f1 100644 --- a/nmsg/output_nmsg.c +++ b/nmsg/output_nmsg.c @@ -151,9 +151,7 @@ container_write(nmsg_output_t output, nmsg_container_t *co) uint8_t *buf; /* Multiple threads can enter here at once. */ - pthread_mutex_lock(&output->stream->w_lock); - seq = output->stream->sequence++; /* TODO: Replace with "atomic fetch and add". */ - pthread_mutex_unlock(&output->stream->w_lock); + seq = atomic_fetch_add_explicit(&output->stream->so_sequence_num, 1, memory_order_relaxed); res = nmsg_container_serialize(*co, &buf, &buf_len, true, /* do_header */ output->stream->do_zlib, seq, output->stream->sequence_id); @@ -330,9 +328,7 @@ frag_write(nmsg_output_t output, nmsg_container_t co) max_fragsz = ostr->bufsz - 32; /* Multiple threads can enter here at once. */ - pthread_mutex_lock(&ostr->w_lock); - seq = ostr->sequence++; /* TODO: Replace with "atomic fetch and add". */ - pthread_mutex_unlock(&ostr->w_lock); + seq = atomic_fetch_add_explicit(&ostr->so_sequence_num, 1, memory_order_relaxed); res = nmsg_container_serialize(co, &packed, &len, false, /* do_header */ ostr->do_zlib, seq, ostr->sequence_id); diff --git a/nmsg/private.h b/nmsg/private.h index 08d4ebe4f..3a86babd4 100644 --- a/nmsg/private.h +++ b/nmsg/private.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -301,7 +302,7 @@ struct nmsg_stream_output { unsigned group; bool do_zlib; bool do_sequence; - uint32_t sequence; + atomic_uint_fast32_t so_sequence_num; uint64_t sequence_id; }; From 5e49e9623929d29d9d0fda4ff378de7cf16f301f Mon Sep 17 00:00:00 2001 From: Stephen Watt Date: Tue, 23 Apr 2024 15:12:31 -0400 Subject: [PATCH 04/24] Clarify interaction of libnmsg and custom atexit() handlers. --- nmsg/nmsg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nmsg/nmsg.h b/nmsg/nmsg.h index 0d617bdc6..d55447884 100644 --- a/nmsg/nmsg.h +++ b/nmsg/nmsg.h @@ -123,6 +123,12 @@ typedef nmsg_res (*nmsg_cb_message_read)(nmsg_message_t *msg, void *user); * function returned #nmsg_res_success before using any other libnmsg function. * The library should only be initialized once. * + * While this function performs explicit initialization of libnmsg, it provides + * a mechanism of automatic finalization by installing an atexit() handler. + * Thus nmsg_init() MUST be called not only before using any other libnmsg + * function, but also before the registration of any other atexit() handler + * which performs any nmsg functionality (e.g. nmsg object/context cleanup). + * * \return nmsg_res_success On successful library initialization. * \return nmsg_res_failure If libnmsg is not usable, or if #nmsg_init() was * called more than once. From 9f435b458d455199a381fa8428a5026a0121cad5 Mon Sep 17 00:00:00 2001 From: Stephen Watt Date: Tue, 23 Apr 2024 15:17:51 -0400 Subject: [PATCH 05/24] Eliminate unnecessary usage scroll when indicating nmsgtool invocation errors. 
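The ordering contract documented for nmsg_init() in patch 04 above is easiest to see in code. A minimal sketch of a compliant caller (cleanup() and its body are hypothetical), relying only on the LIFO order of atexit() handlers:

    #include <stdlib.h>
    #include <nmsg.h>

    static nmsg_input_t input;          /* closed by cleanup() at exit */

    static void
    cleanup(void)
    {
            /* Registered after nmsg_init(), so this runs before libnmsg's
             * own atexit() finalizer (handlers run in LIFO order). */
            if (input != NULL)
                    nmsg_input_close(&input);
    }

    int
    main(void)
    {
            if (nmsg_init() != nmsg_res_success)
                    return (EXIT_FAILURE);
            atexit(cleanup);            /* must follow nmsg_init() */
            /* ... open inputs, run the I/O loop ... */
            return (EXIT_SUCCESS);
    }
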
--- .gitignore | 2 ++ src/nmsgtool.c | 6 ++++-- src/process_args.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 577647289..5b28d4d83 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,5 @@ test-suite.log tests/test-io tests/test-misc tests/test-parse +tests/*/*.out +tests/*/test.sh diff --git a/src/nmsgtool.c b/src/nmsgtool.c index 31f58f34f..dae665f65 100644 --- a/src/nmsgtool.c +++ b/src/nmsgtool.c @@ -381,10 +381,12 @@ int main(int argc, char **argv) { void usage(const char *msg) { - if (msg) + if (msg != NULL) fprintf(stderr, "%s: usage error: %s\n", argv_program, msg); + else + argv_usage(args, ARGV_USAGE_DEFAULT); + nmsg_io_destroy(&ctx.io); - argv_usage(args, ARGV_USAGE_DEFAULT); exit(msg == NULL ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/src/process_args.c b/src/process_args.c index 881b8097d..a886d3dd7 100644 --- a/src/process_args.c +++ b/src/process_args.c @@ -373,7 +373,7 @@ process_args(nmsgtool_ctx *c) { /* validation */ if (c->n_inputs == 0) - usage("no data sources specified"); + usage("no data sources specified (-h for more help)"); if (c->n_outputs == 0) { /* implicit "-o -" */ add_pres_output(c, "-"); From eb170e4be7688c2b47855dfd12cbf67544eee573 Mon Sep 17 00:00:00 2001 From: Stephen Watt Date: Fri, 10 May 2024 16:55:48 -0400 Subject: [PATCH 06/24] Ensure that any libjson-c package info overrides system paths. --- Makefile.am | 9 +++------ tests/nmsg-dnsqr-tests/test.sh.in | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Makefile.am b/Makefile.am index 57ee59031..5cf4efde2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -329,6 +329,7 @@ fltmod_nmsg_flt1_sample_la_SOURCES = \ bin_PROGRAMS += src/nmsgtool src_nmsgtool_LDADD = \ nmsg/libnmsg.la \ + $(json_c_LIBS) \ $(libpcap_LIBS) \ $(libzmq_LIBS) src_nmsgtool_SOURCES = \ @@ -473,14 +474,10 @@ examples_nmsg_callback_SOURCES = examples/nmsg_callback.c examples_print_srcip_LDADD = nmsg/libnmsg.la examples_print_srcip_SOURCES = examples/print_srcip.c -examples_nmsg_dnsqr2pcap_LDADD = \ - nmsg/libnmsg.la \ - $(libpcap_LIBS) +examples_nmsg_dnsqr2pcap_LDADD = nmsg/libnmsg.la $(libpcap_LIBS) examples_nmsg_dnsqr2pcap_SOURCES = examples/nmsg-dnsqr2pcap.c -examples_nmsg_packet2pcap_LDADD = \ - nmsg/libnmsg.la \ - $(libpcap_LIBS) +examples_nmsg_packet2pcap_LDADD = nmsg/libnmsg.la $(libpcap_LIBS) examples_nmsg_packet2pcap_SOURCES = examples/nmsg-packet2pcap.c examples_print_version_LDADD = nmsg/libnmsg.la diff --git a/tests/nmsg-dnsqr-tests/test.sh.in b/tests/nmsg-dnsqr-tests/test.sh.in index 78c7f1d8a..27444e35b 100755 --- a/tests/nmsg-dnsqr-tests/test.sh.in +++ b/tests/nmsg-dnsqr-tests/test.sh.in @@ -79,7 +79,7 @@ check pcap-to-nmsg ######## # try example code too -env LD_LIBRARY_PATH=@abs_top_builddir@/nmsg/.libs/ @abs_top_builddir@/examples/.libs/nmsg-dnsqr2pcap ${SOURCE}.nmsg ${OUTPUT}.nmsg.pcap.out +env LD_LIBRARY_PATH=@abs_top_builddir@/nmsg/.libs/:$LD_LIBRARY_PATH @abs_top_builddir@/examples/.libs/nmsg-dnsqr2pcap ${SOURCE}.nmsg ${OUTPUT}.nmsg.pcap.out check read nmsg base:dnsqr and generate pcap output using example cmp -s ${SOURCE}.pcap ${OUTPUT}.nmsg.pcap.out check example-nmsg-to-pcap From b26a91a5ddaceff6e513c8da2358bedf082b8f8b Mon Sep 17 00:00:00 2001 From: Demian Vladi <126811849+dvladi77@users.noreply.github.com> Date: Thu, 23 May 2024 11:44:45 -0700 Subject: [PATCH 07/24] Implemented Kafka input/output (#131) --- .gitignore | 1 + Makefile.am | 62 ++- README.md | 8 +- configure.ac | 10 + debian/control | 1 + doc/docbook/nmsgtool.1 | 32 ++ 
doc/docbook/nmsgtool.docbook | 43 ++ nmsg/input.c | 68 +++ nmsg/input.h | 40 ++ nmsg/input_json.c | 28 ++ nmsg/input_nmsg.c | 92 ++-- nmsg/kafkaio.c | 811 ++++++++++++++++++++++++++++++ nmsg/kafkaio.h | 106 ++++ nmsg/msgmod/message.c | 16 + nmsg/msgmod/transparent.h | 9 +- nmsg/msgmod/transparent_payload.c | 177 +++++-- nmsg/output.c | 91 +++- nmsg/output.h | 34 ++ nmsg/output_json.c | 49 ++ nmsg/output_nmsg.c | 6 + nmsg/private.h | 46 ++ src/io.c | 183 +++++++ src/nmsgtool.c | 38 ++ src/nmsgtool.h | 12 +- src/process_args.c | 40 +- tests/test-private.c | 169 +++++++ 26 files changed, 2082 insertions(+), 90 deletions(-) create mode 100644 nmsg/kafkaio.c create mode 100644 nmsg/kafkaio.h create mode 100644 tests/test-private.c diff --git a/.gitignore b/.gitignore index 5b28d4d83..8ca85375b 100644 --- a/.gitignore +++ b/.gitignore @@ -59,5 +59,6 @@ test-suite.log tests/test-io tests/test-misc tests/test-parse +tests/test-private tests/*/*.out tests/*/test.sh diff --git a/Makefile.am b/Makefile.am index 5cf4efde2..ef3b9e1de 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,7 @@ AM_CFLAGS = \ $(libprotobuf_c_CFLAGS) \ $(libwdns_CFLAGS) \ $(libzmq_CFLAGS) \ + $(librdkafka_CFLAGS) \ $(json_c_CFLAGS) AM_LDFLAGS = @@ -51,6 +52,7 @@ EXTRA_DIST += \ tests/test-layout-fltmod_plugin.c \ tests/test-nmsg_output_set_rate.c \ tests/test-parse.c \ + tests/test-private.c \ tests/test-io.c \ tests/test-misc.c \ tests/udp-checksum-tests/test.sh.in \ @@ -127,20 +129,19 @@ nmsg_libnmsg_la_LDFLAGS = \ $(AM_LDFLAGS) \ -version-info $(VERSION_INFO) \ -export-symbols-regex "^(nmsg_[a-z].*)" -nmsg_libnmsg_la_LIBADD = \ +LIBNMSG_LIB_DEPS = \ $(libpcap_LIBS) \ $(libprotobuf_c_LIBS) \ $(libzmq_LIBS) \ + $(librdkafka_LIBS) \ $(json_c_LIBS) -nmsg_libnmsg_la_SOURCES = \ - libmy/crc32c.c libmy/crc32c.h libmy/crc32c-slicing.c libmy/crc32c-sse42.c \ - libmy/list.h \ - libmy/my_time.h \ - libmy/my_rate.c libmy/my_rate.h \ - libmy/tree.h \ - libmy/b64_decode.c libmy/b64_decode.h \ - libmy/b64_encode.c libmy/b64_encode.h \ - libmy/fast_inet_ntop.c libmy/fast_inet_ntop.h \ +nmsg_libnmsg_la_LIBADD = $(LIBNMSG_LIB_DEPS) +LIBNMSG_LIB_MODULES = \ + libmy/crc32c.c libmy/crc32c-slicing.c libmy/crc32c-sse42.c \ + libmy/my_rate.c \ + libmy/b64_decode.c \ + libmy/b64_encode.c \ + libmy/fast_inet_ntop.c \ nmsg/alias.c \ nmsg/asprintf.c \ nmsg/brate.c \ @@ -161,37 +162,50 @@ nmsg_libnmsg_la_SOURCES = \ nmsg/io.c \ nmsg/ipdg.c \ nmsg/ipreasm.c \ - nmsg/ipreasm.h \ nmsg/msgmodset.c \ nmsg/nmsg.c \ - nmsg/nmsg_json.h \ - nmsg/nmsg_port_net.h \ nmsg/output.c \ nmsg/output_json.c \ nmsg/output_nmsg.c \ nmsg/output_pres.c \ nmsg/payload.c \ nmsg/pcap_input.c \ - nmsg/private.h \ nmsg/random.c \ nmsg/rate.c \ nmsg/res.c \ nmsg/sock.c \ nmsg/strbuf.c \ nmsg/timespec.c \ - nmsg/version.c nmsg/version.h \ + nmsg/version.c \ nmsg/zmqio.c \ + nmsg/kafkaio.c \ nmsg/zbuf.c \ nmsg/msgmod/lookup.c \ nmsg/msgmod/message.c \ nmsg/msgmod/msgmod.c \ nmsg/msgmod/transparent.c \ - nmsg/msgmod/transparent.h \ nmsg/msgmod/transparent_json.c \ nmsg/msgmod/transparent_message.c \ nmsg/msgmod/transparent_module.c \ nmsg/msgmod/transparent_payload.c \ nmsg/msgmod/transparent_pres.c +nmsg_libnmsg_la_SOURCES = \ + libmy/crc32c.h \ + libmy/list.h \ + libmy/my_time.h \ + libmy/my_rate.h \ + libmy/tree.h \ + libmy/b64_decode.h \ + libmy/b64_encode.h \ + libmy/fast_inet_ntop.h \ + nmsg/ipreasm.h \ + nmsg/nmsg_json.h \ + nmsg/nmsg_port_net.h \ + nmsg/private.h \ + nmsg/version.h \ + nmsg/kafkaio.h \ + nmsg/msgmod/transparent.h \ + $(LIBNMSG_LIB_MODULES) 
nodist_nmsg_libnmsg_la_SOURCES = \ nmsg/nmsg.pb-c.c \ nmsg/nmsg.pb-c.h @@ -329,9 +343,9 @@ fltmod_nmsg_flt1_sample_la_SOURCES = \ bin_PROGRAMS += src/nmsgtool src_nmsgtool_LDADD = \ nmsg/libnmsg.la \ - $(json_c_LIBS) \ $(libpcap_LIBS) \ - $(libzmq_LIBS) + $(libzmq_LIBS) \ + $(librdkafka_LIBS) src_nmsgtool_SOURCES = \ libmy/argv.c \ libmy/argv.h \ @@ -429,10 +443,20 @@ check_PROGRAMS += tests/test-parse tests_test_parse_LDADD = nmsg/libnmsg.la tests_test_parse_SOURCES = tests/test-parse.c +TESTS += tests/test-private +check_PROGRAMS += tests/test-private +tests_test_private_LDFLAGS = -rdynamic +PRIVATE_TEST_MODULES = $(LIBNMSG_LIB_MODULES:.c=.o) +tests_test_private_LDADD = \ + $(PRIVATE_TEST_MODULES) \ + nmsg/nmsg.pb-c.o \ + $(LIBNMSG_LIB_DEPS) +tests_test_private_SOURCES = tests/test-private.c + TESTS += tests/test-io check_PROGRAMS += tests/test-io tests_test_io_CPPFLAGS = -DSRCDIR="\"$(abs_srcdir)\"" $(AM_CPPFLAGS) -tests_test_io_LDADD = nmsg/libnmsg.la $(libzmq_LIBS) +tests_test_io_LDADD = nmsg/libnmsg.la $(libzmq_LIBS) $(librdkafka_LIBS) tests_test_io_SOURCES = tests/test-io.c TESTS += tests/test-misc diff --git a/README.md b/README.md index ebbc60478..5f415e7af 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,15 @@ nmsg has the following external dependencies: * [zmq](http://zeromq.org/) +* [rdkafka](https://github.com/confluentinc/librdkafka) + * [json-c](https://github.com/json-c/json-c) * [zlib](http://www.zlib.net/) On Debian systems, the following packages should be installed, if available: - pkg-config libpcap0.8-dev libprotobuf-c-dev protobuf-c-compiler libzmq3-dev libjson-c-dev zlib1g-dev + pkg-config libpcap0.8-dev libprotobuf-c-dev protobuf-c-compiler libzmq3-dev librdkafka-dev libjson-c-dev zlib1g-dev Note that on Debian systems, binary packages of nmsg and its dependencies are available from @@ -36,6 +38,7 @@ Debian-based systems. On FreeBSD systems, the following ports should be installed, if available: devel/libzmq + devel/librdkafka devel/json-c devel/pkgconf devel/protobuf @@ -54,6 +57,9 @@ script. Support for `libzmq` can be disabled by passing the `--without-libzmq` parameter to the `configure` script. +Support for `librdkafka` can be disabled by passing the `--without-librdkafka` parameter +to the `configure` script. + Support for `json-c` can be disabled by passing the `--without-json-c` parameter to the `configure` script. 
diff --git a/configure.ac b/configure.ac
index 770e54349..46d703c92 100644
--- a/configure.ac
+++ b/configure.ac
@@ -149,6 +149,15 @@ AS_IF([test -z "$PROTOC_C"],
 
 PKG_CHECK_MODULES([libwdns], [libwdns >= 0.12.0])
 
+AC_ARG_WITH([librdkafka], AS_HELP_STRING([--without-librdkafka], [Disable kafka support]))
+if test "x$with_librdkafka" != "xno"; then
+	PKG_CHECK_MODULES([librdkafka], [rdkafka >= 1.1.0])
+	AC_DEFINE([HAVE_LIBRDKAFKA], [1], [Define to 1 if librdkafka support is enabled.])
+	use_librdkafka="true"
+else
+	use_librdkafka="false"
+fi
+
 AC_ARG_WITH([libzmq], AS_HELP_STRING([--without-libzmq], [Disable zmq support]))
 if test "x$with_libzmq" != "xno"; then
 	PKG_CHECK_MODULES([libzmq], [libzmq >= 4.2.0])
@@ -226,6 +235,7 @@ AC_MSG_RESULT([
     bigendian:            ${ac_cv_c_bigendian}
 
     libzmq support:       ${use_libzmq}
+    librdkafka support:   ${use_librdkafka}
     json-c support:       ${use_json_c}
 
     building html docs:   ${DOC_HTML_MSG}
diff --git a/debian/control b/debian/control
index 00f2123ab..76af6a780 100644
--- a/debian/control
+++ b/debian/control
@@ -13,6 +13,7 @@ Build-Depends:
  libprotobuf-c-dev (>= 1.0.1~),
  libwdns-dev (>= 0.12.0~),
  libzmq3-dev (>= 4.2.0~),
+ librdkafka-dev (>= 1.1.0~),
  libjson-c-dev (>= 0.13.0~),
  pkg-config,
  protobuf-c-compiler (>= 1.0.1~),
diff --git a/doc/docbook/nmsgtool.1 b/doc/docbook/nmsgtool.1
index f2affa3d8..d818c983e 100644
--- a/doc/docbook/nmsgtool.1
+++ b/doc/docbook/nmsgtool.1
@@ -202,6 +202,25 @@ parameter\&. For example,
 means "on the hour"\&.
 .RE
 .PP
+\fB\-\-kafkakey\fR \fIfieldname\fR
+.RS 4
+Use the named NMSG message field's value, in canonical representation, as the key for payloads sent to Kafka\&.
+.RE
+.PP
+\fB\-\-readtopic\fR \fIkafka\fR
+.RS 4
+Read NMSG payloads from a Kafka endpoint\&.
+The address has the format \fBproto:topic[#partition|%group_id]@broker[:port][,offset]\fR,
+where specifying either a partition number or a consumer group ID is optional, as is the offset value\&.
+However, a protocol value of either \fBnmsg\fR or \fBjson\fR must be supplied\&.
+The offset is either a numeric value, or the string 'oldest' or 'newest', in order
+to start retrieval at the oldest/newest messages in the Kafka topic\&.
+.sp
+An example of possible arguments to
+\fB\-\-readtopic\fR
+is "nmsg:ch202#0@kafka.local.com:9092,3000" to indicate that nmsgtool shall read \fBnmsg\fR containers from topic "ch202" on partition 0 at offset 3000 from Kafka broker kafka.local.com, port 9092\&.
+.RE
+.PP
 \fB\-R\fR, \fB\-\-randomize\fR
 .RS 4
 Randomize the initial offset within the interval that the process is stopped or outputs are reopened\&.
@@ -430,6 +449,19 @@ are required\&.
 Write NMSG payloads to a file\&.
 .RE
 .PP
+\fB\-\-writetopic\fR \fIkafka\fR
+.RS 4
+Write NMSG payloads to a Kafka endpoint\&.
+The address has the format \fBproto:topic[#partition|%group_id]@broker[:port]\fR,
+where specifying either a partition number or a consumer group ID is optional,
+but a protocol value of either \fBnmsg\fR or \fBjson\fR must be supplied\&.
+.sp
+An example of possible arguments to
+\fB\-\-writetopic\fR
+is "nmsg:ch202#0@kafka.local.com:9092" to indicate that nmsgtool shall write \fBnmsg\fR containers to topic "ch202" on partition 0 to Kafka broker kafka.local.com, port 9092\&.
+Note that nmsgtool ignores offsets for Kafka producers\&.
+.RE
+.PP
 \fB\-o\fR \fIfile\fR, \fB\-\-writepres\fR \fIfile\fR
 .RS 4
 Write presentation format payloads to a file\&.
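The endpoint grammar above is the same one accepted by the library constructors this patch adds, nmsg_input_open_kafka_endpoint() and nmsg_output_open_kafka_endpoint(). A minimal sketch that copies payloads from one topic to another; the broker, topic, and group names are illustrative, error handling is abbreviated, and nmsg_init() is assumed to have succeeded:

    #include <nmsg.h>

    int
    copy_topic(void)
    {
            nmsg_res res;
            nmsg_message_t msg;
            nmsg_input_t in;
            nmsg_output_t out;

            in = nmsg_input_open_kafka_endpoint("nmsg:ch202%mygroup@kafka.local.com:9092,oldest");
            out = nmsg_output_open_kafka_endpoint("nmsg:ch202-copy@kafka.local.com:9092", NMSG_WBUFSZ_JUMBO);
            if (in == NULL || out == NULL)
                    return (-1);

            for (;;) {
                    res = nmsg_input_read(in, &msg);
                    if (res == nmsg_res_again)
                            continue;       /* e.g. caught up with the partition */
                    if (res != nmsg_res_success)
                            break;          /* eof or a hard error */
                    nmsg_output_write(out, msg);
                    nmsg_message_destroy(&msg);
            }

            nmsg_input_close(&in);
            nmsg_output_close(&out);
            return (res == nmsg_res_eof ? 0 : -1);
    }
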
diff --git a/doc/docbook/nmsgtool.docbook b/doc/docbook/nmsgtool.docbook index e4a9792a8..52648352d 100644 --- a/doc/docbook/nmsgtool.docbook +++ b/doc/docbook/nmsgtool.docbook @@ -223,6 +223,33 @@ + + fieldname + + + Use the named NMSG message field's value in canonical representation as key for + payloads sent to Kafka. + + + + + + kafka + + + Read NMSG payloads from a Kafka endpoint. The address kafka + has format proto:topic[#partition|%group_id]@broker[:port][,offset]. Either a partition + number or a consumer group ID may be optionally supplied. Also optional is an offset + consisting of either a numerical value or the string 'oldest' or 'newest' in order to + start retrieval at the oldest/newest messages in the Kafka topic. + An example of a possible kafka endpoint is + "nmsg:ch202#0@kafka.local.com:9092,3000" to indicate that nmsgtool shall read nmsg + containers from topic "ch202" on partition 0 at offset 3000 from the Kafka broker at + kafka.local.com, port 9092. + + + + filter filter @@ -432,6 +459,22 @@ + + kafka + + + Write NMSG payloads to a Kafka endpoint. The address kafka + has format proto:topic[#partition|%group_id]@broker[:port]. + Either a partition number or a consumer group ID may be optionally supplied. + An example of a possible kafka endpoint is + "nmsg:ch202#0@kafka.local.com:9092" to indicate that nmsgtool shall write + nmsg containers to topic "ch202" on partition 0 to Kafka + broker kafka.local.com, port 9092. + Note that nmsgtool ignores offsets for Kafka producers. + + + + file file diff --git a/nmsg/input.c b/nmsg/input.c index eea01f355..bf7010c6e 100644 --- a/nmsg/input.c +++ b/nmsg/input.c @@ -38,6 +38,56 @@ nmsg_input_open_sock(int fd) { return (input_open_stream(nmsg_stream_type_sock, fd)); } +#if (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) +nmsg_input_t +nmsg_input_open_kafka_json(const char *address) +{ + struct nmsg_input *input; + + input = calloc(1, sizeof(*input)); + if (input == NULL) + return (NULL); + + input->kafka = calloc(1, sizeof(*(input->kafka))); + if (input->kafka == NULL) { + free(input); + return (NULL); + } + + input->type = nmsg_input_type_kafka_json; + input->read_fp = _input_kafka_json_read; + + input->kafka->ctx = kafka_create_consumer(address, NMSG_RBUF_TIMEOUT); + if (input->kafka->ctx == NULL) { + free(input->kafka); + free(input); + return (NULL); + } + + return (input); +} +#else /* (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) */ +nmsg_input_t +nmsg_input_open_kafka_json(const char *address __attribute__((unused))) { + return (NULL); +} +#endif /* (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) */ + +#ifdef HAVE_LIBRDKAFKA +nmsg_input_t +_input_open_kafka(void *s) { + struct nmsg_input *input; + + input = input_open_stream_base(nmsg_stream_type_kafka); + if (input == NULL) + return (input); + + input->stream->kafka = s; + + return (input); +} +#endif /* HAVE_LIBRDKAFKA */ + #ifdef HAVE_LIBZMQ nmsg_input_t nmsg_input_open_zmq(void *s) { @@ -217,6 +267,12 @@ nmsg_input_close(nmsg_input_t *input) { switch ((*input)->type) { case nmsg_input_type_stream: _nmsg_brate_destroy(&((*input)->stream->brate)); +#ifdef HAVE_LIBRDKAFKA + if ((*input)->stream->type == nmsg_stream_type_kafka) + kafka_ctx_destroy(&(*input)->stream->kafka); +#else /* HAVE_LIBRDKAFKA */ + assert((*input)->stream->type != nmsg_stream_type_kafka); +#endif /* HAVE_LIBRDKAFKA */ #ifdef HAVE_LIBZMQ if ((*input)->stream->type == nmsg_stream_type_zmq) zmq_close((*input)->stream->zmq); @@ -238,6 +294,12 @@ nmsg_input_close(nmsg_input_t *input) { 
fclose((*input)->json->fp); free((*input)->json); break; + case nmsg_input_type_kafka_json: +#ifdef HAVE_LIBRDKAFKA + kafka_ctx_destroy(&(*input)->kafka->ctx); + free((*input)->kafka); +#endif /* HAVE_LIBRDKAFKA */ + break; case nmsg_input_type_callback: free((*input)->callback); break; @@ -468,6 +530,12 @@ input_open_stream_base(nmsg_stream_type type) { #else /* HAVE_LIBZMQ */ assert(type != nmsg_stream_type_zmq); #endif /* HAVE_LIBZMQ */ + } else if (type == nmsg_stream_type_kafka) { +#ifdef HAVE_LIBRDKAFKA + input->stream->stream_read_fp = _input_nmsg_read_container_kafka; +#else /* HAVE_LIBRDKAFKA */ + assert(type != nmsg_stream_type_kafka); +#endif /* HAVE_LIBRDKAFKA */ } /* nmsg_zbuf */ diff --git a/nmsg/input.h b/nmsg/input.h index df14c3660..e15dcb720 100644 --- a/nmsg/input.h +++ b/nmsg/input.h @@ -57,6 +57,7 @@ typedef enum { nmsg_input_type_pres, /*%< presentation form */ nmsg_input_type_callback, nmsg_input_type_json, /*%< JSON form */ + nmsg_input_type_kafka_json, /*%< NMSG payloads from Kafka in JSON form */ } nmsg_input_type; /** @@ -119,6 +120,31 @@ nmsg_input_open_zmq(void *s); nmsg_input_t nmsg_input_open_zmq_endpoint(void *zmq_ctx, const char *ep); +/** + * Create a Kafka consumer and initialize a new NMSG stream input from it. + * + * This function takes an endpoint argument of format + * "proto:topic[#partition|%group_id]@broker[:port][,offset]" + * Either a partition number or a consumer group ID may be optionally supplied. + * + * offset can be either the special value "oldest" or "newest" to start from + * the oldest/newest messages in the topic, respectively. + * Only if a partition number has been specified can offset be a numeric value. + * Note that only new consumer group IDs will honor these directives. + * + * The value of proto must be either "nmsg" (binary container input) or "json" + * (JSON-serialized payloads) and either or both a partition number and offset + * value may be optionally supplied. + * + * \see nmsg_output_open_kafka_endpoint() + * + * \param[in] addr Kafka endpoint address string + * + * \return Opaque pointer that is NULL on failure or non-NULL on success. + */ +nmsg_input_t +nmsg_input_open_kafka_endpoint(const char *ep); + /** * Initialize a new nmsg input closure. This allows a user-provided callback to * function as an nmsg input, for instance to participate in an nmsg_io loop. @@ -174,6 +200,20 @@ nmsg_input_open_pres(int fd, nmsg_msgmod_t msgmod); nmsg_input_t nmsg_input_open_json(int fd); +/** + * Initialize a new NMSG JSON form input from a Kafka broker. + * + * See nmsg_output_open_json for details of the JSON format, or + * nmsg_input_open_kafka_endpoint for the details of the address string. + * + * \param[in] Kafka endpoint address string. + * + * \return Opaque pointer that is NULL on failure or non-NULL on success. + */ +nmsg_input_t +nmsg_input_open_kafka_json(const char *address); + + /** * Initialize a new NMSG pcap input from a pcap descriptor. * diff --git a/nmsg/input_json.c b/nmsg/input_json.c index be0474fc0..2844e971d 100644 --- a/nmsg/input_json.c +++ b/nmsg/input_json.c @@ -21,6 +21,34 @@ /* Internal functions. 
 */
 
+#if (defined HAVE_JSON_C) && (defined HAVE_LIBRDKAFKA)
+nmsg_res
+_input_kafka_json_read(nmsg_input_t input, nmsg_message_t *msg) {
+	nmsg_res res;
+	char *buf;
+	size_t buf_len;
+
+	res = kafka_read_start(input->kafka->ctx, (uint8_t **) &buf, &buf_len);
+	if (res != nmsg_res_success) {
+		kafka_read_finish(input->kafka->ctx);
+		return res;
+	}
+
+	if (buf_len == 0) {
+		kafka_read_finish(input->kafka->ctx);
+		return nmsg_res_failure;
+	}
+
+	res = nmsg_message_from_json((const char *) buf, msg);
+
+	if (res == nmsg_res_parse_error) {
+		_nmsg_dprintf(2, "Kafka JSON parse error: \"%s\"\n", buf);
+		res = nmsg_res_again;
+	}
+
+	kafka_read_finish(input->kafka->ctx);
+	return res;
+}
+#endif /* (defined HAVE_JSON_C) && (defined HAVE_LIBRDKAFKA) */
+
 #ifdef HAVE_JSON_C
 nmsg_res
 _input_json_read(nmsg_input_t input, nmsg_message_t *msg) {
diff --git a/nmsg/input_nmsg.c b/nmsg/input_nmsg.c
index 81eefcb61..070c8af83 100644
--- a/nmsg/input_nmsg.c
+++ b/nmsg/input_nmsg.c
@@ -351,14 +351,71 @@ _input_nmsg_read_container_sock(nmsg_input_t input, Nmsg__Nmsg **nmsg) {
 	return (res);
 }
 
+#if defined(HAVE_LIBRDKAFKA) || defined(HAVE_LIBZMQ)
+static nmsg_res
+_input_process_buffer_into_container(nmsg_input_t input, Nmsg__Nmsg **nmsg, uint8_t *buf, size_t buf_len)
+{
+	nmsg_res res;
+	ssize_t msgsize;
+
+	if (buf_len < NMSG_HDRLSZ_V2)
+		return nmsg_res_failure;
+
+	/* deserialize the NMSG header */
+	res = _input_nmsg_deserialize_header(buf, buf_len, &msgsize, &input->stream->flags);
+	if (res != nmsg_res_success)
+		return res;
+
+	buf += NMSG_HDRLSZ_V2;
+
+	/* the entire message must have been read by caller */
+	if ((size_t) msgsize != (buf_len - NMSG_HDRLSZ_V2))
+		return nmsg_res_parse_error;
+
+	/* unpack message */
+	res = _input_nmsg_unpack_container(input, nmsg, buf, msgsize);
+
+	/* update seqsrc counts */
+	if (input->stream->verify_seqsrc && *nmsg != NULL) {
+		struct nmsg_seqsrc *seqsrc = _input_seqsrc_get(input, *nmsg);
+		if (seqsrc != NULL)
+			_input_seqsrc_update(input, seqsrc, *nmsg);
+	}
+
+	/* expire old outstanding fragments */
+	_input_frag_gc(input->stream);
+
+	return nmsg_res_success;
+}
+#endif /* defined(HAVE_LIBRDKAFKA) || defined(HAVE_LIBZMQ) */
+
+#ifdef HAVE_LIBRDKAFKA
+nmsg_res
+_input_nmsg_read_container_kafka(nmsg_input_t input, Nmsg__Nmsg **nmsg) {
+	nmsg_res res;
+	uint8_t *buf;
+	size_t buf_len;
+
+	res = kafka_read_start(input->stream->kafka, &buf, &buf_len);
+	if (res != nmsg_res_success) {
+		kafka_read_finish(input->stream->kafka);
+		return res;
+	}
+
+	nmsg_timespec_get(&input->stream->now);
+
+	res = _input_process_buffer_into_container(input, nmsg, buf, buf_len);
+
+	kafka_read_finish(input->stream->kafka);
+	return res;
+}
+#endif /* HAVE_LIBRDKAFKA */
+
 #ifdef HAVE_LIBZMQ
 nmsg_res
 _input_nmsg_read_container_zmq(nmsg_input_t input, Nmsg__Nmsg **nmsg) {
 	int ret;
 	nmsg_res res;
-	uint8_t *buf;
-	size_t buf_len;
-	ssize_t msgsize = 0;
 	zmq_msg_t zmsg;
 	zmq_pollitem_t zitems[1];
 
@@ -383,34 +440,7 @@ _input_nmsg_read_container_zmq(nmsg_input_t input, Nmsg__Nmsg **nmsg) {
 	nmsg_timespec_get(&input->stream->now);
 
 	/* get buffer from the ZMQ message */
-	buf = zmq_msg_data(&zmsg);
-	buf_len = zmq_msg_size(&zmsg);
-	if (buf_len < NMSG_HDRLSZ_V2) {
-		res = nmsg_res_failure;
-		goto out;
-	}
-
-	/* deserialize the NMSG header */
-	res = _input_nmsg_deserialize_header(buf, buf_len, &msgsize, &input->stream->flags);
-	if (res != nmsg_res_success)
-		goto out;
-	buf += NMSG_HDRLSZ_V2;
-
-	/* the entire message must have been read by zmq_recvmsg() */
-	assert((size_t) msgsize == buf_len - NMSG_HDRLSZ_V2);
-
-	/* unpack message */
-	
res = _input_nmsg_unpack_container(input, nmsg, buf, msgsize); - - /* update seqsrc counts */ - if (input->stream->verify_seqsrc && *nmsg != NULL) { - struct nmsg_seqsrc *seqsrc = _input_seqsrc_get(input, *nmsg); - if (seqsrc != NULL) - _input_seqsrc_update(input, seqsrc, *nmsg); - } - - /* expire old outstanding fragments */ - _input_frag_gc(input->stream); + res = _input_process_buffer_into_container(input, nmsg, zmq_msg_data(&zmsg), zmq_msg_size(&zmsg)); out: zmq_msg_close(&zmsg); diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c new file mode 100644 index 000000000..088d69272 --- /dev/null +++ b/nmsg/kafkaio.c @@ -0,0 +1,811 @@ +/* + * Copyright (c) 2024 DomainTools LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Import. */ + +#include "private.h" + +#ifdef HAVE_LIBRDKAFKA + +typedef enum { + kafka_state_init = 1, + kafka_state_ready, + kafka_state_flush, + kafka_state_break, +} kafka_state; + +struct kafka_ctx { + kafka_state state; + char *topic_str; + char *broker; + char *group_id; + int partition; + bool consumer; /* consumer or producer */ + int timeout; + uint64_t consumed; + uint64_t produced; + uint64_t delivered; + int64_t offset; + rd_kafka_t *handle; + rd_kafka_topic_t *topic; + rd_kafka_message_t *message; + rd_kafka_queue_t *queue; +}; + +/* Forward. */ + +static bool _kafka_addr_init(kafka_ctx_t ctx, const char *addr); + +static kafka_ctx_t _kafka_init_kafka(const char *addr, bool consumer, int timeout); + +static void _kafka_flush(kafka_ctx_t ctx); + +static void _kafka_ctx_destroy(kafka_ctx_t ctx); + +static void _kafka_error_cb(rd_kafka_t *rk, int err, const char *reason, void *opaque); + +static void _kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque); + +static bool _kafka_config_set_option(rd_kafka_conf_t *config, const char *option, const char *value); + +static bool _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config); + +static bool _kafka_init_producer(kafka_ctx_t ctx, rd_kafka_conf_t *config); + +/* Private. 
+ */
+
+static bool
+_kafka_addr_init(kafka_ctx_t ctx, const char *addr)
+{
+	char *pound, *at, *comma, *percent;
+	char str_part[16], str_off[64];
+	ssize_t len;
+
+	pound = strchr(addr, '#');
+	at = strchr(addr, '@');
+	comma = strchr(addr, ',');
+	percent = strchr(addr, '%');
+
+	/* @ is mandatory */
+	if (at == NULL) {
+		_nmsg_dprintf(2, "%s: missing '@' in Kafka endpoint: %s\n", __func__, addr);
+		return false;
+	}
+
+	if (comma != NULL && comma < at) {
+		_nmsg_dprintf(2, "%s: invalid offset position: %s\n", __func__, addr);
+		return false;
+	}
+
+	ctx->group_id = NULL;
+
+	if (pound != NULL) {
+		if (pound > at) {
+			_nmsg_dprintf(2, "%s: invalid partition position: %s\n", __func__, addr);
+			return false;
+		}
+		if (percent != NULL) {
+			_nmsg_dprintf(2, "%s: cannot use group and partition together: %s\n", __func__, addr);
+			return false;
+		}
+		sscanf(pound + 1, "%d", &ctx->partition);
+	} else {
+		ctx->partition = RD_KAFKA_PARTITION_UA;
+		if (percent != NULL) {
+			if (percent > at) {
+				_nmsg_dprintf(2, "%s: invalid group position: %s\n", __func__, addr);
+				return false;
+			}
+			len = at - percent - 1;
+			if (len <= 0) {
+				_nmsg_dprintf(2, "%s: group id cannot be empty: %s\n", __func__, addr);
+				return false;
+			}
+			ctx->group_id = strndup(percent + 1, len);
+			pound = percent;
+		} else
+			pound = at;
+	}
+
+	len = pound - addr;
+	if (len <= 0) {
+		_nmsg_dprintf(2, "%s: invalid Kafka endpoint: %s\n", __func__, addr);
+		return false;
+	}
+
+	ctx->topic_str = my_malloc(len + 1);
+	strncpy(ctx->topic_str, addr, len);
+	ctx->topic_str[len] = '\0';
+
+	if (comma != NULL) {
+		len = comma - at - 1;
+		if (len <= 0) {
+			_nmsg_dprintf(2, "%s: invalid Kafka endpoint: %s\n", __func__, addr);
+			return false;
+		}
+
+		ctx->broker = my_malloc(len + 1);
+		strncpy(ctx->broker, at + 1, len);
+		ctx->broker[len] = '\0';
+		++comma;
+
+		/* Oldest and newest are applicable universally, but not numerical offsets. */
+		if (strcasecmp(comma, "oldest") == 0)
+			ctx->offset = RD_KAFKA_OFFSET_BEGINNING;
+		else if (strcasecmp(comma, "newest") == 0)
+			ctx->offset = RD_KAFKA_OFFSET_END;
+		else if ((pound != NULL) && (isdigit(*comma) || (*comma == '-' && isdigit(*(comma+1)))))
+			sscanf(comma, "%ld", &ctx->offset);
+		else {
+			_nmsg_dprintf(2, "%s: invalid offset in Kafka endpoint: %s\n", __func__, comma);
+			return false;
+		}
+
+	} else {
+		ctx->broker = my_malloc(strlen(at));
+		strcpy(ctx->broker, at + 1);
+		ctx->offset = RD_KAFKA_OFFSET_END;
+	}
+
+	if (ctx->offset == RD_KAFKA_OFFSET_BEGINNING)
+		strcpy(str_off, "oldest");
+	else if (ctx->offset == RD_KAFKA_OFFSET_END)
+		strcpy(str_off, "newest");
+	else
+		sprintf(str_off, "%ld", ctx->offset);
+
+	if (ctx->partition == RD_KAFKA_PARTITION_UA)
+		strcpy(str_part, "unassigned");
+	else
+		sprintf(str_part, "%d", ctx->partition);
+
+	_nmsg_dprintf(3, "%s: broker: %s, topic: %s, partition: %s, offset: %s (consumer group: %s)\n",
+		__func__, ctx->broker, ctx->topic_str, str_part, str_off,
+		(ctx->group_id == NULL ?
"none" : ctx->group_id)); + + return true; +} + +static bool +_kafka_config_set_option(rd_kafka_conf_t *config, const char *option, const char *value) { + char errstr[1024]; + rd_kafka_conf_res_t res; + + res = rd_kafka_conf_set(config, option, value, errstr, sizeof(errstr)); + if (res != RD_KAFKA_CONF_OK) { + _nmsg_dprintf(2, "%s: failed to set Kafka option %s = %s (err %d: %s)\n", + __func__, option, value, res, errstr); + return false; + } + + return true; +} + +static bool +_kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) +{ + struct addrinfo *ai; + struct addrinfo hints = {0}; + char errstr[1024], client_id[256], hostname[256]; + rd_kafka_topic_partition_list_t *subscription; + rd_kafka_conf_res_t res; + rd_kafka_topic_conf_t *topic_conf; + + if (!_kafka_config_set_option(config, "enable.partition.eof", "true")) { + rd_kafka_conf_destroy(config); + return false; + } + +#if RD_KAFKA_VERSION >= 0x010600ff + _kafka_config_set_option(config, "allow.auto.create.topics", "false"); +#endif /* RD_KAFKA_VERSION > 0x010100ff */ + gethostname(hostname, sizeof(hostname)); + hostname[sizeof(hostname) - 1] = '\0'; + + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_CANONNAME; + + if (getaddrinfo(hostname, NULL, &hints, &ai) == 0) { + if(ai->ai_canonname != NULL) { + strncpy(hostname, ai->ai_canonname, sizeof(hostname)); + hostname[sizeof(hostname) - 1] = '\0'; + } + + freeaddrinfo(ai); + } + + if (snprintf(client_id, sizeof(client_id), "nmsgtool.%010u@%s", + getpid(), hostname) == sizeof(client_id)) + client_id[sizeof(client_id) - 1 ] = '\0'; + _nmsg_dprintf(3, "%s: client ID: %s\n", __func__, client_id); + if (!_kafka_config_set_option(config, "client.id", client_id)) { + rd_kafka_conf_destroy(config); + return false; + } + + if (ctx->group_id != NULL) { + const char *reset; + + if (!_kafka_config_set_option(config, "group.id", ctx->group_id)) { + rd_kafka_conf_destroy(config); + return false; + } + + reset = ctx->offset == RD_KAFKA_OFFSET_END ? 
"latest" : "earliest"; + if (!_kafka_config_set_option(config, "auto.offset.reset", reset)) { + rd_kafka_conf_destroy(config); + return false; + } + } + + /* Create Kafka consumer handle */ + ctx->handle = rd_kafka_new(RD_KAFKA_CONSUMER, config, errstr, sizeof(errstr)); + if (ctx->handle == NULL) { + rd_kafka_conf_destroy(config); + _nmsg_dprintf(2, "%s: failed to create Kafka consumer: %s\n", __func__, errstr); + return false; + } + /* Now handle owns the configuration */ + + if (ctx->group_id != NULL) { + rd_kafka_poll_set_consumer(ctx->handle); + subscription = rd_kafka_topic_partition_list_new(1); + if (subscription == NULL) { + _nmsg_dprintf(2, "%s: failed to create partition list\n", __func__); + return false; + } + + rd_kafka_topic_partition_list_add(subscription, ctx->topic_str, ctx->partition); + + res = rd_kafka_subscribe(ctx->handle, subscription); + + rd_kafka_topic_partition_list_destroy(subscription); + if (res != RD_KAFKA_CONF_OK) { + _nmsg_dprintf(2, "%s: failed to subscribe to partition list\n", __func__); + return false; + } + } else { + /* Topic configuration */ + topic_conf = rd_kafka_topic_conf_new(); + if (topic_conf == NULL) { + _nmsg_dprintf(2, "%s: failed to create topic configuration\n", __func__); + return false; + } + + /* Create topic */ + ctx->topic = rd_kafka_topic_new(ctx->handle, ctx->topic_str, topic_conf); + if (ctx->topic == NULL) { + _nmsg_dprintf(2, "%s: failed to create topic %s\n", + __func__, ctx->topic_str); + return false; + } + } + + ctx->state = kafka_state_ready; + + return true; +} + +static bool +_kafka_init_producer(kafka_ctx_t ctx, rd_kafka_conf_t *config) +{ + char errstr[1024]; + rd_kafka_topic_conf_t *topic_conf; + + rd_kafka_conf_set_dr_msg_cb(config, _kafka_delivery_cb); + + /* Create Kafka producer handle */ + ctx->handle = rd_kafka_new(RD_KAFKA_PRODUCER, config, errstr, sizeof(errstr)); + if (ctx->handle == NULL) { + rd_kafka_conf_destroy(config); + _nmsg_dprintf(2, "%s: failed to create Kafka producer: %s\n", __func__, errstr); + return false; + } + /* Now handle owns the configuration */ + + /* Topic configuration */ + topic_conf = rd_kafka_topic_conf_new(); + if (topic_conf == NULL) { + _nmsg_dprintf(2, "%s: failed to create topic configuration\n", __func__); + return false; + } + + /* Create topic */ + ctx->topic = rd_kafka_topic_new(ctx->handle, ctx->topic_str, topic_conf); + if (ctx->topic == NULL) { + _nmsg_dprintf(2, "%s: failed to create topic %s\n", __func__, ctx->topic_str); + return false; + } + + ctx->state = kafka_state_ready; + return true; +} + +static kafka_ctx_t +_kafka_init_kafka(const char *addr, bool consumer, int timeout) +{ + struct kafka_ctx *ctx; + uint8_t tmp_addr[16]; + char tmp[sizeof("4294967295")] = {0}, ip_str[INET6_ADDRSTRLEN + 2] = {0}, *pi; + const char *af = "any"; + bool result; + rd_kafka_conf_t *config; + + ctx = my_calloc(1, sizeof(struct kafka_ctx)); + + ctx->state = kafka_state_init; + ctx->timeout = timeout; + ctx->consumer = consumer; + + if (!_kafka_addr_init(ctx, addr)) { + _kafka_ctx_destroy(ctx); + return NULL; + } + + config = rd_kafka_conf_new(); + if (config == NULL) { + _kafka_ctx_destroy(ctx); + _nmsg_dprintf(2, "%s: failed to create Kafka configuration\n", __func__); + return NULL; + } + + /* + * It is possible for an IP address to be surrounded by brackets. + * In the case of IPv6 this is necessary to distinguish the optional + * trailing port from the final octets of the represented address. 
+ */ + if (ctx->broker[0] == '[') { + strncpy(ip_str, ctx->broker + 1, sizeof(ip_str) - 1); + pi = strchr(ip_str, ']'); + } else { + strncpy(ip_str, ctx->broker, sizeof(ip_str) - 1); + pi = strrchr(ip_str, ':'); + } + + if (pi != NULL) + *pi = '\0'; + + if (inet_pton(AF_INET, ip_str, tmp_addr) == 1) + af = "v4"; + else if (inet_pton(AF_INET6, ip_str, tmp_addr) == 1) + af = "v6"; + + _kafka_config_set_option(config, "broker.address.family", af); + + rd_kafka_conf_set_opaque(config, ctx); + rd_kafka_conf_set_error_cb(config, _kafka_error_cb); + + snprintf(tmp, sizeof(tmp), "%d", SIGIO); + if (!_kafka_config_set_option(config, "internal.termination.signal", tmp) || + !_kafka_config_set_option(config, "bootstrap.servers", ctx->broker)) { + rd_kafka_conf_destroy(config); + _kafka_ctx_destroy(ctx); + return NULL; + } + + result = ctx->consumer ? _kafka_init_consumer(ctx, config) : + _kafka_init_producer(ctx, config); + if (!result) { + _kafka_ctx_destroy(ctx); + return NULL; + } + + return ctx; +} + +static void +_kafka_flush(kafka_ctx_t ctx) { + rd_kafka_resp_err_t res = RD_KAFKA_RESP_ERR_NO_ERROR; + _nmsg_dprintf(3, "%s: flushing Kafka queue\n", __func__); + while (ctx->state != kafka_state_break && + rd_kafka_outq_len(ctx->handle) > 0 && + (res == RD_KAFKA_RESP_ERR_NO_ERROR || res == RD_KAFKA_RESP_ERR__TIMED_OUT)) + res = rd_kafka_flush(ctx->handle, ctx->timeout); +} + +static void +_kafka_ctx_destroy(kafka_ctx_t ctx) +{ + if (ctx->state > kafka_state_init) { + if (ctx->consumer) { + if (ctx->group_id == NULL) /* Stop consuming */ + rd_kafka_consume_stop(ctx->topic, ctx->partition); + else + rd_kafka_consumer_close(ctx->handle); + + rd_kafka_poll(ctx->handle, ctx->timeout); + + _nmsg_dprintf(3, "%s: consumed %lu messages\n", __func__, ctx->consumed); + } else { + _kafka_flush(ctx); + + _nmsg_dprintf(3, "%s: produced %lu messages\n", __func__, ctx->produced); + _nmsg_dprintf(3, "%s: delivered %lu messages\n", __func__, ctx->delivered); + _nmsg_dprintf(3, "%s: internal queue has %d messages \n", __func__, rd_kafka_outq_len(ctx->handle)); + } + } + + if (ctx->group_id != NULL) + free(ctx->group_id); + + /* Destroy consumer queue (if any) */ + if (ctx->queue != NULL) + rd_kafka_queue_destroy(ctx->queue); + + /* Destroy topic */ + if (ctx->topic != NULL) + rd_kafka_topic_destroy(ctx->topic); + + /* Destroy handle */ + if (ctx->handle != NULL) + rd_kafka_destroy(ctx->handle); + + if (ctx->topic_str != NULL) + my_free(ctx->topic_str); + + if (ctx->broker != NULL) + my_free(ctx->broker); + + my_free(ctx); +} + +static void +_kafka_error_cb(rd_kafka_t *rk, int err, const char *reason, void *opaque) +{ + kafka_ctx_t ctx = (kafka_ctx_t) opaque; + rd_kafka_resp_err_t err_kafka = (rd_kafka_resp_err_t) err; + + switch(err_kafka) { + case RD_KAFKA_RESP_ERR__UNKNOWN_PARTITION: + case RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART: + case RD_KAFKA_RESP_ERR_OFFSET_OUT_OF_RANGE: + /* At the moment treat any broker's error as fatal */ + default: + ctx->state = kafka_state_break; + _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, err, reason); + break; + } +} + +static void +_kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque) +{ + kafka_ctx_t ctx = (kafka_ctx_t) opaque; + if (rkmessage == NULL) + return; + if (rkmessage->err != RD_KAFKA_RESP_ERR_NO_ERROR) { + _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, rkmessage->err, + rd_kafka_message_errstr(ctx->message)); + ctx->state = kafka_state_break; + rd_kafka_yield(rk); + } + ctx->delivered++; +} + +static bool 
+_kafka_consumer_start_queue(kafka_ctx_t ctx) { + bool res = true; + int ndx; + rd_kafka_resp_err_t err; + const rd_kafka_metadata_t *mdata; + rd_kafka_metadata_topic_t * topic; + + for(ndx = 0; ndx < 10; ++ndx) { + err = rd_kafka_metadata(ctx->handle, 0, ctx->topic, &mdata, NMSG_RBUF_TIMEOUT); + if (err == RD_KAFKA_RESP_ERR_NO_ERROR) + break; + } + if (err != RD_KAFKA_RESP_ERR_NO_ERROR) { + _nmsg_dprintf(2, "%s: failed to get Kafka topic %s metadata (err %d: %s)\n", + __func__, ctx->topic_str, err, rd_kafka_err2str(err)); + return false; + } + + if (mdata->topic_cnt != 1) { + _nmsg_dprintf(2, "%s: received invalid metadata for topic %s\n", __func__, ctx->topic_str); + res = false; + goto out; + } + + topic = &mdata->topics[0]; + + if (topic->partition_cnt == 0) { + _nmsg_dprintf(2, "%s: topic %s has no partitions\n", __func__, ctx->topic_str); + res = false; + goto out; + } + + ctx->queue = rd_kafka_queue_new(ctx->handle); + if (ctx->queue == NULL) { + _nmsg_dprintf(2, "%s: failed to create consume queue for topic %s\n", __func__, ctx->topic_str); + res = false; + goto out; + } + + for(ndx = 0; ndx < topic->partition_cnt; ++ndx) { + if (rd_kafka_consume_start_queue(ctx->topic, ndx, ctx->offset, ctx->queue) == -1) { + err = rd_kafka_last_error(); + _nmsg_dprintf(2, "%s: failed to start Kafka consumer (err %d: %s)\n", + __func__, err, rd_kafka_err2str(err)); + res = false; + goto out; + } + } + +out: + rd_kafka_metadata_destroy(mdata); + return res; +} + +/* Export. */ + +void +kafka_ctx_destroy(kafka_ctx_t *ctx) +{ + if (ctx != NULL && *ctx != NULL) { + _kafka_ctx_destroy(*ctx); + *ctx = NULL; + } +} + +nmsg_res +kafka_read_start(kafka_ctx_t ctx, uint8_t **buf, size_t *len) +{ + nmsg_res res = nmsg_res_success; + if (buf == NULL || len == NULL || ctx == NULL || + !ctx->consumer || ctx->state != kafka_state_ready) + return nmsg_res_failure; + + *buf = NULL; + *len = 0; + + if (ctx->group_id != NULL) + ctx->message = rd_kafka_consumer_poll(ctx->handle, ctx->timeout); + else { + /* Poll for errors, etc. */ + rd_kafka_poll(ctx->handle, 0); + if (ctx->queue == NULL) + ctx->message = rd_kafka_consume(ctx->topic, ctx->partition, ctx->timeout); + else + ctx->message = rd_kafka_consume_queue(ctx->queue, ctx->timeout); + } + + if (ctx->message != NULL) { + if (ctx->message->err == RD_KAFKA_RESP_ERR_NO_ERROR) { + *buf = ctx->message->payload; + *len = ctx->message->len; + ctx->consumed++; + } else { + if (ctx->message->err == RD_KAFKA_RESP_ERR__PARTITION_EOF) + res = nmsg_res_again; + else { + _kafka_error_cb(ctx->handle, ctx->message->err, + rd_kafka_message_errstr(ctx->message), ctx); + res = nmsg_res_failure; + } + + /* Return error message to kafka */ + rd_kafka_message_destroy(ctx->message); + ctx->message = NULL; + } + } else + res = (errno == ETIMEDOUT ? 
nmsg_res_again : nmsg_res_failure);
+
+	return res;
+}
+
+nmsg_res
+kafka_read_finish(kafka_ctx_t ctx)
+{
+	if (ctx == NULL || !ctx->consumer || ctx->state != kafka_state_ready)
+		return nmsg_res_failure;
+
+	if (ctx->message != NULL) {
+		/* Return message to rdkafka */
+		rd_kafka_message_destroy(ctx->message);
+		ctx->message = NULL;
+	}
+
+	return nmsg_res_success;
+}
+
+nmsg_res
+kafka_write(kafka_ctx_t ctx, const uint8_t *key, size_t key_len, const uint8_t *buf, size_t buf_len)
+{
+	int res;
+	if (ctx == NULL || ctx->consumer || ctx->state != kafka_state_ready)
+		return nmsg_res_failure;
+
+	while (ctx->state == kafka_state_ready) {
+		res = rd_kafka_produce(ctx->topic, ctx->partition, RD_KAFKA_MSG_F_FREE,
+				(void *) buf, buf_len,	/* Payload and length */
+				(void *) key, key_len,	/* Optional key and its length */
+				NULL);			/* Opaque data in message->_private. */
+
+		if (res == 0) {
+			ctx->produced++;
+			break;
+		} else if (errno != ENOBUFS) {
+			_nmsg_dprintf(1, "%s: failed to produce Kafka message #%d: %s\n",
+				__func__, errno, rd_kafka_err2str(errno));
+			return nmsg_res_failure;
+		}
+		rd_kafka_poll(ctx->handle, ctx->timeout);
+	}
+
+	/* Poll with no timeout to trigger delivery reports without waiting */
+	rd_kafka_poll(ctx->handle, 0);
+	return nmsg_res_success;
+}
+
+kafka_ctx_t
+kafka_create_consumer(const char *addr, int timeout)
+{
+	kafka_ctx_t ctx;
+	rd_kafka_resp_err_t err;
+
+	if (addr == NULL)
+		return NULL;
+
+	ctx = _kafka_init_kafka(addr, true, timeout);
+	if (ctx == NULL)
+		return NULL;
+
+	/* Either a partition # or no consumer group ID has been supplied. */
+	if (ctx->topic != NULL) {
+		if (ctx->partition != RD_KAFKA_PARTITION_UA) {
+			/* Start consuming */
+			if (rd_kafka_consume_start(ctx->topic, ctx->partition, ctx->offset) == -1) {
+				err = rd_kafka_last_error();
+				_kafka_ctx_destroy(ctx);
+				_nmsg_dprintf(2, "%s: failed to start Kafka consumer (err %d: %s)\n",
+					__func__, err, rd_kafka_err2str(err));
+				return NULL;
+			}
+		} else if (!_kafka_consumer_start_queue(ctx)) {	/* no partition # */
+			_kafka_ctx_destroy(ctx);
+			return NULL;
+		}
+	}
+	return ctx;
+}
+
+kafka_ctx_t
+kafka_create_producer(const char *addr, int timeout)
+{
+	if (addr == NULL)
+		return NULL;
+
+	return _kafka_init_kafka(addr, false, timeout);
+}
+
+nmsg_input_t
+nmsg_input_open_kafka_endpoint(const char *ep)
+{
+	kafka_ctx_t ctx;
+
+	ctx = kafka_create_consumer(ep, NMSG_RBUF_TIMEOUT);
+	if (ctx == NULL)
+		return NULL;
+
+	return _input_open_kafka(ctx);
+}
+
+nmsg_output_t
+nmsg_output_open_kafka_endpoint(const char *ep, size_t bufsz)
+{
+	kafka_ctx_t ctx;
+
+	ctx = kafka_create_producer(ep, NMSG_RBUF_TIMEOUT);
+	if (ctx == NULL)
+		return NULL;
+
+	return _output_open_kafka(ctx, bufsz);
+}
+
+void
+kafka_stop(kafka_ctx_t ctx)
+{
+	if (ctx == NULL || ctx->consumer)
+		return;
+	ctx->state = kafka_state_break;
+}
+
+void
+kafka_flush(kafka_ctx_t ctx)
+{
+	if (ctx == NULL || ctx->consumer)
+		return;
+	_kafka_flush(ctx);
+}
+
+#else /* HAVE_LIBRDKAFKA */
+
+/* Export.
*/ + +#include "kafkaio.h" + +struct kafka_ctx { + int state; +}; + +void +kafka_ctx_destroy(kafka_ctx_t *ctx __attribute__((unused))) +{ +} + +nmsg_res +kafka_read_start(kafka_ctx_t ctx __attribute__((unused)), + uint8_t **buf __attribute__((unused)), + size_t *len __attribute__((unused))) +{ + return nmsg_res_failure; +} + +nmsg_res +kafka_read_finish(kafka_ctx_t ctx __attribute__((unused))) +{ + return nmsg_res_failure; +} + +nmsg_res +kafka_write(kafka_ctx_t ctx __attribute__((unused)), + const uint8_t *key __attribute__((unused)), + size_t key_len __attribute__((unused)), + const uint8_t *buf __attribute__((unused)), + size_t buf_len __attribute__((unused))) +{ + return nmsg_res_failure; +} + +kafka_ctx_t +kafka_create_consumer(const char *addr __attribute__((unused)), + int timeout __attribute__((unused))) +{ + return NULL; +} + +kafka_ctx_t +kafka_create_producer(const char *addr __attribute__((unused)), + int timeout __attribute__((unused))) +{ + return NULL; +} + +nmsg_input_t +nmsg_input_open_kafka_endpoint(const char *ep __attribute__((unused))) +{ + return NULL; +} + +nmsg_output_t +nmsg_output_open_kafka_endpoint(const char *ep __attribute__((unused)), + size_t bufsz __attribute__((unused))) +{ + return NULL; +} + +void +kafka_stop(kafka_ctx_t ctx __attribute__((unused))) +{ +} + +void kafka_flush(kafka_ctx_t ctx __attribute__((unused))) +{ +} + +#endif /* HAVE_LIBRDKAFKA */ diff --git a/nmsg/kafkaio.h b/nmsg/kafkaio.h new file mode 100644 index 000000000..a26eae339 --- /dev/null +++ b/nmsg/kafkaio.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2024 DomainTools LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NMSG_KAFKAIO_H +#define NMSG_KAFKAIO_H + +/** + * Forward declaration of Kafka context. + */ +typedef struct kafka_ctx * kafka_ctx_t; + +/** + * Destroy an NMSG Kafka context. + * + * \param[in] ctx an NMSG Kafka context to be destroyed. + */ +void kafka_ctx_destroy(kafka_ctx_t *ctx); + +/** + * Create a Kafka consumer. + * + * \param[in] addr NMSG Kafka address string in format proto:topic[#partition|%group_id]@broker[:port][,offset]. + * \param[in] timeout in milliseconds. + * + * \return Opaque pointer that is NULL on failure or non-NULL on success. + */ +kafka_ctx_t kafka_create_consumer(const char *addr, int timeout); + +/** + * Create a Kafka producer. + * + * \param[in] addr NMSG Kafka address string in format proto:topic[#partition|%group_id]@broker[:port]. + * \param[in] timeout in milliseconds. + * + * \return Opaque pointer that is NULL on failure or non-NULL on success. + */ +kafka_ctx_t kafka_create_producer(const char *addr, int timeout); + +/** + * Start reading a message from an NMSG Kafka consumer. + * This read operation must be terminated with a call to kafka_read_finish(). + * + * \param[in] ctx NMSG Kafka consumer context. + * \param[out] buf double pointer that will receive the address of the next read message. + * \param[out] len pointer to a variable to hold the received message size. 
+ *
+ * \return nmsg_res_success on success and nmsg_res_failure otherwise.
+ */
+nmsg_res kafka_read_start(kafka_ctx_t ctx, uint8_t **buf, size_t *len);
+
+/**
+ * End reading a message from an NMSG Kafka consumer.
+ *
+ * \param[in] ctx NMSG Kafka consumer context.
+ *
+ * \return nmsg_res_success on success and nmsg_res_failure otherwise.
+ */
+nmsg_res kafka_read_finish(kafka_ctx_t ctx);
+
+/**
+ * Write a message to an NMSG Kafka producer.
+ *
+ * \param[in] ctx NMSG Kafka producer context.
+ * \param[in] key pointer to an optional key to be sent (or NULL).
+ * \param[in] key_len the size of the key to be written, in bytes.
+ * \param[in] buf pointer to the data to be sent.
+ * \param[in] buf_len the size of the data to be written, in bytes.
+ *
+ * \return nmsg_res_success on success and nmsg_res_failure otherwise.
+ *
+ * Note: Kafka takes ownership of the buffer passed as buf and destroys it
+ * once the message is delivered. Kafka does NOT, however, take ownership
+ * of the key pointer.
+ */
+nmsg_res kafka_write(kafka_ctx_t ctx,
+		     const uint8_t *key, size_t key_len,
+		     const uint8_t *buf, size_t buf_len);
+
+/**
+ * Signal Kafka to stop producing messages.
+ *
+ * \param[in] ctx NMSG Kafka (producer) context.
+ */
+void kafka_stop(kafka_ctx_t ctx);
+
+/**
+ * Flush the Kafka producer queue.
+ *
+ * \param[in] ctx NMSG Kafka (producer) context.
+ */
+void kafka_flush(kafka_ctx_t ctx);
+
+#endif /* NMSG_KAFKAIO_H */
diff --git a/nmsg/msgmod/message.c b/nmsg/msgmod/message.c
index 777bb6d1f..ed6b13221 100644
--- a/nmsg/msgmod/message.c
+++ b/nmsg/msgmod/message.c
@@ -504,6 +504,22 @@ _nmsg_message_to_json(nmsg_output_t output, nmsg_message_t msg, struct nmsg_strb
 	}
 }
 
+#ifdef HAVE_LIBRDKAFKA
+nmsg_res
+_nmsg_message_get_field_value_as_key(nmsg_message_t msg, const char *name, struct nmsg_strbuf *sb) {
+	if (msg->mod == NULL)
+		return (nmsg_res_failure);
+	switch (msg->mod->plugin->type) {
+	case nmsg_msgmod_type_transparent:
+		return _nmsg_message_payload_get_field_value_as_key(msg, name, sb);
+	case nmsg_msgmod_type_opaque:
+		return (nmsg_res_notimpl);
+	default:
+		return (nmsg_res_notimpl);
+	}
+}
+#endif /* HAVE_LIBRDKAFKA */
+
 nmsg_res
 nmsg_message_to_json(nmsg_message_t msg, char **json) {
 	nmsg_res res;
diff --git a/nmsg/msgmod/transparent.h b/nmsg/msgmod/transparent.h
index 2efb07151..9c8e854b0 100644
--- a/nmsg/msgmod/transparent.h
+++ b/nmsg/msgmod/transparent.h
@@ -64,8 +64,15 @@ nmsg_res
 _nmsg_msgmod_pres_to_payload_finalize(struct nmsg_msgmod *mod, void *cl,
 				      uint8_t **pbuf, size_t *sz);
 
+#ifdef HAVE_LIBRDKAFKA
 nmsg_res
-_nmsg_message_payload_to_json(nmsg_output_t output, struct nmsg_message *msg, struct nmsg_strbuf *sb);
+_nmsg_message_payload_get_field_value_as_key(struct nmsg_message *msg,
+	const char *field_name, struct nmsg_strbuf *sb);
+#endif /* HAVE_LIBRDKAFKA */
+
+nmsg_res
+_nmsg_message_payload_to_json(nmsg_output_t output,
+	struct nmsg_message *msg, struct nmsg_strbuf *sb);
 
 nmsg_res
 _nmsg_message_payload_to_json_load(struct nmsg_message *msg,
diff --git a/nmsg/msgmod/transparent_payload.c b/nmsg/msgmod/transparent_payload.c
index 6d0197db6..11d965463 100644
--- a/nmsg/msgmod/transparent_payload.c
+++ b/nmsg/msgmod/transparent_payload.c
@@ -275,6 +275,129 @@ _nmsg_message_payload_to_pres_load(struct nmsg_message *msg,
 	return (nmsg_res_success);
 }
 
+static nmsg_res
+_nmsg_nmsg_mod_ip_to_string(ProtobufCBinaryData *bdata, bool enquote,
+	struct nmsg_strbuf *g) {
+	char sip[INET6_ADDRSTRLEN];
+	int family = 0;
+
+	if (bdata->data == NULL) {
+		append_json_value_null(g);
+
return nmsg_res_success; + } + + if (bdata->len == 4) { + family = AF_INET; + } else if (bdata->len == 16) { + family = AF_INET6; + } + + if (family && fast_inet_ntop(family, bdata->data, sip, sizeof(sip))) { + if (enquote) + append_json_value_string_noescape(g, sip, strlen(sip)); + else + return nmsg_strbuf_append_str(g, sip, strlen(sip)); + } else { + append_json_value_null(g); + } + + return nmsg_res_success; +} + +#ifdef HAVE_LIBRDKAFKA + +/* + * The determination of a key value from a named nmsg message field is as follows: + * + * 1. If the field doesn't exist, return an error. + * 2. If the field data can't be retrieved, return an empty buffer. + * 3. If the field has a formatter function, return the raw string returned by it. + * 4. If the field is an enum value, return the corresponding canonical string value. + * If the enum value has no string mapping, return a numeric (string) representation. + * 5. For strings, return the ASCII string value without any terminating NUL byte. + * 6. For IP (v4 or v6) addresses, return the dotted representational string value. + * 7. For all other simple numeric primitive types, return a numeric (string) representation. + * 8. For all other values (including byte sequences), return the raw binary payload field data. + */ +nmsg_res +_nmsg_message_payload_get_field_value_as_key(nmsg_message_t msg, const char *field_name, struct nmsg_strbuf *sb) { + nmsg_res res; + struct nmsg_msgmod_field *field; + ProtobufCBinaryData bdata; + + field = _nmsg_msgmod_lookup_field(msg->mod, field_name); + if (field == NULL) + return nmsg_res_failure; + + res = nmsg_message_get_field(msg, field_name, 0, (void **) &bdata.data, &bdata.len); + if (res != nmsg_res_success) { + /* If field is present but no data, return empty buffer. */ + nmsg_strbuf_reset(sb); + *sb->data = '\0'; + return nmsg_res_success; + } + + if (field->format != NULL) { + char *endline = ""; + + if (field->type == nmsg_msgmod_ft_uint16 || field->type == nmsg_msgmod_ft_int16) { + uint16_t val; + uint32_t val32; + memcpy(&val32, bdata.data, sizeof(uint32_t)); + val = (uint16_t) val32; + res = field->format(msg, field, &val, sb, endline); + } else { + res = field->format(msg, field, (void *) &bdata, sb, endline); + } + + /* If format failed, then fall through and return raw payload. */ + if (res == nmsg_res_success) + return res; + } else if (PBFIELD_ONE_PRESENT(msg->message, field)) { + + if (field->type == nmsg_msgmod_ft_enum) { + ProtobufCEnumDescriptor *enum_descr; + bool enum_found = false; + unsigned enum_value; + + enum_descr = (ProtobufCEnumDescriptor *) field->descr->descriptor; + enum_value = *((unsigned *) bdata.data); + + for (unsigned i = 0; i < enum_descr->n_values; i++) { + if ((unsigned) enum_descr->values[i].value == enum_value) { + res = nmsg_strbuf_append_str(sb, enum_descr->values[i].name, + strlen(enum_descr->values[i].name)); + enum_found = true; + break; + } + } + + if (!enum_found) + append_json_value_int(sb, enum_value); + + return res; + } else { + switch(field->type) { + /* Trim trailing nul byte present in strings. 
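+			 * The protobuf layer stores nmsg string fields with
+			 * their terminating NUL; trimming it keeps the derived
+			 * key byte-comparable with the visible ASCII value
+			 * (see the length check below).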
*/ + case nmsg_msgmod_ft_string: + case nmsg_msgmod_ft_mlstring: + if (bdata.len > 0 && bdata.data[bdata.len - 1] == 0) + bdata.len--; + break; + case nmsg_msgmod_ft_bytes: + break; + case nmsg_msgmod_ft_ip: + return _nmsg_nmsg_mod_ip_to_string(&bdata, false, sb); + default: + return _nmsg_message_payload_to_json_load(msg, field, bdata.data, sb); + } + } + } + + return nmsg_strbuf_append_str(sb, (const char *) bdata.data, bdata.len); +} +#endif /* HAVE_LIBRDKAFKA */ + nmsg_res _nmsg_message_payload_to_json(nmsg_output_t output, struct nmsg_message *msg, struct nmsg_strbuf *sb) { Nmsg__NmsgPayload *np; @@ -330,9 +453,14 @@ _nmsg_message_payload_to_json(nmsg_output_t output, struct nmsg_message *msg, st declare_json_value(sb, "mname", false); append_json_value_string(sb, mname, strlen(mname)); - if (output != NULL && output->json->source != 0) - source_val = output->json->source; - else if (np->has_source) + if (output != NULL) { + if (output->type == nmsg_output_type_json) + source_val = output->json->source; + else if (output->type == nmsg_output_type_kafka_json) + source_val = output->kafka->source; + } + + if (source_val == 0 && np->has_source) source_val = np->source; if (source_val != 0) { @@ -341,9 +469,14 @@ _nmsg_message_payload_to_json(nmsg_output_t output, struct nmsg_message *msg, st append_json_value_string(sb, sb_tmp, sb_tmp_len); } - if (output != NULL && output->json->operator != 0) - oper_val = output->json->operator; - else if (np->has_operator_) + if (output != NULL) { + if (output->type == nmsg_output_type_json) + oper_val = output->json->operator; + else if (output->type == nmsg_output_type_kafka_json) + oper_val = output->kafka->operator; + } + + if (oper_val == 0 && np->has_operator_) oper_val = np->operator_; if (oper_val != 0) { @@ -356,9 +489,14 @@ _nmsg_message_payload_to_json(nmsg_output_t output, struct nmsg_message *msg, st append_json_value_int(sb, oper_val); } - if (output != NULL && output->json->group != 0) - group_val = output->json->group; - else if (np->has_group) + if (output != NULL) { + if (output->type == nmsg_output_type_json) + group_val = output->json->group; + else if (output->type == nmsg_output_type_kafka_json) + group_val = output->kafka->group; + } + + if (group_val == 0 && np->has_group) group_val = np->group; if (group_val != 0) { @@ -712,26 +850,7 @@ _nmsg_message_payload_to_json_load(struct nmsg_message *msg, break; } case nmsg_msgmod_ft_ip: { - char sip[INET6_ADDRSTRLEN]; - int family = 0; - - bdata = (ProtobufCBinaryData *) ptr; - if (bdata->data == NULL) { - append_json_value_null(g); - break; - } - - if (bdata->len == 4) { - family = AF_INET; - } else if (bdata->len == 16) { - family = AF_INET6; - } - - if (family && fast_inet_ntop(family, bdata->data, sip, sizeof(sip))) { - append_json_value_string_noescape(g, sip, strlen(sip)); - } else { - append_json_value_null(g); - } + res = _nmsg_nmsg_mod_ip_to_string((ProtobufCBinaryData *) ptr, true, g); break; } case nmsg_msgmod_ft_uint16: { diff --git a/nmsg/output.c b/nmsg/output.c index fd93f053b..d41c2a185 100644 --- a/nmsg/output.c +++ b/nmsg/output.c @@ -37,6 +37,62 @@ nmsg_output_open_sock(int fd, size_t bufsz) { return (output_open_stream(nmsg_stream_type_sock, fd, bufsz)); } +#ifdef HAVE_LIBRDKAFKA +nmsg_output_t +nmsg_output_open_kafka_json(const char *addr, const char *key_field) +{ + struct nmsg_output *output; + + output = calloc(1, sizeof(*output)); + if (output == NULL) + return (NULL); + + output->kafka = calloc(1, sizeof(*(output->kafka))); + if (output->kafka == NULL) { + 
free(output);
+		return (NULL);
+	}
+
+	output->type = nmsg_output_type_kafka_json;
+	output->write_fp = _output_kafka_json_write;
+	output->flush_fp = _output_kafka_json_flush;
+
+	output->kafka->ctx = kafka_create_producer(addr, NMSG_RBUF_TIMEOUT);
+	if (!output->kafka->ctx) {
+		free(output->kafka);
+		free(output);
+		return NULL;
+	}
+
+	if (key_field != NULL)
+		output->kafka->key_field = strdup(key_field);
+
+	return output;
+}
+#else /* HAVE_LIBRDKAFKA */
+nmsg_output_t
+nmsg_output_open_kafka_json(const char *addr __attribute__((unused)),
+			    const char *key_field __attribute__((unused)))
+{
+	return (NULL);
+}
+#endif /* HAVE_LIBRDKAFKA */
+
+#ifdef HAVE_LIBRDKAFKA
+nmsg_output_t
+_output_open_kafka(void *s, size_t bufsz) {
+	struct nmsg_output *output;
+
+	output = output_open_stream_base(nmsg_stream_type_kafka, bufsz);
+	if (output == NULL)
+		return (output);
+
+	output->stream->kafka = s;
+
+	return (output);
+}
+#endif /* HAVE_LIBRDKAFKA */
+
 #ifdef HAVE_LIBZMQ
 nmsg_output_t
 nmsg_output_open_zmq(void *s, size_t bufsz) {
@@ -53,7 +109,7 @@ nmsg_output_open_zmq(void *s, size_t bufsz) {
 #else /* HAVE_LIBZMQ */
 nmsg_output_t
 nmsg_output_open_zmq(void *s __attribute__((unused)),
-	size_t bufsz __attribute__((unused)))
+		     size_t bufsz __attribute__((unused)))
 {
 	return (NULL);
 }
@@ -178,6 +234,12 @@ nmsg_output_close(nmsg_output_t *output) {
 		res = _output_nmsg_flush(*output);
 		if ((*output)->stream->random != NULL)
 			nmsg_random_destroy(&((*output)->stream->random));
+#ifdef HAVE_LIBRDKAFKA
+		if ((*output)->stream->type == nmsg_stream_type_kafka)
+			kafka_ctx_destroy(&(*output)->stream->kafka);
+#else /* HAVE_LIBRDKAFKA */
+		assert((*output)->stream->type != nmsg_stream_type_kafka);
+#endif /* HAVE_LIBRDKAFKA */
 #ifdef HAVE_LIBZMQ
 		if ((*output)->stream->type == nmsg_stream_type_zmq)
 			zmq_close((*output)->stream->zmq);
@@ -206,6 +268,16 @@ nmsg_output_close(nmsg_output_t *output) {
 		fclose((*output)->json->fp);
 		free((*output)->json);
 		break;
+	case nmsg_output_type_kafka_json:
+#ifdef HAVE_LIBRDKAFKA
+		kafka_ctx_destroy(&(*output)->kafka->ctx);
+		if ((*output)->kafka->key_field != NULL)
+			free((void *) (*output)->kafka->key_field);
+		free((*output)->kafka);
+#else /* HAVE_LIBRDKAFKA */
+		assert((*output)->type != nmsg_output_type_kafka_json);
+#endif /* HAVE_LIBRDKAFKA */
+		break;
 	case nmsg_output_type_callback:
 		free((*output)->callback);
 		break;
@@ -226,6 +298,7 @@ nmsg_output_set_buffered(nmsg_output_t output, bool buffered) {
 		break;
 	case nmsg_output_type_json:
 		output->json->flush = !(buffered);
+	case nmsg_output_type_kafka_json:
 	default:
 		break;
 	}
@@ -301,6 +374,8 @@ nmsg_output_set_source(nmsg_output_t output, unsigned source) {
 	case nmsg_output_type_json:
 		output->json->source = source;
 		break;
+	case nmsg_output_type_kafka_json:
+		output->kafka->source = source;
 	default:
 		break;
 	}
@@ -318,6 +393,8 @@ nmsg_output_set_operator(nmsg_output_t output, unsigned operator) {
 	case nmsg_output_type_json:
 		output->json->operator = operator;
 		break;
+	case nmsg_output_type_kafka_json:
+		output->kafka->operator = operator;
 	default:
 		break;
 	}
@@ -335,6 +412,8 @@ nmsg_output_set_group(nmsg_output_t output, unsigned group) {
 	case nmsg_output_type_json:
 		output->json->group = group;
 		break;
+	case nmsg_output_type_kafka_json:
+		output->kafka->group = group;
 	default:
 		break;
 	}
@@ -343,6 +422,16 @@
 void
 _output_stop(nmsg_output_t output) {
 	output->stop = true;
+#ifdef HAVE_LIBRDKAFKA
+#ifdef HAVE_JSON_C
+	if (output->type == nmsg_output_type_kafka_json)
+
kafka_stop(output->kafka->ctx);
+#endif /* HAVE_JSON_C */
+	if (output->type == nmsg_output_type_stream &&
+	    output->stream != NULL &&
+	    output->stream->type == nmsg_stream_type_kafka)
+		kafka_stop(output->stream->kafka);
+#endif /* HAVE_LIBRDKAFKA */
 }
 
 /* Private functions. */
diff --git a/nmsg/output.h b/nmsg/output.h
index 13652c1e9..186dd3403 100644
--- a/nmsg/output.h
+++ b/nmsg/output.h
@@ -40,6 +40,7 @@ typedef enum {
 	nmsg_output_type_pres,
 	nmsg_output_type_callback,
 	nmsg_output_type_json,
+	nmsg_output_type_kafka_json,
 } nmsg_output_type;
 
 /**
@@ -117,6 +118,24 @@ nmsg_output_open_zmq(void *s, size_t bufsz);
 nmsg_output_t
 nmsg_output_open_zmq_endpoint(void *zmq_ctx, const char *ep, size_t bufsz);
 
+/**
+ * Create a Kafka producer and initialize a new NMSG stream output from it.
+ *
+ * This function takes an endpoint argument of format
+ * "proto:topic[#partition|%group_id]@broker[:port]"
+ *
+ * \see nmsg_input_open_kafka_endpoint()
+ *
+ * \param[in] ep Kafka endpoint address string.
+ *
+ * \param[in] bufsz Value between #NMSG_WBUFSZ_MIN and #NMSG_WBUFSZ_MAX.
+ *
+ * \return Opaque pointer that is NULL on failure or non-NULL on success.
+ */
+
+nmsg_output_t
+nmsg_output_open_kafka_endpoint(const char *ep, size_t bufsz);
+
 /**
  * Initialize a new presentation format (ASCII lines) nmsg output.
  *
@@ -158,6 +177,21 @@ nmsg_output_open_pres(int fd);
 nmsg_output_t
 nmsg_output_open_json(int fd);
 
+/**
+ * Initialize a new NMSG JSON form output to a Kafka broker.
+ *
+ * See nmsg_output_open_json() for details of the JSON format, or
+ * nmsg_input_open_kafka_endpoint() for the details of the address string.
+ *
+ * \param[in] addr Kafka endpoint address string.
+ * \param[in] key_field Optional NMSG field name whose content will be used
+ *	as the Kafka producer key; NULL produces messages without a key.
+ *
+ * \return Opaque pointer that is NULL on failure or non-NULL on success.
+ */
+nmsg_output_t
+nmsg_output_open_kafka_json(const char *addr, const char *key_field);
+
 /**
  * Initialize a new nmsg output closure. This allows a user-provided callback to
  * function as an nmsg output, for instance to participate in an nmsg_io loop.
diff --git a/nmsg/output_json.c b/nmsg/output_json.c
index 371fdb224..d35279844 100644
--- a/nmsg/output_json.c
+++ b/nmsg/output_json.c
@@ -21,6 +21,55 @@
 
 /* Internal functions. 
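 *
 * The Kafka JSON write path added below serializes a message to JSON,
 * optionally derives a producer key from the output's configured key
 * field via _nmsg_message_get_field_value_as_key(), and hands both
 * buffers to kafka_write().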
*/
+#ifdef HAVE_LIBRDKAFKA
+nmsg_res
+_output_kafka_json_write(nmsg_output_t output, nmsg_message_t msg) {
+	nmsg_res res;
+	struct nmsg_strbuf_storage sbs, key_sbs;
+	struct nmsg_strbuf *sb = _nmsg_strbuf_init(&sbs);
+	struct nmsg_strbuf *key_sb = NULL;
+	uint8_t *buf, *key = NULL;
+	size_t len, key_len = 0;
+
+	res = _nmsg_message_to_json(output, msg, sb);
+	if (res != nmsg_res_success)
+		goto out;
+
+	if (output->kafka->key_field != NULL) {
+		key_sb = _nmsg_strbuf_init(&key_sbs);
+		res = _nmsg_message_get_field_value_as_key(msg, output->kafka->key_field, key_sb);
+
+		if (res != nmsg_res_success)
+			goto out;
+
+		key_len = nmsg_strbuf_len(key_sb);
+		key = (uint8_t *) key_sb->data;
+	}
+
+	len = nmsg_strbuf_len(sb);
+	buf = (uint8_t *) _nmsg_strbuf_detach(sb);
+	if (!buf) {
+		res = nmsg_res_failure;
+		goto out;
+	}
+
+	res = kafka_write(output->kafka->ctx, key, key_len, buf, len);
+
+out:
+	if (key_sb != NULL)
+		_nmsg_strbuf_destroy(&key_sbs);
+
+	_nmsg_strbuf_destroy(&sbs);
+	return res;
+}
+
+nmsg_res
+_output_kafka_json_flush(nmsg_output_t output) {
+	kafka_flush(output->kafka->ctx);
+	return nmsg_res_success;
+}
+#endif /* HAVE_LIBRDKAFKA */
+
 nmsg_res
 _output_json_write(nmsg_output_t output, nmsg_message_t msg) {
 	nmsg_res res;
diff --git a/nmsg/output_nmsg.c b/nmsg/output_nmsg.c
index 0211231f1..ef289cd1d 100644
--- a/nmsg/output_nmsg.c
+++ b/nmsg/output_nmsg.c
@@ -274,6 +274,12 @@ send_buffer(nmsg_output_t output, uint8_t *buf, size_t len)
 #else /* HAVE_LIBZMQ */
 		assert(ostr->type != nmsg_stream_type_zmq);
 #endif /* HAVE_LIBZMQ */
+	} else if (ostr->type == nmsg_stream_type_kafka) {
+#ifdef HAVE_LIBRDKAFKA
+		res = kafka_write(output->stream->kafka, NULL, 0, buf, len);
+#else /* HAVE_LIBRDKAFKA */
+		assert(ostr->type != nmsg_stream_type_kafka);
+#endif /* HAVE_LIBRDKAFKA */
 	} else {
 		assert(0);
 	}
diff --git a/nmsg/private.h b/nmsg/private.h
index 3a86babd4..e02febec8 100644
--- a/nmsg/private.h
+++ b/nmsg/private.h
@@ -62,6 +62,10 @@
 # include <zmq.h>
 #endif /* HAVE_LIBZMQ */
 
+#ifdef HAVE_LIBRDKAFKA
+#include <librdkafka/rdkafka.h>
+#endif /* HAVE_LIBRDKAFKA */
+
 #ifdef HAVE_JSON_C
 #include 
 #endif /* HAVE_JSON_C */
@@ -83,6 +87,10 @@
 #include "libmy/vector.h"
 #include "libmy/fast_inet_ntop.h"
 
+#ifdef HAVE_LIBRDKAFKA
+#include "kafkaio.h"
+#endif /* HAVE_LIBRDKAFKA */
+
 /* Macros. 
*/ #define STR(x) #x @@ -117,6 +125,7 @@ typedef enum { nmsg_stream_type_file, nmsg_stream_type_sock, nmsg_stream_type_zmq, + nmsg_stream_type_kafka, nmsg_stream_type_null, } nmsg_stream_type; @@ -251,6 +260,18 @@ struct nmsg_json { unsigned group; }; +/* nmsg_kafka_json: used by nmsg_input and nmsg_output */ +struct nmsg_kafka_json { +#ifdef HAVE_LIBRDKAFKA + kafka_ctx_t ctx; + const char *key_field; +#endif /* HAVE_LIBRDKAFKA */ + bool flush; + unsigned source; + unsigned operator; + unsigned group; +}; + /* nmsg_stream_input: used by nmsg_input */ struct nmsg_stream_input { nmsg_stream_type type; @@ -258,6 +279,9 @@ struct nmsg_stream_input { #ifdef HAVE_LIBZMQ void *zmq; #endif /* HAVE_LIBZMQ */ +#ifdef HAVE_LIBRDKAFKA + kafka_ctx_t kafka; +#endif /* HAVE_LIBRDKAFKA */ Nmsg__Nmsg *nmsg; unsigned np_index; size_t nc_size; @@ -292,6 +316,9 @@ struct nmsg_stream_output { #ifdef HAVE_LIBZMQ void *zmq; #endif /* HAVE_LIBZMQ */ +#ifdef HAVE_LIBRDKAFKA + kafka_ctx_t kafka; +#endif /* HAVE_LIBRDKAFKA */ nmsg_container_t c; size_t bufsz; nmsg_random_t random; @@ -328,6 +355,7 @@ struct nmsg_input { struct nmsg_pcap *pcap; struct nmsg_pres *pres; struct nmsg_json *json; + struct nmsg_kafka_json *kafka; struct nmsg_callback_input *callback; }; nmsg_input_read_fp read_fp; @@ -346,6 +374,7 @@ struct nmsg_output { struct nmsg_stream_output *stream; struct nmsg_pres *pres; struct nmsg_json *json; + struct nmsg_kafka_json *kafka; struct nmsg_callback_output *callback; }; nmsg_output_write_fp write_fp; @@ -477,6 +506,9 @@ nmsg_message_t _nmsg_message_from_payload(Nmsg__NmsgPayload *np); nmsg_message_t _nmsg_message_dup(struct nmsg_message *msg); nmsg_res _nmsg_message_dup_protobuf(const struct nmsg_message *msg, ProtobufCMessage **dst); nmsg_res _nmsg_message_to_json(nmsg_output_t output, nmsg_message_t msg, struct nmsg_strbuf *sb); +#ifdef HAVE_LIBRDKAFKA +nmsg_res _nmsg_message_get_field_value_as_key(nmsg_message_t msg, const char *name, struct nmsg_strbuf *sb); +#endif /* HAVE_LIBRDKAFKA */ /* from msgmodset.c */ @@ -509,6 +541,9 @@ nmsg_res _input_nmsg_unpack_container(nmsg_input_t, Nmsg__Nmsg **, uint8_t *, s nmsg_res _input_nmsg_unpack_container2(const uint8_t *, size_t, unsigned, Nmsg__Nmsg **); nmsg_res _input_nmsg_read_container_file(nmsg_input_t, Nmsg__Nmsg **); nmsg_res _input_nmsg_read_container_sock(nmsg_input_t, Nmsg__Nmsg **); +#ifdef HAVE_LIBRDKAFKA +nmsg_res _input_nmsg_read_container_kafka(nmsg_input_t, Nmsg__Nmsg **); +#endif /* HAVE_LIBRDKAFKA */ #ifdef HAVE_LIBZMQ nmsg_res _input_nmsg_read_container_zmq(nmsg_input_t, Nmsg__Nmsg **); #endif /* HAVE_LIBZMQ */ @@ -530,14 +565,21 @@ nmsg_res _input_pres_read(nmsg_input_t, nmsg_message_t *); /* from input_json.c */ nmsg_res _input_json_read(nmsg_input_t, nmsg_message_t *); +#ifdef HAVE_LIBRDKAFKA +nmsg_res _input_kafka_json_read(nmsg_input_t, nmsg_message_t *); +#endif /* HAVE_LIBRDKAFKA */ /* from input_seqsrc.c */ struct nmsg_seqsrc * _input_seqsrc_get(nmsg_input_t, Nmsg__Nmsg *); void _input_seqsrc_destroy(nmsg_input_t); size_t _input_seqsrc_update(nmsg_input_t, struct nmsg_seqsrc *, Nmsg__Nmsg *); +/* from input.c */ +nmsg_input_t _input_open_kafka(void *s); + /* from output.c */ void _output_stop(nmsg_output_t); +nmsg_output_t _output_open_kafka(void *s, size_t bufsz); /* from output_nmsg.c */ nmsg_res _output_nmsg_flush(nmsg_output_t); @@ -548,6 +590,10 @@ nmsg_res _output_pres_write(nmsg_output_t, nmsg_message_t); /* from output_json.c */ nmsg_res _output_json_write(nmsg_output_t, nmsg_message_t); +#ifdef HAVE_LIBRDKAFKA 
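+/* Serialize a message to JSON and produce it to Kafka; the producer key,
+ * if any, comes from the output's key_field setting. */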
+nmsg_res _output_kafka_json_write(nmsg_output_t, nmsg_message_t);
+nmsg_res _output_kafka_json_flush(nmsg_output_t);
+#endif /* HAVE_LIBRDKAFKA */
 
 /* from brate.c */
 struct nmsg_brate * _nmsg_brate_init(size_t target_byte_rate);
diff --git a/src/io.c b/src/io.c
index 5d7efa96c..dec4b79f6 100644
--- a/src/io.c
+++ b/src/io.c
@@ -29,6 +29,10 @@
 # include <zmq.h>
 #endif /* HAVE_LIBZMQ */
 
+#ifdef HAVE_LIBRDKAFKA
+#include <librdkafka/rdkafka.h>
+#endif /* HAVE_LIBRDKAFKA */
+
 #include "kickfile.h"
 
 #include "nmsgtool.h"
@@ -36,6 +40,14 @@
 
 static const int on = 1;
 
+static const char *
+_strip_prefix_if_exists(const char *str, const char *prefix) {
+	if (strstr(str, prefix) != str)
+		return NULL;
+
+	return str + strlen(prefix);
+}
+
 void
 add_sock_input(nmsgtool_ctx *c, const char *ss) {
 	char *t;
@@ -197,6 +209,177 @@ add_sock_output(nmsgtool_ctx *c, const char *ss) {
 	}
 }
 
+#if (defined HAVE_JSON_C) && (defined HAVE_LIBRDKAFKA)
+static void
+_add_kafka_json_input(nmsgtool_ctx *c, const char *str_address) {
+	nmsg_input_t input;
+	nmsg_res res;
+
+	input = nmsg_input_open_kafka_json(str_address);
+	if (input == NULL) {
+		fprintf(stderr, "%s: nmsg_input_open_kafka_json() failed\n",
+			argv_program);
+		exit(1);
+	}
+	setup_nmsg_input(c, input);
+	res = nmsg_io_add_input(c->io, input, NULL);
+	if (res != nmsg_res_success) {
+		fprintf(stderr, "%s: nmsg_io_add_input() failed\n",
+			argv_program);
+		exit(1);
+	}
+	if (c->debug >= 2)
+		fprintf(stderr, "%s: nmsg Kafka json input: %s\n", argv_program,
+			str_address);
+	c->n_inputs += 1;
+}
+#else /* (defined HAVE_JSON_C) && (defined HAVE_LIBRDKAFKA) */
+static void
+_add_kafka_json_input(nmsgtool_ctx *c __attribute__((unused)),
+		      const char *str_address __attribute__((unused))) {
+	fprintf(stderr, "%s: Error: compiled without librdkafka or json-c support\n",
+		argv_program);
+	exit(EXIT_FAILURE);
+}
+#endif /* (defined HAVE_JSON_C) && (defined HAVE_LIBRDKAFKA) */
+
+#ifdef HAVE_LIBRDKAFKA
+static void
+_add_kafka_json_output(nmsgtool_ctx *c, const char *str_address) {
+	nmsg_output_t output;
+	nmsg_res res;
+
+	output = nmsg_output_open_kafka_json(str_address, c->kafka_key_field);
+	if (output == NULL) {
+		fprintf(stderr, "%s: nmsg_output_open_kafka_json() failed\n",
+			argv_program);
+		exit(1);
+	}
+	setup_nmsg_output(c, output);
+	if (c->kicker != NULL)
+		res = nmsg_io_add_output(c->io, output, (void *) -1);
+	else
+		res = nmsg_io_add_output(c->io, output, NULL);
+	if (res != nmsg_res_success) {
+		fprintf(stderr, "%s: nmsg_io_add_output() failed\n", argv_program);
+		exit(1);
+	}
+	if (c->debug >= 2)
+		fprintf(stderr, "%s: nmsg Kafka json output: %s\n", argv_program,
+			str_address);
+	c->n_outputs += 1;
+}
+#else /* HAVE_LIBRDKAFKA */
+static void
+_add_kafka_json_output(nmsgtool_ctx *c __attribute__((unused)),
+		       const char *str_address __attribute__((unused))) {
+	fprintf(stderr, "%s: Error: compiled without librdkafka support\n",
+		argv_program);
+	exit(EXIT_FAILURE);
+}
+#endif /* HAVE_LIBRDKAFKA */
+
+#ifdef HAVE_LIBRDKAFKA
+static void
+_add_kafka_nmsg_input(nmsgtool_ctx *c, const char *str_address) {
+	nmsg_res res;
+	nmsg_input_t input;
+
+	input = nmsg_input_open_kafka_endpoint(str_address);
+	if (c->debug >= 2)
+		fprintf(stderr, "%s: nmsg Kafka input: %s\n", argv_program, str_address);
+	if (input == NULL) {
+		fprintf(stderr, "%s: nmsg_input_open_kafka_endpoint() failed\n", argv_program);
+		exit(1);
+	}
+	setup_nmsg_input(c, input);
+	res = nmsg_io_add_input(c->io, input, NULL);
+	if (res != nmsg_res_success) {
+		fprintf(stderr, "%s: nmsg_io_add_input() 
failed\n", argv_program); + exit(1); + } + c->n_inputs += 1; +} + +static void +_add_kafka_nmsg_output(nmsgtool_ctx *c, const char *str_address) { + nmsg_res res; + nmsg_output_t output; + + output = nmsg_output_open_kafka_endpoint(str_address, NMSG_WBUFSZ_JUMBO); + if (c->debug >= 2) + fprintf(stderr, "%s: nmsg Kafka output: %s\n", argv_program, str_address); + if (output == NULL) { + fprintf(stderr, "%s: nmsg_output_open_kafka_endpoint() failed\n", argv_program); + exit(1); + } + setup_nmsg_output(c, output); + if (c->kicker != NULL) + res = nmsg_io_add_output(c->io, output, (void *) -1); + else + res = nmsg_io_add_output(c->io, output, NULL); + if (res != nmsg_res_success) { + fprintf(stderr, "%s: nmsg_io_add_output() failed\n", argv_program); + exit(1); + } + c->n_outputs += 1; +} +#else /* HAVE_LIBRDKAFKA */ +static void +_add_kafka_nmsg_input(nmsgtool_ctx *c __attribute__((unused)), + const char *str_address __attribute__((unused))) +{ + fprintf(stderr, "%s: Error: compiled without librdkafka support\n", + argv_program); + exit(EXIT_FAILURE); +} + +static void +_add_kafka_nmsg_output(nmsgtool_ctx *c __attribute__((unused)), + const char *str_address __attribute__((unused))) +{ + fprintf(stderr, "%s: Error: compiled without librdkafka support\n", + argv_program); + exit(EXIT_FAILURE); +} +#endif /* HAVE_LIBRDKAFKA */ + +void +add_kafka_input(nmsgtool_ctx *c, const char *str_address) { + const char *addr = _strip_prefix_if_exists(str_address, "nmsg:"); + if (addr != NULL) { + _add_kafka_nmsg_input(c, addr); + return; + } + + addr = _strip_prefix_if_exists(str_address, "json:"); + if (addr != NULL) { + _add_kafka_json_input(c, addr); + return; + } + fprintf(stderr, "%s: Error: nmsg or json format must be set for Kafka topic\n", + argv_program); + exit(EXIT_FAILURE); +} + +void +add_kafka_output(nmsgtool_ctx *c, const char *str_address) { + const char *addr = _strip_prefix_if_exists(str_address, "nmsg:"); + if (addr != NULL) { + _add_kafka_nmsg_output(c, addr); + return; + } + + addr = _strip_prefix_if_exists(str_address, "json:"); + if (addr != NULL) { + _add_kafka_json_output(c, addr); + return; + } + fprintf(stderr, "%s: Error: nmsg or json format must be set for Kafka topic\n", + argv_program); + exit(EXIT_FAILURE); +} + #ifdef HAVE_LIBZMQ void add_zsock_input(nmsgtool_ctx *c, const char *str_socket) { diff --git a/src/nmsgtool.c b/src/nmsgtool.c index dae665f65..ac94f3085 100644 --- a/src/nmsgtool.c +++ b/src/nmsgtool.c @@ -142,6 +142,30 @@ static argv_t args[] = { &ctx.kicker, "cmd", "make -c, -t continuous; run cmd on new files" }, + {'\0', "kafkakey", + ARGV_CHAR_P, + &ctx.kafka_key_field, + "fieldname", +#if defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C) + "nmsg field for Kafka producer key" }, +#else /* defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C) */ + "nmsg field for Kafka producer key (no support)" }, +#endif /* defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C) */ + + + {'\0', "readtopic", + ARGV_CHAR_P | ARGV_FLAG_ARRAY, + &ctx.r_kafka, + "kafka", +#ifdef HAVE_LIBRDKAFKA +#ifdef HAVE_JSON_C + "read nmsg data from Kafka (binary or json)" }, +#else /* HAVE_JSON_C */ + "read nmsg containers from Kafka topic" }, +#endif /* HAVE_JSON_C */ +#else /* HAVE_LIBRDKAFKA */ + "read nmsg data from Kafka topic (no support)" }, +#endif /* HAVE_LIBRDKAFKA */ { 'l', "readsock", ARGV_CHAR_P | ARGV_FLAG_ARRAY, @@ -284,6 +308,20 @@ static argv_t args[] = { "file", "write nmsg data to file" }, + { '\0', "writetopic", + ARGV_CHAR_P | ARGV_FLAG_ARRAY, + &ctx.w_kafka, + "kafka", +#ifdef 
HAVE_LIBRDKAFKA
+#ifdef HAVE_JSON_C
+	  "write nmsg data to Kafka (binary or json)" },
+#else /* HAVE_JSON_C */
+	  "write nmsg containers to Kafka topic" },
+#endif /* HAVE_JSON_C */
+#else /* HAVE_LIBRDKAFKA */
+	  "write nmsg data to Kafka topic (no support)" },
+#endif /* HAVE_LIBRDKAFKA */
+
 	{ 'Z', "readzchan",
 	  ARGV_CHAR_P | ARGV_FLAG_ARRAY,
 	  &ctx.r_zchannel,
diff --git a/src/nmsgtool.h b/src/nmsgtool.h
index 072092119..4ee8f228c 100644
--- a/src/nmsgtool.h
+++ b/src/nmsgtool.h
@@ -34,6 +34,10 @@
 # include <zmq.h>
 #endif /* HAVE_LIBZMQ */
 
+#ifdef HAVE_LIBRDKAFKA
+#include <librdkafka/rdkafka.h>
+#endif /* HAVE_LIBRDKAFKA */
+
 #include "libmy/argv.h"
 
 union nmsgtool_sockaddr {
@@ -46,11 +50,11 @@ typedef union nmsgtool_sockaddr nmsgtool_sockaddr;
 typedef struct {
 	/* parameters */
 	argv_array_t filters;
-	argv_array_t r_nmsg, r_pres, r_sock, r_zsock, r_channel, r_zchannel, r_json;
+	argv_array_t r_nmsg, r_pres, r_kafka, r_sock, r_zsock, r_channel, r_zchannel, r_json;
 	argv_array_t r_pcapfile, r_pcapif;
-	argv_array_t w_nmsg, w_pres, w_sock, w_zsock, w_json;
+	argv_array_t w_nmsg, w_pres, w_sock, w_kafka, w_zsock, w_json;
 	bool help, mirror, unbuffered, zlibout, daemon, version, interval_randomized;
-	char *endline, *kicker, *mname, *vname, *bpfstr, *filter_policy;
+	char *endline, *kicker, *mname, *vname, *bpfstr, *filter_policy, *kafka_key_field;
 	int debug, signal;
 	unsigned mtu, count, interval, rate, freq, byte_rate;
 	char *set_source_str, *set_operator_str, *set_group_str;
@@ -121,6 +125,8 @@ void add_json_input(nmsgtool_ctx *, const char *);
 void add_json_output(nmsgtool_ctx *, const char *);
 void add_sock_input(nmsgtool_ctx *, const char *);
 void add_sock_output(nmsgtool_ctx *, const char *);
+void add_kafka_input(nmsgtool_ctx *, const char *);
+void add_kafka_output(nmsgtool_ctx *, const char *);
 void add_zsock_input(nmsgtool_ctx *, const char *);
 void add_zsock_output(nmsgtool_ctx *, const char *);
 void add_filter_module(nmsgtool_ctx *, const char *);
diff --git a/src/process_args.c b/src/process_args.c
index a886d3dd7..5c6b26291 100644
--- a/src/process_args.c
+++ b/src/process_args.c
@@ -89,12 +89,31 @@ process_args(nmsgtool_ctx *c) {
 		usage(NULL);
 
 	if (c->version) {
-#ifdef HAVE_LIBZMQ
-		fprintf(stderr, "%s: version %s\n", argv_program, PACKAGE_VERSION);
-#else /* HAVE_LIBZMQ */
-		fprintf(stderr, "%s: version %s (without libzmq support)\n",
-			argv_program, PACKAGE_VERSION);
+		int support = 0;
+		fprintf(stderr, "%s: version %s", argv_program, PACKAGE_VERSION);
+#ifndef HAVE_LIBZMQ
+		support |= 1;
 #endif /* HAVE_LIBZMQ */
+#ifndef HAVE_LIBRDKAFKA
+		support |= 2;
+#endif /* HAVE_LIBRDKAFKA */
+		if (support > 0) {
+			fprintf(stderr, " (");
+			switch (support) {
+			case 1:
+				fprintf(stderr, "without libzmq support");
+				break;
+			case 2:
+				fprintf(stderr, "without librdkafka support");
+				break;
+			case 3:
+				fprintf(stderr, "without libzmq or librdkafka support");
+			default:
+				break;
+			}
+			fprintf(stderr, ")");
+		}
+		fprintf(stderr, "\n");
 		exit(EXIT_SUCCESS);
 	}
 
@@ -156,6 +175,15 @@ process_args(nmsgtool_ctx *c) {
 			c->kicker = strdup(t);
 	}
 
+#if defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C)
+	/* kafka key */
+	if (c->kafka_key_field == NULL) {
+		t = getenv("NMSG_KAFKA_KEY");
+		if (t != NULL)
+			c->kafka_key_field = t;
+	}
+#endif /* defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C) */
+
 	/* set source, operator, group */
 	if (c->set_source_str != NULL) {
 		c->set_source = read_uint32_nz(c->set_source_str);
@@ -297,6 +325,8 @@ process_args(nmsgtool_ctx *c) {
 	process_args_loop(c->w_sock, add_sock_output);
 
 	process_args_loop(c->r_zsock, add_zsock_input);
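	/*
	 * The Kafka loops added just below dispatch on the endpoint prefix:
	 * "nmsg:" carries binary NMSG containers, "json:" carries JSON
	 * payloads (see add_kafka_input()/add_kafka_output()). Illustrative
	 * endpoints, following proto:topic[#partition|%group_id]@broker[:port]:
	 *
	 *	nmsg:ch202%mygroup@broker.example.com:9092
	 *	json:ch202#0@broker.example.com:9092
	 */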
process_args_loop(c->w_zsock, add_zsock_output); + process_args_loop(c->r_kafka, add_kafka_input); + process_args_loop(c->w_kafka, add_kafka_output); process_args_loop(c->r_nmsg, add_file_input); process_args_loop(c->w_nmsg, add_file_output); diff --git a/tests/test-private.c b/tests/test-private.c new file mode 100644 index 000000000..b610125fc --- /dev/null +++ b/tests/test-private.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2024 DomainTools LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + + +#include "errors.h" + +#include "nmsg.h" +#include "private.h" +#include "nmsg/msgmod/transparent.h" + +#define NAME "test-private" + +/* This module contains unit tests that target the private NMSG library functions */ + +#define QUOTE(...) #__VA_ARGS__ + + +#if (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) +typedef struct { + const char *field; + size_t length; + const char *response; +} kafka_key_answer_t; + +typedef struct { + const char *payload; + const kafka_key_answer_t *answer; +} kafka_key_task_t; + + +#define A1_MESSAGE QUOTE({"time":"2018-02-20 22:01:47.303896708","vname":"base","mname":"http","source":"abcdef01","message":{"type":"unknown","dstip":"192.0.2.2","dstport":80,"request":"GET /"}}) + +/* Type base:http */ +const kafka_key_answer_t a1[] = { + { "type", 7, "unknown" }, /* enum */ + { "dstip", 9, "192.0.2.2" }, /* bytes | nmsg_msgmod_ft_ip */ + { "dstport", 2, "80" }, /* uint32 */ + { "request", 5, "GET /"}, /* bytes | nmsg_msgmod_ft_mlstring */ + { NULL, 0, NULL } +}; + +#define A2_MESSAGE QUOTE({"time":"2023-04-21 14:39:10.039412373","vname":"base","mname":"dnsobs","message":{"time":1682087949,"response_ip":"::1","qname":"xxx.xxx.xxx.xxx.","qtype":"A","qclass":"IN","rcode":"NOERROR","response":"W5SEIAABAAMABAABBHh4eHgEeHh4eAV4eHh4eAN4eHgDeHh4C3h4eHh4eHh4eHh4A3h4eAAAAQABwAwAAQABAAAAPAAEAAAAAMAMAAEAAQAAADwABAAAAADADAABAAEAAAA8AAQAAAAAwBYAAgABAAKjAAAXB3h4eHh4eHgJeHh4eHh4eHh4A3h4eADAFgACAAEAAqMAABkHeHh4eHh4eAl4eHh4eHh4eHgCeHgCeHgAwBYAAgABAAKjAAATBnh4eHh4eAl4eHh4eHh4eHjAMMAWAAIAAQACowAAFgZ4eHh4eHgJeHh4eHh4eHh4A3h4eAAAACkQAAAAgAAAAA==","response_json":{"header":{"opcode":"QUERY","rcode":"NOERROR","id":23444,"flags":["qr","aa","ad"],"opt":{"edns":{"version":0,"flags":["do"],"udp":4096,"options":[]}}},"question":[{"qname":"xxxx.xxxx.xxxxx.xxx.xxx.xxxxxxxxxxx.xxx.","qclass":"IN","qtype":"A"}],"answer":[{"rrname":"xxxx.xxxx.xxxxx.xxx.xxx.xxxxxxxxxxx.xxx.","rrttl":60,"rrclass":"IN","rrtype":"A","rdata":["0.0.0.0","0.0.0.0","0.0.0.0"]}],"authority":[{"rrname":"xxxxx.xxx.xxx.xxxxxxxxxxx.xxx.","rrttl":172800,"rrclass":"IN","rrtype":"NS","rdata":["xxxxxxx.xxxxxxxxx.xxx.","xxxxxxx.xxxxxxxxx.xx.xx.","xxxxxx.xxxxxxxxx.xxx.","xxxxxx.xxxxxxxxx.xxx."]}],"additional":[]},"query_zone":"xxx.xxx.xxx.xxx.","geoid":"ESIzRA==","sensor_id":"423a35c7"}}) + +#define A3_MESSAGE QUOTE({"time":"2023-05-01 
18:27:26.142008000","vname":"base","mname":"dnsqr","message":{"type":"UDP_UNANSWERED_QUERY","query_ip":"0.0.0.0","response_ip":"0.0.0.0","proto":"UDP","query_port":5353,"response_port":5353,"id":0,"qname":"_microsoft_mcc._tcp.local.","qclass":"CLASS32769","qtype":"TYPE149","query_packet":["RQAARz7IAAABEa3DrB5AAeAAAPsU6RTpADNfHQAAAAAAAQAAAAAAAA5fbWljcm9zb2Z0X21jYwRfdGNwBWxvY2FsAAAMgAE="],"query_time_sec":[1682965646],"query_time_nsec":[142008000],"response_packet":[],"response_time_sec":[],"response_time_nsec":[],"timeout":72.502578999999997222,"query":"AAAAAAABAAAAAAAADl9taWNyb3NvZnRfbWNjBF90Y3AFbG9jYWwAAAyAAQ==","query_json":{"header":{"opcode":"QUERY","rcode":"NOERROR","id":0,"flags":[]},"question":[{"qname":"_microsoft_mcc._tcp.local.","qclass":"CLASS32769","qtype":"PTR"}],"answer":[],"authority":[],"additional":[]}}}) + +/* Type base:dnsobs */ +const kafka_key_answer_t a2[] = { + { "time", 10, "1682087949" }, /* uint64 */ + { "response_ip", 3, "::1" }, /* bytes | nmsg_msgmod_ft_ip */ + { "qname", 16, "xxx.xxx.xxx.xxx." }, /* bytes | dns_name_format */ + { "qtype", 1, "A" }, /* uint32 | dns_type_format */ + { "qclass", 2, "IN" }, /* uint32 | dns_class_format */ + { "rcode", 7, "NOERROR" }, /* uint32 i| dnsqr_rcode_format */ + { "query_zone", 16, "xxx.xxx.xxx.xxx." }, /* bytes | dns_name_format */ + { "geoid", 4, "\x11\x22\x33\x44" }, /* bytes */ + { "sensor_id", 1, "4" }, /* fixed32 | dnsobs_sid_format */ + { NULL, 0, NULL } +}; + +/* Type base:dnsqr */ +const kafka_key_answer_t a3[] = { + { "type", 20, "UDP_UNANSWERED_QUERY" }, /* enum */ + { "query_ip", 7, "0.0.0.0" }, /* bytes | nmsg_msgmod_ft_ip */ + { "response_ip", 7, "0.0.0.0" }, /* bytes | nmsg_msgmod_ft_ip */ + { "proto", 3, "UDP" }, /* uint32 | dnsqr_proto_format */ + { "query_port", 4, "5353" }, /* uint32 */ + { "response_port", 4, "5353" }, /* uint32 */ + { "id", 1, "0" }, /* uint32 */ + { "qname", 26, "_microsoft_mcc._tcp.local." 
}, /* bytes | dns_name_format */
+	{ "qclass", 10, "CLASS32769" },			/* uint32 | dns_class_format */
+	{ "qtype", 7, "TYPE149" },			/* uint32 | dns_type_format */
+	{ "query_time_sec", 8, "\x8e\x04\x50\x64\x00\x00\x00\x00" },	/* int64 */
+	{ "query_time_nsec", 4, "\xc0\xde\x76\x08" },	/* sfixed32: 142008000 */
+	{ "response_time_sec", 0, "\0" },		/* int64 */
+	{ "response_time_nsec", 0, "\0" },		/* sfixed32 */
+	{ "timeout", 21, "72.502578999999997222" },	/* double */
+	{ NULL, 0, NULL }
+};
+
+const kafka_key_task_t tasks[] = {
+	{ A1_MESSAGE, a1 },
+	{ A2_MESSAGE, a2 },
+	{ A3_MESSAGE, a3 },
+	{ NULL, NULL }
+};
+
+/* Unit tests for verifying the content of Kafka producer keys extracted from nmsg fields */
+
+static int
+test_kafka_key(void) {
+	nmsg_input_t i;
+	nmsg_message_t m;
+	FILE *f;
+	int fd;
+	const kafka_key_task_t *t;
+	const kafka_key_answer_t *a;
+	struct nmsg_strbuf_storage tbs;
+	struct nmsg_strbuf *tb = _nmsg_strbuf_init(&tbs);
+
+	/* Create test file */
+	f = tmpfile();
+	check_return(f != NULL);
+
+	fd = fileno(f);
+	check_return(fd != -1);
+
+	t = tasks;
+	while (t->payload != NULL) {
+		check_return(write(fd, t->payload, strlen(t->payload)) == (ssize_t) strlen(t->payload));
+		check_return(write(fd, "\n", 1) == 1);
+		++t;
+	}
+
+	check_return(lseek(fd, 0, SEEK_SET) == 0);
+
+	i = nmsg_input_open_json(fd);
+	check_return(i != NULL);
+
+	t = tasks;
+	while (t->payload != NULL) {
+		check_return(nmsg_input_read(i, &m) == nmsg_res_success);
+		a = t->answer;
+		printf("Question [%s]\n", t->payload);
+		while (a->field != NULL) {
+			check_return(_nmsg_message_payload_get_field_value_as_key(m, a->field, tb) == nmsg_res_success);
+			printf("Key [%s] Expect [%s][%zu] - Result [%s][%zu]\n", a->field, a->response, a->length, tb->data, nmsg_strbuf_len(tb));
+			check_return(memcmp(tb->data, a->response, a->length) == 0);
+			check_return(nmsg_strbuf_len(tb) == a->length);
+			nmsg_strbuf_reset(tb);
+			++a;
+		}
+		nmsg_message_destroy(&m);
+		++t;
+	}
+
+	_nmsg_strbuf_destroy(&tbs);
+	fclose(f);
+
+	l_return_test_status();
+}
+#endif /* (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) */
+
+int
+main(void)
+{
+	check_abort(nmsg_init() == nmsg_res_success);
+
+#if (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C)
+	check_explicit2_display_only(test_kafka_key() == 0, "test-private / test_kafka_key");
+#endif /* (defined HAVE_LIBRDKAFKA) && (defined HAVE_JSON_C) */
+
+	g_check_test_status(false);
+}
From fe841d35ad3124cc9e88ebbcdeeeab47ffa31f75 Mon Sep 17 00:00:00 2001
From: Demian Vladi <126811849+dvladi77@users.noreply.github.com>
Date: Tue, 4 Jun 2024 15:15:01 -0700
Subject: [PATCH 08/24] Added support for exporting metrics to Prometheus
 (#134)

---
 Makefile.am    |  12 +++++-
 README.md      |   4 ++
 configure.ac   |  13 +++++++
 src/dt_prom.c  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/dt_prom.h  |  54 ++++++++++++++++++++++++++
 src/nmsgtool.c |  85 ++++++++++++++++++++++++++++++++++++++++
 src/nmsgtool.h |   1 +
 7 files changed, 270 insertions(+), 2 deletions(-)
 create mode 100644 src/dt_prom.c
 create mode 100644 src/dt_prom.h

diff --git a/Makefile.am b/Makefile.am
index ef3b9e1de..1e3318992 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,7 +19,9 @@ AM_CFLAGS = \
 	$(libwdns_CFLAGS) \
 	$(libzmq_CFLAGS) \
 	$(librdkafka_CFLAGS) \
-	$(json_c_CFLAGS)
+	$(json_c_CFLAGS) \
+	$(libmicrohttpd_CFLAGS) \
+	$(libprom_CFLAGS)
 AM_LDFLAGS =
 
 EXTRA_DIST += ChangeLog
@@ -345,7 +347,9 @@ src_nmsgtool_LDADD = \
 	nmsg/libnmsg.la \
 	$(libpcap_LIBS) \
 	$(libzmq_LIBS) \
-	$(librdkafka_LIBS)
+	$(librdkafka_LIBS) \
+
$(libmicrohttpd_LIBS) \
+	$(libprom_LIBS)
 src_nmsgtool_SOURCES = \
 	libmy/argv.c \
 	libmy/argv.h \
@@ -361,6 +365,10 @@ src_nmsgtool_SOURCES = \
 	src/rwfile.c \
 	src/unescape.c
 
+if USE_DT_PROM
+src_nmsgtool_SOURCES += src/dt_prom.c \
+	src/dt_prom.h
+endif
+
 # ## ### Tests
 
diff --git a/README.md b/README.md
index 5f415e7af..817cb6bb5 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,10 @@ nmsg has the following external dependencies:
 
 * [zlib](http://www.zlib.net/)
 
+* prometheus integration (optional)
+  * [libprometheus](https://github.com/digitalocean/prometheus-client-c)
+  * [libmicrohttpd](https://www.gnu.org/software/libmicrohttpd/)
+
 On Debian systems, the following packages should be installed, if available:
 
     pkg-config libpcap0.8-dev libprotobuf-c-dev protobuf-c-compiler libzmq3-dev librdkafka-dev libjson-c-dev zlib1g-dev
diff --git a/configure.ac b/configure.ac
index 46d703c92..8833c7749 100644
--- a/configure.ac
+++ b/configure.ac
@@ -176,6 +176,18 @@ else
 	use_json_c="false"
 fi
 
+AC_ARG_WITH([prometheus], AS_HELP_STRING([--with-prometheus], [Enable prometheus metrics]))
+if test "x$with_prometheus" = "xyes"; then
+	PKG_CHECK_MODULES([libmicrohttpd], [libmicrohttpd >= 0.9.0])
+	PKG_CHECK_MODULES([libprom], [libprom >= 0.1.0])
+	AC_DEFINE([HAVE_PROMETHEUS], [1], [Define to 1 if prometheus metrics are enabled.])
+	USE_PROMETHEUS="yes"
+	AM_CONDITIONAL([USE_DT_PROM], [true])
+else
+	USE_PROMETHEUS="no"
+	AM_CONDITIONAL([USE_DT_PROM], [false])
+fi
+
 AC_CHECK_HEADER([zlib.h], [], [ AC_MSG_ERROR([required header file not found]) ])
 AC_CHECK_LIB([z], [deflate], [], [ AC_MSG_ERROR([required library not found]) ])
 
@@ -237,6 +249,7 @@ AC_MSG_RESULT([
     libzmq support:         ${use_libzmq}
     librdkafka support:     ${use_librdkafka}
     json-c support:         ${use_json_c}
+    prometheus support:     ${USE_PROMETHEUS}
 
     building html docs:     ${DOC_HTML_MSG}
     building manpage docs:  ${DOC_MAN_MSG}
diff --git a/src/dt_prom.c b/src/dt_prom.c
new file mode 100644
index 000000000..fd690f4e6
--- /dev/null
+++ b/src/dt_prom.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023 DomainTools LLC
+ * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc.
+ *
+ * Prometheus+microhttpd embedding routines.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +#include "dt_prom.h" + +#include + +#include + + +static prom_callback g_prom_cb; + + +#if MHD_VERSION >= 0x00097002 +static enum MHD_Result +#else +static int +#endif +promhttp_handler(void *cls, struct MHD_Connection *connection, const char *url, const char *method, + const char *version __attribute__((unused)), const char *upload_data __attribute__((unused)), + size_t *upload_data_size __attribute__((unused)), void **con_cls __attribute__((unused))) +{ + static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + const char *buf; + struct MHD_Response *response; + enum MHD_ResponseMemoryMode mmode = MHD_RESPMEM_PERSISTENT; + unsigned int status_code = MHD_HTTP_BAD_REQUEST; + int ret; + + pthread_mutex_lock(&lock); + + if (g_prom_cb(cls) < 0) { + buf = "Statistics retrieval failure\n"; + status_code = MHD_HTTP_INTERNAL_SERVER_ERROR; + goto resp; + } + + if (strcmp(method, "GET") != 0) + buf = "Invalid HTTP Method\n"; + else if (strcmp(url, "/") == 0) { + buf = "OK\n"; + status_code = MHD_HTTP_OK; + } else if (strcmp(url, "/metrics") == 0) { + buf = prom_collector_registry_bridge(PROM_COLLECTOR_REGISTRY_DEFAULT); + mmode = MHD_RESPMEM_MUST_FREE; + status_code = MHD_HTTP_OK; + } else + buf = "Bad Request\n"; + +resp: + response = MHD_create_response_from_buffer(strlen(buf), (void *)buf, mmode); + ret = MHD_queue_response(connection, status_code, response); + MHD_destroy_response(response); + pthread_mutex_unlock(&lock); + + return ret; +} + +static int +init_microhttpd(void *clos, unsigned short port) +{ + struct MHD_Daemon *daemon; +#if MHD_VERSION >= 0x00095300 + const int flags = MHD_USE_INTERNAL_POLLING_THREAD; +#else + const int flags = MHD_USE_POLL_INTERNALLY; +#endif + + daemon = MHD_start_daemon(flags, port, NULL, NULL, &promhttp_handler, clos, MHD_OPTION_END); + return (daemon != NULL ? 0 : -1); +} + +int +init_prometheus(prom_callback cbfn, void *clos, unsigned short port) +{ + static unsigned int once = 0; + + if (once++ > 0) + return -1; + + if (prom_collector_registry_default_init() != 0) + return -1; + + g_prom_cb = cbfn; + + return (init_microhttpd(clos, port)); +} diff --git a/src/dt_prom.h b/src/dt_prom.h new file mode 100644 index 000000000..af15a9684 --- /dev/null +++ b/src/dt_prom.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. + * + * Prometheus+microhttpd helper/function definitions for embedding. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DT_PROM_H +#define DT_PROM_H + +#include + + +#define INIT_PROM_CTR(ctr,name,desc) \ + ctr = prom_collector_registry_must_register_metric(prom_counter_new(name, desc, 0, NULL)); + +/* Note: label MUST be of type char * and NOT a string literal. 
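+ * (prom_counter_new() receives &label as its label-key array, so label
+ * must be an addressable char * variable; a literal cannot be used here.)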
*/ +#define INIT_PROM_CTR_L(ctr,name,desc,label) \ + ctr = prom_collector_registry_must_register_metric(prom_counter_new(name, desc, 1, &label)); + +#define INIT_PROM_GAUGE(gauge,name,desc) \ + gauge = prom_collector_registry_must_register_metric(prom_gauge_new(name, desc, 0, NULL)); + +/* Note: label MUST be of type char * and NOT a string literal. */ +#define INIT_PROM_GAUGE_L(gauge,name,desc,label) \ + gauge = prom_collector_registry_must_register_metric(prom_gauge_new(name, desc, 1, &label)); + +/* This user callback returns 0 on success or -1 on failure. */ +typedef int (*prom_callback)(void *clos); + +/* + * Initialize the prometheus subsystem. + * + * cbfn is a mandatory callback function that will be called with the user- + * defined value in clos each time prometheus metrics are queried. + * + * Port denotes an HTTP listening port for exporting the prometheus metrics + * via libmicrohttpd. + */ +int init_prometheus(prom_callback cbfn, void *clos, unsigned short port); + +#endif /* DT_PROM_H */ diff --git a/src/nmsgtool.c b/src/nmsgtool.c index ac94f3085..f5771c26b 100644 --- a/src/nmsgtool.c +++ b/src/nmsgtool.c @@ -31,6 +31,10 @@ #include "nmsgtool.h" #include "kickfile.h" +#ifdef HAVE_PROMETHEUS +#include "dt_prom.h" +#endif /* HAVE_PROMETHEUS */ + /* Globals. */ static nmsgtool_ctx ctx; @@ -219,6 +223,16 @@ static argv_t args[] = { "ACCEPT|DROP", "default filter chain policy" }, + { '\0', "prometheus", + ARGV_U_SHORT, + &ctx.prom_port, + "prometheus port", +#ifdef HAVE_PROMETHEUS + "serve prometheus counters on port" }, +#else /* HAVE_PROMETHEUS */ + "serve prometheus counters on port (no support)" }, +#endif /* HAVE_PROMETHEUS */ + { 'r', "readnmsg", ARGV_CHAR_P | ARGV_FLAG_ARRAY, &ctx.r_nmsg, @@ -341,7 +355,19 @@ static argv_t args[] = { { ARGV_LAST, 0, 0, 0, 0, 0 } }; +#ifdef HAVE_PROMETHEUS +/* For payloads */ +static prom_counter_t *total_payloads_in, *total_payloads_out; +/* For containers */ +static prom_counter_t *total_container_recvs, *total_container_drops; +#endif /* HAVE_PROMETHEUS */ + + /* Forward. */ +#ifdef HAVE_PROMETHEUS +static void init_prometheus_counters(void); +static int nmsgtool_prom_handler(void *clos); +#endif /* HAVE_PROMETHEUS */ static void print_io_stats(nmsg_io_t); static void io_close(struct nmsg_io_close_event *); @@ -376,6 +402,17 @@ int main(int argc, char **argv) { assert(ctx.io != NULL); nmsg_io_set_close_fp(ctx.io, io_close); +#ifdef HAVE_PROMETHEUS + if (ctx.prom_port > 0) { + if (init_prometheus(nmsgtool_prom_handler, ctx.io, ctx.prom_port) < 0) { + fprintf(stderr, "Error: failed to initialize prometheus subsystem\n"); + exit(EXIT_FAILURE); + } + + init_prometheus_counters(); + } +#endif /* HAVE_PROMETHEUS */ + /* process arguments and load inputs/outputs into the nmsg_io engine */ process_args(&ctx); @@ -449,6 +486,54 @@ setup_nmsg_input(nmsgtool_ctx *c, nmsg_input_t input) { /* Private functions. 
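 *
 * Prometheus wiring (see below): init_prometheus() is called once with
 * nmsgtool_prom_handler as the callback and the nmsg_io_t as its closure;
 * each scrape of /metrics re-runs the callback, which feeds the deltas of
 * the nmsg_io_get_stats() totals since the previous scrape into the
 * registered counters.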
*/ +#ifdef HAVE_PROMETHEUS + +static void +init_prometheus_counters(void) +{ + const char *label = "nmsgtool"; + + /* NMSG payload counters */ + INIT_PROM_CTR_L(total_payloads_in, "total_payloads_in", "total number of nmsg payloads received", label); + assert(total_payloads_in != NULL); + INIT_PROM_CTR_L(total_payloads_out, "total_payloads_out", "total number of nmsg payloads sent", label); + assert(total_payloads_out != NULL); + + /* NMSG container counters */ + INIT_PROM_CTR_L(total_container_recvs, "total_container_recvs", "total number of nmsg containers received", label); + assert(total_container_recvs != NULL); + INIT_PROM_CTR_L(total_container_drops, "total_container_drops", "total number of nmsg containers lost", label); + assert(total_container_drops != NULL); +} + +/* This is the prometheus callback function. clos is a nmsg_io_t, + * which gives us the handle to get nmsg statistics. Always returns 0, which means success. */ +static int nmsgtool_prom_handler(void *clos) { + const char *label = "nmsgtool"; + int retval = 0; + nmsg_io_t io = (nmsg_io_t) clos; + static uint64_t last_sum_in = 0, last_sum_out = 0, last_container_drops = 0, last_container_recvs = 0; + uint64_t sum_in = 0, sum_out = 0, container_drops = 0, container_recvs = 0; + if (nmsg_io_get_stats(io, &sum_in, &sum_out, &container_recvs, &container_drops) != nmsg_res_success) + retval = -1; + + if (retval == 0) { + if (prom_counter_add(total_payloads_in, sum_in - last_sum_in, &label) != 0 || + prom_counter_add(total_payloads_out, sum_out - last_sum_out, &label) != 0 || + prom_counter_add(total_container_recvs, container_recvs - last_container_recvs, &label) != 0 || + prom_counter_add(total_container_drops, container_drops - last_container_drops, &label) != 0) + retval = -1; + + last_sum_in = sum_in; + last_sum_out = sum_out; + last_container_recvs = container_recvs; + last_container_drops = container_drops; + } + + return retval; +} +#endif /* HAVE_PROMETHEUS */ + static void print_io_stats(nmsg_io_t io) { uint64_t sum_in = 0, sum_out = 0, container_drops = 0, container_recvs = 0; diff --git a/src/nmsgtool.h b/src/nmsgtool.h index 4ee8f228c..a50fcebcf 100644 --- a/src/nmsgtool.h +++ b/src/nmsgtool.h @@ -56,6 +56,7 @@ typedef struct { bool help, mirror, unbuffered, zlibout, daemon, version, interval_randomized; char *endline, *kicker, *mname, *vname, *bpfstr, *filter_policy, *kafka_key_field; int debug, signal; + unsigned short prom_port; unsigned mtu, count, interval, rate, freq, byte_rate; char *set_source_str, *set_operator_str, *set_group_str; char *get_source_str, *get_operator_str, *get_group_str; From 623acce80d6cfc35fc197d1259b76088391618d7 Mon Sep 17 00:00:00 2001 From: "Jeremy C. Reed" Date: Wed, 29 May 2024 14:47:08 +0000 Subject: [PATCH 09/24] fix test to use portable regular expression for bound size --- tests/group-operator-source-tests/test.sh.in | 34 ++++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/group-operator-source-tests/test.sh.in b/tests/group-operator-source-tests/test.sh.in index a80de86a7..2a3fcc950 100755 --- a/tests/group-operator-source-tests/test.sh.in +++ b/tests/group-operator-source-tests/test.sh.in @@ -94,7 +94,7 @@ echo Testing missing group and operator ... 
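# POSIX ERE note: "{8}" requires exactly eight hex digits; the earlier
# "{,8}" ("up to eight") bound is a GNU extension that other egrep
# implementations reject, hence the fixed bound used throughout below.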
echo "$JSON_PAYLOAD" | $NMSGTOOL -j - > ${OUTPUT}/test-missing.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-missing.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-missing.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[\] $' >/dev/null check no numerical operator and group in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - -J ${OUTPUT}/test-missing.json.out @@ -109,7 +109,7 @@ echo Testing numerical operator without aliases results in numerical operator .. echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":201,"message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-operator-no-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-operator-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[201\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-operator-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[201\] \[\] $' >/dev/null check numerical operator in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":201,"message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-operator-no-aliases.json.out @@ -122,7 +122,7 @@ echo Testing numerical group without aliases results in numerical group ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":101,"message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-group-no-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-group-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[101\] $' >/dev/null +head -1 ${OUTPUT}/test-group-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[101\] $' >/dev/null check numerical group in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":101,"message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-group-no-aliases.json.out @@ -135,7 +135,7 @@ echo Testing named operator without aliases results in no operator ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"OperatorA","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-operator-name-no-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-operator-name-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-operator-name-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[\] $' >/dev/null check no operator in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"OperatorA","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-operator-name-no-aliases.json.out @@ -148,7 +148,7 @@ echo Testing named group without aliases results in no group ... 
echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"GroupA","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-group-name-no-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-group-name-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-group-name-no-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[\] $' >/dev/null check no group in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"GroupA","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-group-name-no-aliases.json.out @@ -178,7 +178,7 @@ echo Testing numerical operator with aliases results with named operator ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":101,"message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-operator-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-operator-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[QA\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-operator-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[QA\] \[\] $' >/dev/null check named operator in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":101,"message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-operator-aliases.json.out @@ -190,7 +190,7 @@ echo Testing numerical group with aliases results with named group ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":201,"message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-group-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-group-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[Test\] $' >/dev/null +head -1 ${OUTPUT}/test-group-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[Test\] $' >/dev/null check named group in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":201,"message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-group-aliases.json.out @@ -202,7 +202,7 @@ echo Testing named operator with aliases ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"ExampleOperator","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-operator-name-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-operator-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[ExampleOperator\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-operator-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[ExampleOperator\] \[\] $' >/dev/null check named operator in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"ExampleOperator","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-operator-name-aliases.json.out @@ -214,7 +214,7 @@ echo Testing named group with aliases ... 
echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"ExampleGroup","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-group-name-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-group-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[ExampleGroup\] $' >/dev/null +head -1 ${OUTPUT}/test-group-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[ExampleGroup\] $' >/dev/null check named group in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"ExampleGroup","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-group-name-aliases.json.out @@ -226,7 +226,7 @@ echo Testing named operator not in aliases results in no operator ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"UnknownOperator","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-operator-unknown-name-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-operator-unknown-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-operator-unknown-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[\] $' >/dev/null check no operator in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","operator":"UnknownOperator","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-operator-unknown-name-aliases.json.out @@ -238,7 +238,7 @@ echo Testing named group not in aliases results in no group ... echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"UnknownGroup","message"/' | $NMSGTOOL -j - > ${OUTPUT}/test-group-unknown-name-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-group-unknown-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-group-unknown-name-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[\] $' >/dev/null check no group in presentation echo "$JSON_PAYLOAD" | sed 's/"mname":"http","message"/"mname":"http","group":"UnknownGroup","message"/' | $NMSGTOOL -j - -J ${OUTPUT}/test-group-unknown-name-aliases.json.out @@ -250,7 +250,7 @@ echo Testing setting numerical operator with aliases results in named operator . echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator 101 > ${OUTPUT}/test-setoperator-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setoperator-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[QA\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-setoperator-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[QA\] \[\] $' >/dev/null check named operator in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator 101 -J ${OUTPUT}/test-setoperator-aliases.json.out @@ -262,7 +262,7 @@ echo Testing setting numerical group with aliases results in named group ... 
echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup 201 > ${OUTPUT}/test-setgroup-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setgroup-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[Test\] $' >/dev/null +head -1 ${OUTPUT}/test-setgroup-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[Test\] $' >/dev/null check named group in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup 201 -J ${OUTPUT}/test-setgroup-aliases.json.out @@ -276,7 +276,7 @@ echo Testing setting named operator with aliases ... echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator ExampleOperator > ${OUTPUT}/test-setoperator-named-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setoperator-named-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[ExampleOperator\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-setoperator-named-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[ExampleOperator\] \[\] $' >/dev/null check named operator in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator ExampleOperator -J ${OUTPUT}/test-setoperator-named-aliases.json.out @@ -288,7 +288,7 @@ echo Testing setting named group with aliases ... echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup ExampleGroup > ${OUTPUT}/test-setgroup-named-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setgroup-named-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[ExampleGroup\] $' >/dev/null +head -1 ${OUTPUT}/test-setgroup-named-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[ExampleGroup\] $' >/dev/null check named group in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup ExampleGroup -J ${OUTPUT}/test-setgroup-named-aliases.json.out @@ -302,7 +302,7 @@ echo Testing setting numerical operator not in aliases results in numeric operat echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator 1234 > ${OUTPUT}/test-setoperator-not-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setoperator-not-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[1234\] \[\] $' >/dev/null +head -1 ${OUTPUT}/test-setoperator-not-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[1234\] \[\] $' >/dev/null check numeric operator in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setoperator 1234 -J ${OUTPUT}/test-setoperator-not-aliases.json.out @@ -314,7 +314,7 @@ echo Testing setting numerical group not in aliases results in numeric group ... echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup 4321 > ${OUTPUT}/test-setgroup-not-aliases.pres.out check read json base:http and create base:http presentation output -head -1 ${OUTPUT}/test-setgroup-not-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{,8}\] \[\] \[4321\] $' >/dev/null +head -1 ${OUTPUT}/test-setgroup-not-aliases.pres.out | egrep ' \[1:4 base http\] \[[0-9a-f]{8}\] \[\] \[4321\] $' >/dev/null check numeric group in presentation echo "$JSON_PAYLOAD" | $NMSGTOOL -j - --setgroup 4321 -J ${OUTPUT}/test-setgroup-not-aliases.json.out From 269db3ad3d167a84e987f54668b7eddb64d1bcf7 Mon Sep 17 00:00:00 2001 From: "Jeremy C. 
Reed" Date: Wed, 29 May 2024 15:11:19 +0000 Subject: [PATCH 10/24] for test use shlibpath_var variable for shared libraries variable name instead of using hard-coded LD_LIBRARY_PATH --- configure.ac | 1 + tests/nmsg-dnsqr-tests/test.sh.in | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8833c7749..fe9779692 100644 --- a/configure.ac +++ b/configure.ac @@ -51,6 +51,7 @@ AC_CONFIG_FILES([tests/nmsg-dns-tests/test.sh], AC_CONFIG_FILES([tests/nmsg-dnsobs-tests/test.sh], [chmod +x tests/nmsg-dnsobs-tests/test.sh]) +AC_SUBST(shlibpath_var) AC_CONFIG_FILES([tests/nmsg-dnsqr-tests/test.sh], [chmod +x tests/nmsg-dnsqr-tests/test.sh]) diff --git a/tests/nmsg-dnsqr-tests/test.sh.in b/tests/nmsg-dnsqr-tests/test.sh.in index 27444e35b..4cbeb3c1f 100755 --- a/tests/nmsg-dnsqr-tests/test.sh.in +++ b/tests/nmsg-dnsqr-tests/test.sh.in @@ -79,7 +79,7 @@ check pcap-to-nmsg ######## # try example code too -env LD_LIBRARY_PATH=@abs_top_builddir@/nmsg/.libs/:$LD_LIBRARY_PATH @abs_top_builddir@/examples/.libs/nmsg-dnsqr2pcap ${SOURCE}.nmsg ${OUTPUT}.nmsg.pcap.out +env @shlibpath_var@=@abs_top_builddir@/nmsg/.libs/:$@shlibpath_var@ @abs_top_builddir@/examples/.libs/nmsg-dnsqr2pcap ${SOURCE}.nmsg ${OUTPUT}.nmsg.pcap.out check read nmsg base:dnsqr and generate pcap output using example cmp -s ${SOURCE}.pcap ${OUTPUT}.nmsg.pcap.out check example-nmsg-to-pcap From 2ab9b5a4d8a704fc64c8ea2e75b6d992b850f3a3 Mon Sep 17 00:00:00 2001 From: Chris Mikkelson Date: Tue, 18 Jun 2024 16:25:50 -0500 Subject: [PATCH 11/24] Fix various compiler warnings * Use snprintf instead of sprintf to address MacOS deprecation warning. * Use proper format for uint64_t, int64_t * Correct result type for rd_kafka_subscribe --- nmsg/kafkaio.c | 16 ++++++++-------- nmsg/strbuf.c | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c index 088d69272..7d21b0a46 100644 --- a/nmsg/kafkaio.c +++ b/nmsg/kafkaio.c @@ -148,7 +148,7 @@ _kafka_addr_init(kafka_ctx_t ctx, const char *addr) else if (strcasecmp(comma, "newest") == 0) ctx->offset = RD_KAFKA_OFFSET_END; else if ((pound != NULL) && (isdigit(*comma) || (*comma == '-' && isdigit(*(comma+1))))) - sscanf(comma, "%ld", &ctx->offset); + sscanf(comma, "%"PRIi64, &ctx->offset); else { _nmsg_dprintf(2, "%s: invalid offset in Kafka endpoint: %s\n", __func__, comma); return false; @@ -165,12 +165,12 @@ _kafka_addr_init(kafka_ctx_t ctx, const char *addr) else if (ctx->offset == RD_KAFKA_OFFSET_END) strcpy(str_off, "newest"); else - sprintf(str_off, "%ld", ctx->offset); + snprintf(str_off, sizeof(str_off), "%"PRIi64, ctx->offset); if (ctx->partition == RD_KAFKA_PARTITION_UA) strcpy(str_part, "unassigned"); else - sprintf(str_part, "%d", ctx->partition); + snprintf(str_part, sizeof(str_part), "%d", ctx->partition); _nmsg_dprintf(3, "%s: broker: %s, topic: %s, partition: %s, offset: %s (consumer group: %s)\n", __func__, ctx->broker, ctx->topic_str, str_part, str_off, @@ -201,7 +201,7 @@ _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) struct addrinfo hints = {0}; char errstr[1024], client_id[256], hostname[256]; rd_kafka_topic_partition_list_t *subscription; - rd_kafka_conf_res_t res; + rd_kafka_resp_err_t res; rd_kafka_topic_conf_t *topic_conf; if (!_kafka_config_set_option(config, "enable.partition.eof", "true")) { @@ -274,7 +274,7 @@ _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) res = rd_kafka_subscribe(ctx->handle, subscription); 
rd_kafka_topic_partition_list_destroy(subscription); - if (res != RD_KAFKA_CONF_OK) { + if (res != RD_KAFKA_RESP_ERR_NO_ERROR) { _nmsg_dprintf(2, "%s: failed to subscribe to partition list\n", __func__); return false; } @@ -429,12 +429,12 @@ _kafka_ctx_destroy(kafka_ctx_t ctx) rd_kafka_poll(ctx->handle, ctx->timeout); - _nmsg_dprintf(3, "%s: consumed %lu messages\n", __func__, ctx->consumed); + _nmsg_dprintf(3, "%s: consumed %"PRIu64" messages\n", __func__, ctx->consumed); } else { _kafka_flush(ctx); - _nmsg_dprintf(3, "%s: produced %lu messages\n", __func__, ctx->produced); - _nmsg_dprintf(3, "%s: delivered %lu messages\n", __func__, ctx->delivered); + _nmsg_dprintf(3, "%s: produced %"PRIu64" messages\n", __func__, ctx->produced); + _nmsg_dprintf(3, "%s: delivered %"PRIu64" messages\n", __func__, ctx->delivered); _nmsg_dprintf(3, "%s: internal queue has %d messages \n", __func__, rd_kafka_outq_len(ctx->handle)); } } diff --git a/nmsg/strbuf.c b/nmsg/strbuf.c index 04e4d130f..68a9a8451 100644 --- a/nmsg/strbuf.c +++ b/nmsg/strbuf.c @@ -169,7 +169,7 @@ nmsg_strbuf_append_str_json(struct nmsg_strbuf *sb, const char *str, size_t len) } else { char hexbuf[8]; - sprintf(hexbuf, "\\u00%.2x", *(const unsigned char*) scan); + snprintf(hexbuf, sizeof(hexbuf), "\\u00%.2x", *(const unsigned char*) scan); res = nmsg_strbuf_append_str(sb, hexbuf, 6); } From 278af135eefc0fd74c014409e487c5345b821c7f Mon Sep 17 00:00:00 2001 From: Demian Vladi Date: Mon, 24 Jun 2024 11:08:36 -0700 Subject: [PATCH 12/24] Use correct function for error --- nmsg/kafkaio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c index 7d21b0a46..cbb562dab 100644 --- a/nmsg/kafkaio.c +++ b/nmsg/kafkaio.c @@ -489,7 +489,7 @@ _kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *op return; if (rkmessage->err != RD_KAFKA_RESP_ERR_NO_ERROR) { _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, rkmessage->err, - rd_kafka_message_errstr(ctx->message)); + rd_kafka_err2str(rkmessage->err)); ctx->state = kafka_state_break; rd_kafka_yield(rk); } From 849581790bd8aacc623eba20e78467863b96c886 Mon Sep 17 00:00:00 2001 From: Maximillian Crawford Date: Wed, 26 Jun 2024 14:06:31 -0400 Subject: [PATCH 13/24] Metadata updates for version 1.2.0 --- ChangeLog | 8 ++++++++ configure.ac | 4 ++-- debian/changelog | 8 ++++++++ nmsg.spec | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index a4be59d8f..54da4a8e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +nmsg (1.2.0) + + * Add support for Kafka i/o in JSON and binary forms. + + * Add support for prometheus metrics export. + + * Replace mutex protected counters and flags with stdatomic operations. + nmsg (1.1.2) * Now allow nmsgtool --setgroup, --setoperator, and --setsource to diff --git a/configure.ac b/configure.ac index fe9779692..f807e4950 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ AC_PREREQ(2.64) m4_define(nmsg_major_version, 1) -m4_define(nmsg_minor_version, 1) -m4_define(nmsg_patchlevel_version, 2) +m4_define(nmsg_minor_version, 2) +m4_define(nmsg_patchlevel_version, 0) m4_define(nmsg_version, nmsg_major_version.nmsg_minor_version.nmsg_patchlevel_version) m4_define(nmsg_version_number, diff --git a/debian/changelog b/debian/changelog index 597b33a16..978340cfd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +nmsg (1.2.0-1) debian-fsi; urgency=medium + + * Add support for Kafka i/o in JSON and binary forms. 
+ * Add support for prometheus metrics export. + * Replace mutex protected counters and flags with stdatomic operations. + + -- Farsight Security Inc Wed, 26 Jun 2024 13:56:58 -0400 + nmsg (1.1.2-1) debian-fsi; urgency=medium * Now allow nmsgtool --setgroup, --setoperator, and --setsource to diff --git a/nmsg.spec b/nmsg.spec index 01474384e..4bcf2c2e9 100644 --- a/nmsg.spec +++ b/nmsg.spec @@ -1,5 +1,5 @@ Name: nmsg -Version: 1.1.2 +Version: 1.2.0 Release: 1%{?dist} Summary: network message encapsulation library From 2576f0a1ba5468da875ec0df8e5e7f215f97f9f0 Mon Sep 17 00:00:00 2001 From: Maximillian Crawford Date: Wed, 26 Jun 2024 14:16:00 -0400 Subject: [PATCH 14/24] Update copyright dates --- COPYRIGHT | 2 +- debian/copyright | 2 +- nmsg/base/dnsqr.c | 2 +- nmsg/input.c | 2 +- nmsg/input.h | 1 + nmsg/input_json.c | 2 +- nmsg/input_nmsg.c | 1 + nmsg/io.c | 2 +- nmsg/msgmod/message.c | 2 +- nmsg/msgmod/transparent.h | 2 +- nmsg/msgmod/transparent_payload.c | 2 +- nmsg/nmsg.h | 1 + nmsg/output.c | 2 +- nmsg/output.h | 1 + nmsg/output_json.c | 2 +- nmsg/output_nmsg.c | 2 +- nmsg/private.h | 2 +- nmsg/strbuf.c | 2 +- nmsg/zbuf.c | 1 + src/dt_prom.c | 2 +- src/dt_prom.h | 2 +- src/io.c | 2 +- src/nmsgtool.c | 2 +- src/nmsgtool.h | 2 +- src/process_args.c | 2 +- 25 files changed, 25 insertions(+), 20 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index 6d3162823..75147ddcf 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,6 +1,6 @@ Most nmsg code is under the following copyright and license: - Copyright (c) 2023 DomainTools LLC + Copyright (c) 2023-2024 DomainTools LLC Copyright (c) 2008-2021 by Farsight Security, Inc. Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/debian/copyright b/debian/copyright index 60d9d773a..bfc820d94 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,7 +1,7 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Files: * -Copyright: 2023 DomainTools LLC +Copyright: 2023-2024 DomainTools LLC 2008-2021 by Farsight Security, Inc. License: Apache-2.0 Licensed under the Apache License, Version 2.0 (the "License"); you diff --git a/nmsg/base/dnsqr.c b/nmsg/base/dnsqr.c index 74e0b328b..41332e5e8 100644 --- a/nmsg/base/dnsqr.c +++ b/nmsg/base/dnsqr.c @@ -1,7 +1,7 @@ /* dnsqr nmsg message module */ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2010-2016, 2018, 2019, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/input.c b/nmsg/input.c index bf7010c6e..421455f9d 100644 --- a/nmsg/input.c +++ b/nmsg/input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/input.h b/nmsg/input.h index e15dcb720..5aa580bc1 100644 --- a/nmsg/input.h +++ b/nmsg/input.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2008-2015, 2017-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/input_json.c b/nmsg/input_json.c index 2844e971d..4a5d20689 100644 --- a/nmsg/input_json.c +++ b/nmsg/input_json.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2015, 2019 by Farsight Security, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/input_nmsg.c b/nmsg/input_nmsg.c index b8503ae25..8c6a794d8 100644 --- a/nmsg/input_nmsg.c +++ b/nmsg/input_nmsg.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2009-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/io.c b/nmsg/io.c index 3118c44c8..587e8b536 100644 --- a/nmsg/io.c +++ b/nmsg/io.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/msgmod/message.c b/nmsg/msgmod/message.c index ed6b13221..8110d48f7 100644 --- a/nmsg/msgmod/message.c +++ b/nmsg/msgmod/message.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2009-2012, 2015-2016, 2018-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/msgmod/transparent.h b/nmsg/msgmod/transparent.h index 9c8e854b0..19d44efdc 100644 --- a/nmsg/msgmod/transparent.h +++ b/nmsg/msgmod/transparent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2009, 2015 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/msgmod/transparent_payload.c b/nmsg/msgmod/transparent_payload.c index 11d965463..7743902e6 100644 --- a/nmsg/msgmod/transparent_payload.c +++ b/nmsg/msgmod/transparent_payload.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2009-2017, 2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/nmsg.h b/nmsg/nmsg.h index d55447884..e393acee2 100644 --- a/nmsg/nmsg.h +++ b/nmsg/nmsg.h @@ -2,6 +2,7 @@ #define NMSG_H /* + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2008-2015 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/output.c b/nmsg/output.c index d41c2a185..5321f8193 100644 --- a/nmsg/output.c +++ b/nmsg/output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/output.h b/nmsg/output.h index 186dd3403..eabb94501 100644 --- a/nmsg/output.h +++ b/nmsg/output.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2008-2019 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/output_json.c b/nmsg/output_json.c index d35279844..65d0391eb 100644 --- a/nmsg/output_json.c +++ b/nmsg/output_json.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2015 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/output_nmsg.c b/nmsg/output_nmsg.c index ef289cd1d..f97c7b855 100644 --- a/nmsg/output_nmsg.c +++ b/nmsg/output_nmsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019 by Farsight Security, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/private.h b/nmsg/private.h index e02febec8..28f2358fd 100644 --- a/nmsg/private.h +++ b/nmsg/private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2015, 2019, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/strbuf.c b/nmsg/strbuf.c index 68a9a8451..2749bba55 100644 --- a/nmsg/strbuf.c +++ b/nmsg/strbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2009, 2012-2013, 2016 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nmsg/zbuf.c b/nmsg/zbuf.c index 65f131b77..3138988e1 100644 --- a/nmsg/zbuf.c +++ b/nmsg/zbuf.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2009, 2011-2013, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/dt_prom.c b/src/dt_prom.c index fd690f4e6..0a6fa7fd6 100644 --- a/src/dt_prom.c +++ b/src/dt_prom.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. * * Prometheus+microhttpd embedding routines. diff --git a/src/dt_prom.h b/src/dt_prom.h index af15a9684..fcadfe223 100644 --- a/src/dt_prom.h +++ b/src/dt_prom.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. * * Prometheus+microhttpd helper/function definitions for embedding. diff --git a/src/io.c b/src/io.c index dec4b79f6..c0c42d1db 100644 --- a/src/io.c +++ b/src/io.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/nmsgtool.c b/src/nmsgtool.c index f5771c26b..9b6bc0ca9 100644 --- a/src/nmsgtool.c +++ b/src/nmsgtool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/nmsgtool.h b/src/nmsgtool.h index a50fcebcf..d3f5ab3c5 100644 --- a/src/nmsgtool.h +++ b/src/nmsgtool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/process_args.c b/src/process_args.c index 5c6b26291..d65a0fa46 100644 --- a/src/process_args.c +++ b/src/process_args.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 DomainTools LLC + * Copyright (c) 2023-2024 DomainTools LLC * Copyright (c) 2008-2015, 2019, 2021 by Farsight Security, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); From 5135277bcce6233c796fff66f66cec98051568cb Mon Sep 17 00:00:00 2001 From: Demian Vladi <126811849+dvladi77@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:04:54 -0700 Subject: [PATCH 15/24] Kafka qa changes (#147) * QA review fixes --------- Co-authored-by: D Waitzman --- doc/docbook/nmsgtool.1 | 64 +++++++++++++++++------------------- doc/docbook/nmsgtool.docbook | 39 +++++++++++++--------- nmsg/kafkaio.c | 9 +++++ src/io.c | 16 ++++++--- src/nmsgtool.c | 8 ++--- src/process_args.c | 2 +- 6 files changed, 81 insertions(+), 57 deletions(-) diff --git a/doc/docbook/nmsgtool.1 b/doc/docbook/nmsgtool.1 index d818c983e..512884bb6 100644 --- a/doc/docbook/nmsgtool.1 +++ b/doc/docbook/nmsgtool.1 @@ -1,13 +1,13 @@ '\" t .\" Title: nmsgtool -.\" Author: [FIXME: author] [see http://docbook.sf.net/el/author] -.\" Generator: DocBook XSL Stylesheets v1.79.1 -.\" Date: 07/01/2021 +.\" Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author] +.\" Generator: DocBook XSL Stylesheets v1.79.2 +.\" Date: 06/28/2024 .\" Manual: .\" Source: .\" Language: English .\" -.TH "NMSGTOOL" "1" "07/01/2021" "" "" +.TH "NMSGTOOL" "1" "06/28/2024" "" "" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -202,25 +202,6 @@ parameter\&. For example, means "on the hour"\&. .RE .PP -\fB\--kafkakey\fR \fIfieldname\fR -.RS 4 -Use the named NMSG message field's value in canonical representation as key for payloads sent to Kafka. -.RE -.PP -\fB\-\-readtopic\fR \fIkafka\fR -.RS 4 -Read NMSG payloads from a Kafka endpoint\&. -The address has the format of \fBproto:topic[#partition|%group_id]@broker[:port][,offset]\fR -where a choice of specifying either a partition number or consumer group ID is optional, as is the offset value. -However, a protocol value of either \fBnmsg\fR or \fBjson\fR must be supplied. -Offset is either a numerical value or the string 'oldest' or 'newest' in order -to start retrieval at the oldest/newest messages in the Kafka topic. -.sp -An example of possible arguments to -\fB\-\-readtopic\fR -is "nmsg:ch202#0@kafka.local.com:9092,3000" to indicate that nmsgtool shall read \fBnmsg\fR containers from topic "ch202" on partition 0 at offset 3000 from Kafka broker kafka.local.com, port 9092. -.RE -.PP \fB\-R\fR, \fB\-\-randomize\fR .RS 4 Randomize the initial offset within the interval that the process is stopped or outputs are reopened\&. @@ -241,6 +222,21 @@ specifies the command to run on output files after rotation\&. If is set to the empty string \*(Aq\*(Aq, then no command is executed and only file rotation is performed\&. .RE .PP +\fB\-\-kafkakey\fR \fIfieldname\fR +.RS 4 +Use the value of the named NMSG message field in its canonical representation as the key for payloads in the JSON format that are sent to Kafka\&. Can also be set via NMSG_KAFKA_KEY environment variable\&. +.RE +.PP +\fB\-\-readkafka\fR \fIkafka\fR +.RS 4 +Read NMSG payloads in either binary or JSON format from a Kafka endpoint\&. The address +\fIkafka\fR +has format +\fBproto:topic[#partition|%group_id]@broker[:port][,offset]\fR\&. Either a partition number or a consumer group ID may be optionally supplied\&. Also optional is an offset consisting of either a numerical value or the string \*(Aqoldest\*(Aq or \*(Aqnewest\*(Aq in order to start retrieval at the oldest/newest messages in the Kafka topic\&. 
An example of a possible +\fIkafka\fR +endpoint is "nmsg:ch202#0@kafka\&.example\&.com:9092,3000" to indicate that nmsgtool shall read nmsg containers from topic "ch202" on partition 0 at offset 3000 from the Kafka broker at kafka\&.example\&.com, port 9092\&. +.RE +.PP \fB\-b\fR \fIfilter\fR, \fB\-\-bpf\fR \fIfilter\fR .RS 4 Filter pcap inputs (\fB\-p\fR @@ -290,6 +286,11 @@ may be used to specify the policy action to take if all filters in the filter ch is specified, any messages which are declined by the filter chain will be silently discarded\&. .RE .PP +\fB\-\-promport\fR \fIport\fR +.RS 4 +Deliver counted statistics, such as the total number of payloads sent or received, to Prometheus on the specified port\&. +.RE +.PP \fB\-r\fR \fIfile\fR, \fB\-\-readnmsg\fR \fIfile\fR .RS 4 Read NMSG payloads from a file\&. @@ -449,17 +450,14 @@ are required\&. Write NMSG payloads to a file\&. .RE .PP -\fB\-\-writetopic\fR \fIkafka\fR +\fB\-\-writekafka\fR \fIkafka\fR .RS 4 -Write NMSG payloads to a Kafka endpoint. -The address has the format of \fBproto:topic[#partition|%group_id]@broker[:port]\fR -where the choice of specifying either a partition number or consumer group ID is optional, -but a protocol value of either \fBnmsg\fR or \fBjson\fR must be supplied. -.sp -An examples of possible arguments to -\fB\--writetopic\fR -is "nmsg:ch202#0@kafka.local.com:9092" to indicate that nmsgtool shall write \fBnmsg\fR containers to topic "ch202" on partition 0 to Kafka broker kafka.local.com, port 9092. -Note that nmsgtool ignores offsets for Kafka producers. +Write NMSG payloads in either binary or JSON format to a Kafka endpoint\&. The address +\fIkafka\fR +has format +\fBproto:topic[#partition|%group_id]@broker[:port]\fR\&. Either a partition number or a consumer group ID may be optionally supplied\&. An example of a possible +\fIkafka\fR +endpoint is "nmsg:ch202#0@kafka\&.example\&.com:9092" to indicate that nmsgtool shall write nmsg containers to topic "ch202" on partition 0 to Kafka broker kafka\&.example\&.com, port 9092\&. Note that nmsgtool ignores offsets for Kafka producers\&. .RE .PP \fB\-o\fR \fIfile\fR, \fB\-\-writepres\fR \fIfile\fR diff --git a/doc/docbook/nmsgtool.docbook b/doc/docbook/nmsgtool.docbook index 52648352d..413c0bbd3 100644 --- a/doc/docbook/nmsgtool.docbook +++ b/doc/docbook/nmsgtool.docbook @@ -227,25 +227,26 @@ fieldname - Use the named NMSG message field's value in canonical representation as key for - payloads sent to Kafka. + Use the value of the named NMSG message field in its canonical representation as the key for payloads + in the JSON format that are sent to Kafka. Can also be set via NMSG_KAFKA_KEY environment variable. - kafka + kafka - Read NMSG payloads from a Kafka endpoint. The address kafka - has format proto:topic[#partition|%group_id]@broker[:port][,offset]. Either a partition - number or a consumer group ID may be optionally supplied. Also optional is an offset - consisting of either a numerical value or the string 'oldest' or 'newest' in order to - start retrieval at the oldest/newest messages in the Kafka topic. + Read NMSG payloads in either binary or JSON format from a Kafka endpoint. + The address kafka has format proto:topic[#partition|%group_id]@broker[:port][,offset]. + Either a partition number or a consumer group ID may be optionally supplied. + Also optional is an offset consisting of either a numerical value or the string + 'oldest' or 'newest' in order to start retrieval at the oldest/newest + messages in the Kafka topic. 
An example of a possible kafka endpoint is - "nmsg:ch202#0@kafka.local.com:9092,3000" to indicate that nmsgtool shall read nmsg + "nmsg:ch202#0@kafka.example.com:9092,3000" to indicate that nmsgtool shall read nmsg containers from topic "ch202" on partition 0 at offset 3000 from the Kafka broker at - kafka.local.com, port 9092. + kafka.example.com, port 9092. @@ -310,6 +311,14 @@ + + port + + Deliver counted statistics, such as the total number of payloads sent + or received, to Prometheus on the specified port. + + + file file @@ -460,16 +469,16 @@ - kafka + kafka - Write NMSG payloads to a Kafka endpoint. The address kafka - has format proto:topic[#partition|%group_id]@broker[:port]. + Write NMSG payloads in either binary or JSON format to a Kafka endpoint. + The address kafka has format proto:topic[#partition|%group_id]@broker[:port]. Either a partition number or a consumer group ID may be optionally supplied. An example of a possible kafka endpoint is - "nmsg:ch202#0@kafka.local.com:9092" to indicate that nmsgtool shall write + "nmsg:ch202#0@kafka.example.com:9092" to indicate that nmsgtool shall write nmsg containers to topic "ch202" on partition 0 to Kafka - broker kafka.local.com, port 9092. + broker kafka.example.com, port 9092. Note that nmsgtool ignores offsets for Kafka producers. diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c index cbb562dab..3fcb67b6d 100644 --- a/nmsg/kafkaio.c +++ b/nmsg/kafkaio.c @@ -59,6 +59,8 @@ static void _kafka_error_cb(rd_kafka_t *rk, int err, const char *reason, void *o static void _kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque); +static void _kafka_log_cb(const rd_kafka_t *rk, int level, const char *fac, const char *buf); + static bool _kafka_config_set_option(rd_kafka_conf_t *config, const char *option, const char *value); static bool _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config); @@ -388,6 +390,7 @@ _kafka_init_kafka(const char *addr, bool consumer, int timeout) rd_kafka_conf_set_opaque(config, ctx); rd_kafka_conf_set_error_cb(config, _kafka_error_cb); + rd_kafka_conf_set_log_cb(config, _kafka_log_cb); snprintf(tmp, sizeof(tmp), "%d", SIGIO); if (!_kafka_config_set_option(config, "internal.termination.signal", tmp) || @@ -496,6 +499,12 @@ _kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *op ctx->delivered++; } +static void +_kafka_log_cb(const rd_kafka_t *rk, int level, const char *fac, const char *buf) +{ + _nmsg_dprintf(3, "%s: %d: %s - %s\n", __func__, level, fac, buf); +} + static bool _kafka_consumer_start_queue(kafka_ctx_t ctx) { bool res = true; diff --git a/src/io.c b/src/io.c index dec4b79f6..e7ee4524b 100644 --- a/src/io.c +++ b/src/io.c @@ -351,14 +351,18 @@ add_kafka_input(nmsgtool_ctx *c, const char *str_address) { _add_kafka_nmsg_input(c, addr); return; } - +#ifdef HAVE_JSON_C addr = _strip_prefix_if_exists(str_address, "json:"); if (addr != NULL) { _add_kafka_json_input(c, addr); return; } - fprintf(stderr, "%s: Error: nmsg or json format must be set for Kafka topic\n", + fprintf(stderr, "%s: Error: nmsg or json protocol must be set for Kafka topic\n", argv_program); +#else /* HAVE_JSON_C */ + fprintf(stderr, "%s: Error: nmsg protocol must be set for Kafka topic\n", + argv_program); +#endif /* HAVE_JSON_C */ exit(EXIT_FAILURE); } @@ -369,14 +373,18 @@ add_kafka_output(nmsgtool_ctx *c, const char *str_address) { _add_kafka_nmsg_output(c, addr); return; } - +#ifdef HAVE_JSON_C addr = _strip_prefix_if_exists(str_address, "json:"); if (addr != NULL) { 
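		/* The "json:" prefix was found and stripped; hand the bare
		 * endpoint to the JSON-format Kafka output setup. This branch
		 * is compiled only when json-c is present (HAVE_JSON_C);
		 * otherwise the #else branch below reports that only the nmsg
		 * protocol is available. */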
_add_kafka_json_output(c, addr); return; } - fprintf(stderr, "%s: Error: nmsg or json format must be set for Kafka topic\n", + fprintf(stderr, "%s: Error: nmsg or json protocol must be set for Kafka topic\n", argv_program); +#else /* HAVE_JSON_C */ + fprintf(stderr, "%s: Error: nmsg protocol must be set for Kafka topic\n", + argv_program); +#endif /* HAVE_JSON_C */ exit(EXIT_FAILURE); } diff --git a/src/nmsgtool.c b/src/nmsgtool.c index f5771c26b..963e793a6 100644 --- a/src/nmsgtool.c +++ b/src/nmsgtool.c @@ -157,7 +157,7 @@ static argv_t args[] = { #endif /* defined(HAVE_LIBRDKAFKA) && defined(HAVE_JSON_C) */ - {'\0', "readtopic", + {'\0', "readkafka", ARGV_CHAR_P | ARGV_FLAG_ARRAY, &ctx.r_kafka, "kafka", @@ -223,10 +223,10 @@ static argv_t args[] = { "ACCEPT|DROP", "default filter chain policy" }, - { '\0', "prometheus", + { '\0', "promport", ARGV_U_SHORT, &ctx.prom_port, - "prometheus port", + "port", #ifdef HAVE_PROMETHEUS "serve prometheus counters on port" }, #else /* HAVE_PROMETHEUS */ @@ -322,7 +322,7 @@ static argv_t args[] = { "file", "write nmsg data to file" }, - { '\0', "writetopic", + { '\0', "writekafka", ARGV_CHAR_P | ARGV_FLAG_ARRAY, &ctx.w_kafka, "kafka", diff --git a/src/process_args.c b/src/process_args.c index 5c6b26291..8aeb9b9cb 100644 --- a/src/process_args.c +++ b/src/process_args.c @@ -107,7 +107,7 @@ process_args(nmsgtool_ctx *c) { fprintf(stderr, "without librdkafka support"); break; case 3: - fprintf(stderr, "without libzmq or librdkafka support"); + fprintf(stderr, "without libzmq and librdkafka support"); default: break; } From 579bb2cc66e865d86c36b1f492d10f1a82c8705d Mon Sep 17 00:00:00 2001 From: Allan LeSage Date: Wed, 3 Jul 2024 00:08:26 +0000 Subject: [PATCH 16/24] Add kafka-related symbols. --- debian/libnmsg8.symbols | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/debian/libnmsg8.symbols b/debian/libnmsg8.symbols index 3b8116674..51c6844a2 100644 --- a/debian/libnmsg8.symbols +++ b/debian/libnmsg8.symbols @@ -28,6 +28,8 @@ libnmsg.so.8 libnmsg8 #MINVER# nmsg_input_open_callback@Base 0.7.0 nmsg_input_open_file@Base 0.5.0 nmsg_input_open_json@Base 0.10.0 + nmsg_input_open_kafka_endpoint@Base 1.2.0 + nmsg_input_open_kafka_json@Base 1.2.0 nmsg_input_open_null@Base 0.7.0 nmsg_input_open_pcap@Base 0.5.0 nmsg_input_open_pres@Base 0.5.0 @@ -130,6 +132,8 @@ libnmsg.so.8 libnmsg8 #MINVER# nmsg_output_open_callback@Base 0.5.0 nmsg_output_open_file@Base 0.5.0 nmsg_output_open_json@Base 0.10.0 + nmsg_output_open_kafka_endpoint@Base 1.2.0 + nmsg_output_open_kafka_json@Base 1.2.0 nmsg_output_open_pres@Base 0.11.1 nmsg_output_open_sock@Base 0.5.0 nmsg_output_open_zmq@Base 0.14.0 From f8b375a96712f3f052920b1f92dc08fee30bc40a Mon Sep 17 00:00:00 2001 From: Allan LeSage Date: Wed, 3 Jul 2024 00:26:51 +0000 Subject: [PATCH 17/24] Increment libtool version to 10:0:2. --- Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index 1e3318992..d091a86dc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -125,7 +125,7 @@ nobase_nodist_include_HEADERS = \ lib_LTLIBRARIES = nmsg/libnmsg.la -VERSION_INFO = 9:1:1 +VERSION_INFO = 10:0:2 nmsg_libnmsg_la_LDFLAGS = \ $(AM_LDFLAGS) \ From 489078ec4b9f206a45d961d0a717967fe90e0a28 Mon Sep 17 00:00:00 2001 From: Demian Vladi <126811849+dvladi77@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:52:54 -0700 Subject: [PATCH 18/24] Producer reconnect to broker (#146) Treat transport/brokers down/message timeout issues as non-fatal. 
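As a standalone sketch, the classification this patch introduces looks
roughly like the function below (the rd_kafka_resp_err_t constants are
librdkafka's; the nmsg-specific state changes and drop counters of the
real _kafka_error_cb and _kafka_delivery_cb are omitted, and the function
name is illustrative):

#include <librdkafka/rdkafka.h>
#include <stdbool.h>

/* True for errors a producer should ride out while librdkafka keeps
 * retrying in the background; false for errors treated as fatal. */
static bool
kafka_error_is_transient(rd_kafka_resp_err_t err)
{
	switch (err) {
	case RD_KAFKA_RESP_ERR__TRANSPORT:		/* socket disconnect */
	case RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN:	/* no broker reachable */
	case RD_KAFKA_RESP_ERR__MSG_TIMED_OUT:		/* delivery timed out */
		return true;
	default:
		return false;
	}
}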
--------- Co-authored-by: D Waitzman Co-authored-by: Stephen Watt --- nmsg/kafkaio.c | 96 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c index 3fcb67b6d..4141a392e 100644 --- a/nmsg/kafkaio.c +++ b/nmsg/kafkaio.c @@ -38,6 +38,7 @@ struct kafka_ctx { uint64_t consumed; uint64_t produced; uint64_t delivered; + uint64_t dropped; int64_t offset; rd_kafka_t *handle; rd_kafka_topic_t *topic; @@ -67,6 +68,8 @@ static bool _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config); static bool _kafka_init_producer(kafka_ctx_t ctx, rd_kafka_conf_t *config); +static void _kafka_set_state(kafka_ctx_t ctx, const char *func, kafka_state state); + /* Private. */ static bool @@ -181,6 +184,31 @@ _kafka_addr_init(kafka_ctx_t ctx, const char *addr) return true; } +static const char * +_kafka_state_to_str(kafka_state state) +{ + switch(state) { + case kafka_state_init: + return "init"; + case kafka_state_ready: + return "ready"; + case kafka_state_flush: + return "flush"; + case kafka_state_break: + return "break"; + default: + return "unknown"; + } + +} + +static void +_kafka_set_state(kafka_ctx_t ctx, const char *func, kafka_state state) { + _nmsg_dprintf(3, "%s changing state from %s to %s\n", func, + _kafka_state_to_str(ctx->state), _kafka_state_to_str(state)); + ctx->state = state; +} + static bool _kafka_config_set_option(rd_kafka_conf_t *config, const char *option, const char *value) { char errstr[1024]; @@ -222,7 +250,7 @@ _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) hints.ai_flags = AI_CANONNAME; if (getaddrinfo(hostname, NULL, &hints, &ai) == 0) { - if(ai->ai_canonname != NULL) { + if (ai->ai_canonname != NULL) { strncpy(hostname, ai->ai_canonname, sizeof(hostname)); hostname[sizeof(hostname) - 1] = '\0'; } @@ -233,6 +261,7 @@ _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) if (snprintf(client_id, sizeof(client_id), "nmsgtool.%010u@%s", getpid(), hostname) == sizeof(client_id)) client_id[sizeof(client_id) - 1 ] = '\0'; + _nmsg_dprintf(3, "%s: client ID: %s\n", __func__, client_id); if (!_kafka_config_set_option(config, "client.id", client_id)) { rd_kafka_conf_destroy(config); @@ -297,7 +326,7 @@ _kafka_init_consumer(kafka_ctx_t ctx, rd_kafka_conf_t *config) } } - ctx->state = kafka_state_ready; + _kafka_set_state(ctx, __func__, kafka_state_ready); return true; } @@ -310,6 +339,11 @@ _kafka_init_producer(kafka_ctx_t ctx, rd_kafka_conf_t *config) rd_kafka_conf_set_dr_msg_cb(config, _kafka_delivery_cb); + if (!_kafka_config_set_option(config, "enable.idempotence", "true")) { + rd_kafka_conf_destroy(config); + return false; + } + /* Create Kafka producer handle */ ctx->handle = rd_kafka_new(RD_KAFKA_PRODUCER, config, errstr, sizeof(errstr)); if (ctx->handle == NULL) { @@ -333,7 +367,7 @@ _kafka_init_producer(kafka_ctx_t ctx, rd_kafka_conf_t *config) return false; } - ctx->state = kafka_state_ready; + _kafka_set_state(ctx, __func__, kafka_state_ready); return true; } @@ -349,7 +383,7 @@ _kafka_init_kafka(const char *addr, bool consumer, int timeout) ctx = my_calloc(1, sizeof(struct kafka_ctx)); - ctx->state = kafka_state_init; + _kafka_set_state(ctx, __func__, kafka_state_init); ctx->timeout = timeout; ctx->consumer = consumer; @@ -438,7 +472,9 @@ _kafka_ctx_destroy(kafka_ctx_t ctx) _nmsg_dprintf(3, "%s: produced %"PRIu64" messages\n", __func__, ctx->produced); _nmsg_dprintf(3, "%s: delivered %"PRIu64" messages\n", __func__, ctx->delivered); - _nmsg_dprintf(3, "%s: 
internal queue has %d messages \n", __func__, rd_kafka_outq_len(ctx->handle)); + _nmsg_dprintf(3, "%s: dropped %"PRIu64" messages\n", __func__, ctx->dropped); + _nmsg_dprintf(3, "%s: internal queue has %d messages \n", __func__, + rd_kafka_outq_len(ctx->handle)); } } @@ -471,15 +507,24 @@ _kafka_error_cb(rd_kafka_t *rk, int err, const char *reason, void *opaque) { kafka_ctx_t ctx = (kafka_ctx_t) opaque; rd_kafka_resp_err_t err_kafka = (rd_kafka_resp_err_t) err; - - switch(err_kafka) { + if (ctx == NULL) { + _nmsg_dprintf(2, "%s: unexpected Kafka opaque is NULL", __func__); + return; + } + switch (err_kafka) { + /* Keep retrying on socket disconnect, brokers down and message timeout */ + case RD_KAFKA_RESP_ERR__TRANSPORT: + case RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN: + case RD_KAFKA_RESP_ERR__MSG_TIMED_OUT: + _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, err, reason); + break; case RD_KAFKA_RESP_ERR__UNKNOWN_PARTITION: case RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART: case RD_KAFKA_RESP_ERR_OFFSET_OUT_OF_RANGE: /* At the moment treat any broker's error as fatal */ default: - ctx->state = kafka_state_break; _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, err, reason); + _kafka_set_state(ctx, __func__, kafka_state_break); break; } } @@ -488,15 +533,30 @@ static void _kafka_delivery_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque) { kafka_ctx_t ctx = (kafka_ctx_t) opaque; - if (rkmessage == NULL) + if (rkmessage == NULL) { + rd_kafka_yield(rk); return; - if (rkmessage->err != RD_KAFKA_RESP_ERR_NO_ERROR) { - _nmsg_dprintf(2, "%s: got Kafka error %d: %s\n", __func__, rkmessage->err, - rd_kafka_err2str(rkmessage->err)); - ctx->state = kafka_state_break; + } + + if (ctx == NULL) { + _nmsg_dprintf(2, "%s: unexpected Kafka opaque is NULL", __func__); rd_kafka_yield(rk); + return; } - ctx->delivered++; + if (rkmessage->err != RD_KAFKA_RESP_ERR_NO_ERROR) { + int level = 2; + if (rkmessage->err != RD_KAFKA_RESP_ERR__MSG_TIMED_OUT) { + _kafka_set_state(ctx, __func__, kafka_state_break); + rd_kafka_yield(rk); + } else { + ctx->dropped++; + level = 4; + } + _nmsg_dprintf(level, "%s: got Kafka error %d: %s\n", __func__, rkmessage->err, + rd_kafka_err2str(rkmessage->err)); + + } else + ctx->delivered++; } static void @@ -513,7 +573,7 @@ _kafka_consumer_start_queue(kafka_ctx_t ctx) { const rd_kafka_metadata_t *mdata; rd_kafka_metadata_topic_t * topic; - for(ndx = 0; ndx < 10; ++ndx) { + for (ndx = 0; ndx < 10; ++ndx) { err = rd_kafka_metadata(ctx->handle, 0, ctx->topic, &mdata, NMSG_RBUF_TIMEOUT); if (err == RD_KAFKA_RESP_ERR_NO_ERROR) break; @@ -545,7 +605,7 @@ _kafka_consumer_start_queue(kafka_ctx_t ctx) { goto out; } - for(ndx = 0; ndx < topic->partition_cnt; ++ndx) { + for (ndx = 0; ndx < topic->partition_cnt; ++ndx) { if (rd_kafka_consume_start_queue(ctx->topic, ndx, ctx->offset, ctx->queue) == -1) { err = rd_kafka_last_error(); _nmsg_dprintf(2, "%s: failed to start Kafka consumer (err %d: %s)\n", @@ -658,7 +718,7 @@ kafka_write(kafka_ctx_t ctx, const uint8_t *key, size_t key_len, const uint8_t * /* Poll with no timeout to trigger delivery reports without waiting */ rd_kafka_poll(ctx->handle, 0); - return nmsg_res_success; + return ((ctx->state == kafka_state_ready) ? 
nmsg_res_success : nmsg_res_failure); } kafka_ctx_t @@ -731,7 +791,7 @@ kafka_stop(kafka_ctx_t ctx) { if (ctx == NULL && ctx->consumer) return; - ctx->state = kafka_state_break; + _kafka_set_state(ctx, __func__, kafka_state_break); } void From 64ea5d8cbc86393854bd58ce27b79deff2b07ce4 Mon Sep 17 00:00:00 2001 From: Demian Vladi <126811849+dvladi77@users.noreply.github.com> Date: Wed, 3 Jul 2024 15:11:26 -0700 Subject: [PATCH 19/24] Fix seg fault (#150) Put correct order of operation in kafka destroy routine --------- Co-authored-by: D Waitzman --- nmsg/kafkaio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nmsg/kafkaio.c b/nmsg/kafkaio.c index 4141a392e..d0b3eb552 100644 --- a/nmsg/kafkaio.c +++ b/nmsg/kafkaio.c @@ -468,13 +468,12 @@ _kafka_ctx_destroy(kafka_ctx_t ctx) _nmsg_dprintf(3, "%s: consumed %"PRIu64" messages\n", __func__, ctx->consumed); } else { - _kafka_flush(ctx); - _nmsg_dprintf(3, "%s: produced %"PRIu64" messages\n", __func__, ctx->produced); _nmsg_dprintf(3, "%s: delivered %"PRIu64" messages\n", __func__, ctx->delivered); _nmsg_dprintf(3, "%s: dropped %"PRIu64" messages\n", __func__, ctx->dropped); _nmsg_dprintf(3, "%s: internal queue has %d messages \n", __func__, rd_kafka_outq_len(ctx->handle)); + _kafka_flush(ctx); } } From 19be8a1951a147a77cb06eba0249d615d30696d7 Mon Sep 17 00:00:00 2001 From: "Jeremy C. Reed" Date: Wed, 3 Jul 2024 22:13:28 +0000 Subject: [PATCH 20/24] fix freebsd ports references --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 817cb6bb5..49f545e18 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ nmsg has the following external dependencies: * [zmq](http://zeromq.org/) * [rdkafka](https://github.com/confluentinc/librdkafka) - + * [json-c](https://github.com/json-c/json-c) * [zlib](http://www.zlib.net/) @@ -41,8 +41,8 @@ Debian-based systems. On FreeBSD systems, the following ports should be installed, if available: - devel/libzmq - devel/librdkafka + net/libzmq3 + net/librdkafka devel/json-c devel/pkgconf devel/protobuf From c460911ad3323fb3180f414a32f7cde591eb3f00 Mon Sep 17 00:00:00 2001 From: "Jeremy C. Reed" Date: Wed, 3 Jul 2024 22:14:00 +0000 Subject: [PATCH 21/24] fix doxygen parameters for new kafka functions comments/docs only change --- nmsg/input.h | 4 ++-- nmsg/output.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/nmsg/input.h b/nmsg/input.h index e15dcb720..6644429fc 100644 --- a/nmsg/input.h +++ b/nmsg/input.h @@ -138,7 +138,7 @@ nmsg_input_open_zmq_endpoint(void *zmq_ctx, const char *ep); * * \see nmsg_output_open_kafka_endpoint() * - * \param[in] addr Kafka endpoint address string + * \param[in] ep Kafka endpoint address string * * \return Opaque pointer that is NULL on failure or non-NULL on success. */ @@ -206,7 +206,7 @@ nmsg_input_open_json(int fd); * See nmsg_output_open_json for details of the JSON format, or * nmsg_input_open_kafka_endpoint for the details of the address string. * - * \param[in] Kafka endpoint address string. + * \param[in] address Kafka endpoint address string. * * \return Opaque pointer that is NULL on failure or non-NULL on success. 
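 *
 * For illustration, a hypothetical call using the endpoint syntax
 * documented for nmsg_input_open_kafka_endpoint() (topic, broker, and
 * offset values here are examples only):
 *
 *	nmsg_input_open_kafka_json("ch202#0@kafka.example.com:9092,oldest");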
*/ diff --git a/nmsg/output.h b/nmsg/output.h index 186dd3403..f0e30e041 100644 --- a/nmsg/output.h +++ b/nmsg/output.h @@ -126,7 +126,7 @@ nmsg_output_open_zmq_endpoint(void *zmq_ctx, const char *ep, size_t bufsz); * * \see nmsg_input_open_kafka_endpoint() * - * \param[in] addr Kafka endpoint address string + * \param[in] ep Kafka endpoint address string * * \param[in] bufsz Value between #NMSG_WBUFSZ_MIN and #NMSG_WBUFSZ_MAX. * @@ -183,9 +183,9 @@ nmsg_output_open_json(int fd); * See nmsg_output_open_json for details of the JSON format, or * nmsg_input_open_kafka_endpoint for the details of the address string. * - * \param[in] Kafka endpoint address string. - * \param[in] An optional NMSG field name whose content will be used as a - * a Kafka producer key. Otherwise, its value should be NULL. + * \param[in] addr Kafka endpoint address string. + * \param[in] key_field An optional NMSG field name whose content will be + * used as a Kafka producer key. Otherwise, its value should be NULL. * * \return Opaque pointer that is NULL on failure or non-NULL on success. */ From b172d019ea2118333f9da7dcf3fceeaf58a2f76f Mon Sep 17 00:00:00 2001 From: Maximillian Crawford Date: Fri, 5 Jul 2024 12:10:32 -0400 Subject: [PATCH 22/24] Updated copyright date and changelogs per @reedjc --- ChangeLog | 5 +++++ debian/changelog | 3 +++ nmsg/zbuf.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 54da4a8e9..d575c218e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ nmsg (1.2.0) * Replace mutex protected counters and flags with stdatomic operations. + * New command line options: --kafkakey, --readkafka, --writekafka, --promport + + * New functions: nmsg_input_open_kafka_endpoint(), nmsg_input_open_kafka_json(), + nmsg_output_open_kafka_endpoint(), nmsg_output_open_kafka_json() + nmsg (1.1.2) * Now allow nmsgtool --setgroup, --setoperator, and --setsource to diff --git a/debian/changelog b/debian/changelog index 978340cfd..5410daebd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,9 @@ nmsg (1.2.0-1) debian-fsi; urgency=medium * Add support for Kafka i/o in JSON and binary forms. * Add support for prometheus metrics export. * Replace mutex protected counters and flags with stdatomic operations. + * New command line options: --kafkakey, --readkafka, --writekafka, --promport + * New functions: nmsg_input_open_kafka_endpoint(), nmsg_input_open_kafka_json(), + nmsg_output_open_kafka_endpoint(), nmsg_output_open_kafka_json() -- Farsight Security Inc Wed, 26 Jun 2024 13:56:58 -0400 diff --git a/nmsg/zbuf.c b/nmsg/zbuf.c index 3138988e1..4af73dbd5 100644 --- a/nmsg/zbuf.c +++ b/nmsg/zbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 DomainTools LLC + * Copyright (c) 2023 DomainTools LLC * Copyright (c) 2009, 2011-2013, 2021 by Farsight Security, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); From b54a22f5e98be7ac87a10e06879215076d975976 Mon Sep 17 00:00:00 2001 From: Maximillian Crawford Date: Mon, 8 Jul 2024 11:59:57 -0400 Subject: [PATCH 23/24] Further copyright updates --- src/dt_prom.c | 2 +- src/dt_prom.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dt_prom.c b/src/dt_prom.c index 0a6fa7fd6..b8c2944a4 100644 --- a/src/dt_prom.c +++ b/src/dt_prom.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024 DomainTools LLC + * Copyright (c) 2024 DomainTools LLC * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc. * * Prometheus+microhttpd embedding routines. 
diff --git a/src/dt_prom.h b/src/dt_prom.h
index fcadfe223..d1eabe7b5 100644
--- a/src/dt_prom.h
+++ b/src/dt_prom.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024 DomainTools LLC
+ * Copyright (c) 2024 DomainTools LLC
  * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc.
  *
  * Prometheus+microhttpd helper/function definitions for embedding.

From bee8924f9d1772d4b235c0ca791839bf336a3d30 Mon Sep 17 00:00:00 2001
From: Maximillian Crawford
Date: Mon, 8 Jul 2024 16:42:10 -0400
Subject: [PATCH 24/24] Remove incorrect old copyright info

---
 src/dt_prom.c | 1 -
 src/dt_prom.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/dt_prom.c b/src/dt_prom.c
index b8c2944a4..76dc47734 100644
--- a/src/dt_prom.c
+++ b/src/dt_prom.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2024 DomainTools LLC
- * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc.
  *
  * Prometheus+microhttpd embedding routines.
  *
diff --git a/src/dt_prom.h b/src/dt_prom.h
index d1eabe7b5..13c00c1c7 100644
--- a/src/dt_prom.h
+++ b/src/dt_prom.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2024 DomainTools LLC
- * Copyright (c) 2008-2019, 2021 by Farsight Security, Inc.
  *
  * Prometheus+microhttpd helper/function definitions for embedding.
  *
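To close out the series' Prometheus thread: dt_prom.c/dt_prom.h embed a
metrics endpoint by pairing a Prometheus client library with
libmicrohttpd, which is what the --promport option exposes. A minimal
sketch of that general pattern follows, written against the
prometheus-client-c API (prom.h/promhttp.h); whether dt_prom.c uses
exactly these calls is not shown in this series, so treat the names,
signatures, and the metric name below as assumptions.

#include <microhttpd.h>
#include <prom.h>
#include <promhttp.h>

/* Sketch: register one counter and serve /metrics on the given port. */
static prom_counter_t *payloads_out;

static int
prom_sketch_start(unsigned short port)
{
	prom_collector_registry_default_init();
	payloads_out = prom_collector_registry_must_register_metric(
	    prom_counter_new("nmsg_payloads_output_total",
	                     "payloads written by nmsgtool", 0, NULL));
	/* NULL selects the default registry initialized above. */
	promhttp_set_active_collector_registry(NULL);
	return (promhttp_start_daemon(MHD_USE_SELECT_INTERNALLY, port,
	    NULL, NULL) != NULL) ? 0 : -1;
}

/* Callers would then bump the counter once per payload written:
 *	prom_counter_inc(payloads_out, NULL);
 */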