From 5012615a8c7ca6f1fde5b1905dace5498c82e09f Mon Sep 17 00:00:00 2001 From: Kira Traynor Date: Wed, 29 May 2024 11:42:17 -0600 Subject: [PATCH 1/5] Fix tests and remove extra overridden methods. --- .../example/connector/RandomDocConnector.java | 30 +++++++------------ .../connector/RandomDocConnectorTest.java | 12 +++----- .../lucille/example/stage/AddUnitsTest.java | 2 +- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/lucille/example/connector/RandomDocConnector.java b/src/main/java/com/lucille/example/connector/RandomDocConnector.java index 4f979fd..031b080 100644 --- a/src/main/java/com/lucille/example/connector/RandomDocConnector.java +++ b/src/main/java/com/lucille/example/connector/RandomDocConnector.java @@ -4,6 +4,8 @@ import com.kmwllc.lucille.core.*; import com.typesafe.config.Config; import java.util.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Connector implementation that creates x number documents with randomly generated field values. 
@@ -15,32 +17,32 @@ */ public class RandomDocConnector extends AbstractConnector { + private static final Logger log = LoggerFactory.getLogger(RandomDocConnector.class); + private int numDocs; private List fieldNames; private Random rand = new Random(); - private int MAX = Integer.MAX_VALUE; - public RandomDocConnector(Config config) { + public RandomDocConnector(Config config) throws ConnectorException { super(config); + if ( config.getInt("numDocs") > 1000000) { + throw new ConnectorException("The number of documents (numDocs) cannot be greater than 1000000."); + } numDocs = config.getInt("numDocs"); fieldNames = config.getStringList("fieldNames"); } - @Override - public void preExecute(String runId) { - // calculate maximum bound for random number generator - this.MAX = this.numDocs * 1000; - } - @Override public void execute(Publisher publisher) throws ConnectorException { + int randBound = this.numDocs * 1000; + log.info("Generating {} documents with random values.", this.numDocs); for (int i = 0; i < this.numDocs; i++) { Document doc = Document.create(Integer.toString(i)); for (String field : this.fieldNames) { - doc.setField(field, this.rand.nextInt(this.MAX)); + doc.setField(field, this.rand.nextInt(randBound)); } try { publisher.publish(doc); @@ -49,14 +51,4 @@ public void execute(Publisher publisher) throws ConnectorException { } } } - - @Override - public void postExecute(String runId) throws ConnectorException { - super.postExecute(runId); - } - - @Override - public void close() throws ConnectorException { - super.close(); - } } diff --git a/src/test/java/com/lucille/example/connector/RandomDocConnectorTest.java b/src/test/java/com/lucille/example/connector/RandomDocConnectorTest.java index de1f77e..72198b8 100644 --- a/src/test/java/com/lucille/example/connector/RandomDocConnectorTest.java +++ b/src/test/java/com/lucille/example/connector/RandomDocConnectorTest.java @@ -1,6 +1,8 @@ package com.lucille.example.connector; import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import com.kmwllc.lucille.core.Connector; import com.kmwllc.lucille.core.Document; import com.kmwllc.lucille.core.Publisher; @@ -25,14 +27,8 @@ public void testExecute() throws Exception { List docs = messenger.getDocsSentForProcessing(); // ensure doc count is correct assertEquals(50, docs.size()); - // ensure all expected fields are there ["randTime", "randNum"] - try { - docs.forEach(document -> document.validateFieldNames("randTime", "randNum")); - assert true; - } catch (IllegalArgumentException e) { - assert false; - throw new IllegalArgumentException(e); - } + // ensure all expected fields are in the documents: ["randTime", "randNum"] + docs.forEach(document -> assertTrue(document.has("randTime") && document.has("randNum"))); } } diff --git a/src/test/java/com/lucille/example/stage/AddUnitsTest.java b/src/test/java/com/lucille/example/stage/AddUnitsTest.java index d223ccb..de60f70 100644 --- a/src/test/java/com/lucille/example/stage/AddUnitsTest.java +++ b/src/test/java/com/lucille/example/stage/AddUnitsTest.java @@ -9,7 +9,7 @@ public class AddUnitsTest { - private StageFactory factory = StageFactory.of(AddUnitsStage.class); + private final StageFactory factory = StageFactory.of(AddUnitsStage.class); @Test public void testAddUnitBefore() throws StageException { From 47284e181e5f62fbc14801d7ac53ffb4a419bb7e Mon Sep 17 00:00:00 2001 From: Kira Traynor Date: Wed, 29 May 2024 14:17:08 -0600 Subject: [PATCH 2/5] Add opensearch docs to readme --- README.md | 27 +++++++++++++++++---------- example.conf | 1 + 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2ee1429..b175220 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,26 @@ -# Lucille Example Project (Maven) +# Lucille Example Project This project is an example of how a developer can leverage [Lucille]([url](https://github.com/kmwtechnology/lucille)), the opensource search ETL solution, for 
their own use case. +You can create your own stages, connectors, etc. by adding them to the src code and using them in a configuration file. -You can find the current release of [Lucille on maven central]([url](https://mvnrepository.com/artifact/com.kmwllc/lucille-core)). - -# Requirements - +## Requirements - Java 11 -# Getting Started +## Maven +You can find the current release of [Lucille on maven central]([url](https://mvnrepository.com/artifact/com.kmwllc/lucille-core)). -- Include `lucille-core` and `lucille-bom` as a maven dependency. -- Set up the run configurations. You can find `example.conf`. +### Getting Started - Compile the code to create the necessary jar files, `mvn clean install` in the top directory. -- Run `./lucille.sh` which runs a java process +- Run `./lucille.sh` in the top directory which runs a java process that takes the configuration in `example.conf` to extract, transform, and index the data. +- The example creates dummy docs, transforms the data a little, and creates docs to be indexed into OpenSearch. +The default here does not actually send the docs, but if you want to actually see the indexed data, here are some instructions for setting up OpenSearch locally. + - [OpenSearch Installation Docs](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/index/) + - We would recommend using Docker to install OpenSearch if you are already familiar with Docker. + - Once installed, make sure the OpenSearch section of `example.conf` is set up correctly for your configuration of OpenSearch (localhost port, user/password) AND set `indexer.sendEnabled` to `true`. + - + +## Gradle +TODO + -You can create your own stages, connectors, etc. by adding them to the src code and using them in a configuration file. 
diff --git a/example.conf b/example.conf index 6712d63..a1f9e1d 100644 --- a/example.conf +++ b/example.conf @@ -34,6 +34,7 @@ pipelines: [ indexer { type: "OpenSearch" + sendEnabled: false # enable to actually index docs to OpenSearch } # OpenSearch From d0b233e6377f122eb3842179e85d40f1cb1190ea Mon Sep 17 00:00:00 2001 From: Kira Traynor Date: Mon, 22 Jul 2024 10:11:58 -0600 Subject: [PATCH 3/5] Reformat file lines, add assembly to create tar, add custom logging, and add unit test to run lucille. --- example.conf => conf/example.conf | 14 +- lucille.sh | 2 +- pom.xml | 131 ++++++++++-------- src/assembly/assembly.xml | 32 +++++ src/main/resources/log4j2.xml | 32 +++++ .../java/com/lucille/example/RunLucille.java | 17 +++ src/test/resources/JsonLayout.json | 48 +++++++ 7 files changed, 212 insertions(+), 64 deletions(-) rename example.conf => conf/example.conf (83%) create mode 100644 src/assembly/assembly.xml create mode 100644 src/main/resources/log4j2.xml create mode 100644 src/test/java/com/lucille/example/RunLucille.java create mode 100644 src/test/resources/JsonLayout.json diff --git a/example.conf b/conf/example.conf similarity index 83% rename from example.conf rename to conf/example.conf index a1f9e1d..98044fb 100644 --- a/example.conf +++ b/conf/example.conf @@ -9,13 +9,13 @@ # and indexed into an OpenSearch index connectors: [ - { - class: "com.lucille.example.connector.RandomDocConnector" - name: "test_connector" - numDocs: 10 - fieldNames: ["randTime"] - pipeline: "simple_pipeline" - } + { + class: "com.lucille.example.connector.RandomDocConnector" + name: "test_connector" + numDocs: 10 + fieldNames: ["randTime"] + pipeline: "simple_pipeline" + } ] pipelines: [ diff --git a/lucille.sh b/lucille.sh index 1c90a4e..ecd9c69 100755 --- a/lucille.sh +++ b/lucille.sh @@ -1,3 +1,3 @@ #!/bin/bash # run this script from top level lucille-example-mvn directory via ./lucille.sh -java -Dconfig.file=example.conf -cp "target/classes/lib/*:target/*" 
com.kmwllc.lucille.core.Runner -local \ No newline at end of file +java -Dlog4j.configurationFile=log4j2.xml -Dconfig.file=conf/example.conf -cp "target/lib/*:target/*" com.kmwllc.lucille.core.Runner -local \ No newline at end of file diff --git a/pom.xml b/pom.xml index 7ab0fc4..7246ad0 100644 --- a/pom.xml +++ b/pom.xml @@ -2,64 +2,83 @@ - 4.0.0 + 4.0.0 - com.lucille.example - lucille-example - 1.0-SNAPSHOT + com.lucille.example + lucille-example + 1.0-SNAPSHOT - - 11 - 11 - + + 11 + 11 + - - - com.kmwllc - lucille-core - 0.2.1 - provided - - - com.kmwllc - lucille-bom - 0.2.1 - pom - - - com.typesafe - config - 1.4.1 - - - junit - junit - 4.13.1 - test - - + + + com.kmwllc + lucille-core + 0.2.1 + provided + + + com.kmwllc + lucille-bom + 0.2.1 + pom + + + com.typesafe + config + 1.4.1 + + + junit + junit + 4.13.1 + test + + - - - - org.apache.maven.plugins - maven-dependency-plugin - - - copy-dependencies - prepare-package - - copy-dependencies - - - ${project.build.directory}/classes/lib - false - false - true - - - - - - + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + prepare-package + + copy-dependencies + + + ${project.build.directory}/lib + false + false + true + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + package-assembly + package + + single + + + lucille-example-${project.version} + + src/assembly/assembly.xml + + + + + + + \ No newline at end of file diff --git a/src/assembly/assembly.xml b/src/assembly/assembly.xml new file mode 100644 index 0000000..b90920a --- /dev/null +++ b/src/assembly/assembly.xml @@ -0,0 +1,32 @@ + + + bin + + tar.gz + + false + + + + ${project.basedir}/src/main/script/bin + /bin + true + 0755 + + + ${project.basedir}/src/main/conf + conf + + + ${project.basedir}/target + + *.jar + + lib + + + + diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml new file mode 100644 index 0000000..2822926 --- /dev/null +++ b/src/main/resources/log4j2.xml @@ -0,0 
+1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/java/com/lucille/example/RunLucille.java b/src/test/java/com/lucille/example/RunLucille.java new file mode 100644 index 0000000..eb30be1 --- /dev/null +++ b/src/test/java/com/lucille/example/RunLucille.java @@ -0,0 +1,17 @@ +package com.lucille.example; + +import com.kmwllc.lucille.core.Runner; + +/** + * An alternative to using lucille.sh. Run this class from your IDE to debug parts of your run configuration. + */ +public class RunLucille { + public static void main(String[] args) throws Exception { + // keep a config.file supplied by the run configuration; otherwise fall back to conf/example.conf + String configFile = "conf/example.conf"; + System.setProperty("config.file", System.getProperty("config.file", configFile)); + + Runner.main(args); + } + +} diff --git a/src/test/resources/JsonLayout.json b/src/test/resources/JsonLayout.json new file mode 100644 index 0000000..ef5c704 --- /dev/null +++ b/src/test/resources/JsonLayout.json @@ -0,0 +1,48 @@ +{ + "@timestamp": { + "$resolver": "timestamp", + "pattern": { + "format": "yy/MM/dd'T'HH:mm:ss'Z'", + "timeZone": "UTC" + } + }, + "log.level": { + "$resolver": "level", + "field": "name" + }, + "message": { + "$resolver": "message", + "stringified": true + }, + "process.thread.name": { + "$resolver": "thread", + "field": "name" + }, + "log.logger": { + "$resolver": "logger", + "field": "name" + }, + "labels": { + "$resolver": "mdc", + "flatten": true, + "stringified": true + }, + "tags": { + "$resolver": "ndc" + }, + "error.type": { + "$resolver": "exception", + "field": "className" + }, + "error.message": { + "$resolver": "exception", + "field": "message" + }, + "error.stack_trace": { + "$resolver": "exception", + "field": "stackTrace", + "stackTrace": { + "stringified": true + } + } +} \ No newline at end of file From 5079f5be844b5cbb3b13aa806fb4da1e4938ca9d Mon Sep 17 00:00:00 2001 From: Kira Traynor Date: Tue, 30 Jul 2024 11:58:10 -0600 Subject: 
[PATCH 4/5] Remove unnecessary local flag --- lucille.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucille.sh b/lucille.sh index ecd9c69..bdcf1ad 100755 --- a/lucille.sh +++ b/lucille.sh @@ -1,3 +1,3 @@ #!/bin/bash # run this script from top level lucille-example-mvn directory via ./lucille.sh -java -Dlog4j.configurationFile=log4j2.xml -Dconfig.file=conf/example.conf -cp "target/lib/*:target/*" com.kmwllc.lucille.core.Runner -local \ No newline at end of file +java -Dlog4j.configurationFile=log4j2.xml -Dconfig.file=conf/example.conf -cp "target/lib/*:target/*" com.kmwllc.lucille.core.Runner \ No newline at end of file From 317a311c210337f8cc113f87f316445527c064bf Mon Sep 17 00:00:00 2001 From: Kira Traynor Date: Tue, 30 Jul 2024 11:59:25 -0600 Subject: [PATCH 5/5] This project builds on java versions higher than 11 as well. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b175220..ace1726 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This project is an example of how a developer can leverage [Lucille]([url](https You can create your own stages, connectors, etc. by adding them to the src code and using them in a configuration file. ## Requirements -- Java 11 +- Java 11 or higher ## Maven You can find the current release of [Lucille on maven central]([url](https://mvnrepository.com/artifact/com.kmwllc/lucille-core)).