kmwtechnology · Jozurf · Oct 21, 2024 · Oct 21, 2024 · Oct 22, 2024 · Oct 24, 2024
diff --git a/application-example.conf b/application-example.conf
@@ -54,7 +54,7 @@ pipelines: [
         dest: ["output1", "output2", "output3"],
 
         # stage-specific parameter
-        update_mode: "overwrite"
+        updateMode: "overwrite"
       }
     ]
   },

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/core/UpdateMode.java b/lucille-core/src/main/java/com/kmwllc/lucille/core/UpdateMode.java
@@ -5,7 +5,7 @@
 public enum UpdateMode {
   APPEND("append"), OVERWRITE("overwrite"), SKIP("skip");
 
-  public static final String CONFIG_PATH = "update_mode";
+  public static final String CONFIG_PATH = "updateMode";
   public static final UpdateMode DEFAULT = OVERWRITE;
 
   private String text;

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/AddRandomField.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/AddRandomField.java
@@ -60,13 +60,13 @@ public class AddRandomField extends Stage {
   private List<String> uniqueValues;
 
   public AddRandomField(Config config) throws StageException {
-    super(config, new StageSpec().withOptionalProperties("input_data_path", "field_name", "range_size", "min_num_of_terms",
-        "max_num_of_terms", "is_nested"));
-    this.inputDataPath = ConfigUtils.getOrDefault(config, "input_data_path", null);
-    this.fieldName = ConfigUtils.getOrDefault(config, "field_name", "data");
-    this.minNumOfTerms = ConfigUtils.getOrDefault(config, "min_num_of_terms", null);
-    this.maxNumOfTerms = ConfigUtils.getOrDefault(config, "max_num_of_terms", null);
-    this.isNested = ConfigUtils.getOrDefault(config, "is_nested", false);
+    super(config, new StageSpec().withOptionalProperties("inputDataPath", "fieldName", "rangeSize", "minNumOfTerms",
+        "maxNumOfTerms", "isNested"));
+    this.inputDataPath = ConfigUtils.getOrDefault(config, "inputDataPath", null);
+    this.fieldName = ConfigUtils.getOrDefault(config, "fieldName", "data");
+    this.minNumOfTerms = ConfigUtils.getOrDefault(config, "minNumOfTerms", null);
+    this.maxNumOfTerms = ConfigUtils.getOrDefault(config, "maxNumOfTerms", null);
+    this.isNested = ConfigUtils.getOrDefault(config, "isNested", false);
     this.dataArr = null;
     this.rangeSize = null;
     this.uniqueValues = null;
@@ -86,7 +86,7 @@ public AddRandomField(Config config) throws StageException {
   @Override
   public void start() throws StageException {
     this.dataArr = this.inputDataPath != null ? getFileData(this.inputDataPath) : null;
-    this.rangeSize = ConfigUtils.getOrDefault(config, "range_size",
+    this.rangeSize = ConfigUtils.getOrDefault(config, "rangeSize",
         this.dataArr != null ? this.dataArr.size() : this.maxNumOfTerms);
     this.uniqueValues = getUniqueValues(this.dataArr != null, this.dataArr);
 

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/ApplyRegex.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/ApplyRegex.java
@@ -25,10 +25,10 @@
  *   - regex (String) : A regex expression to find matches for. Matches will be extracted and placed in the destination fields.
  *     If the regex includes capturing groups, the value of the first group will be used.
  *   <br>
- *   - update_mode (String. Optional) : Determines how writing will be handling if the destination field is already populated.
+ *   - updateMode (String, Optional) : Determines how writing will be handled if the destination field is already populated.
  *   <br>
  *     Can be 'overwrite', 'append' or 'skip'. Defaults to 'overwrite'.
- *   - ignore_case (Boolean, Optional) : Determines whether the regex matcher should ignore case. Defaults to false.
+ *   - ignoreCase (Boolean, Optional) : Determines whether the regex matcher should ignore case. Defaults to false.
  *   <br>
  *   - multiline (Boolean, Optional) : Determines whether the regex matcher should allow matches across multiple lines. Defaults to false.
  *   <br>
@@ -52,14 +52,14 @@ public class ApplyRegex extends Stage {
 
   public ApplyRegex(Config config) {
     super(config, new StageSpec().withRequiredProperties("source", "dest", "regex")
-        .withOptionalProperties("update_mode", "ignore_case", "multiline", "dotall", "literal"));
+        .withOptionalProperties("updateMode", "ignoreCase", "multiline", "dotall", "literal"));
 
     this.sourceFields = config.getStringList("source");
     this.destFields = config.getStringList("dest");
     this.regexExpr = config.getString("regex");
     this.updateMode = UpdateMode.fromConfig(config);
 
-    this.ignoreCase = ConfigUtils.getOrDefault(config, "ignore_case", false);
+    this.ignoreCase = ConfigUtils.getOrDefault(config, "ignoreCase", false);
     this.multiline = ConfigUtils.getOrDefault(config, "multiline", false);
     this.dotall = ConfigUtils.getOrDefault(config, "dotall", false);
     this.literal = ConfigUtils.getOrDefault(config, "literal", false);

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/Base64Decode.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/Base64Decode.java
@@ -15,9 +15,9 @@ public class Base64Decode extends Stage {
   private String outputField;
 
   public Base64Decode(Config config) {
-    super(config, new StageSpec().withRequiredProperties("input_field", "output_field"));
-    inputField = config.getString("input_field");
-    outputField = config.getString("output_field");
+    super(config, new StageSpec().withRequiredProperties("inputField", "outputField"));
+    inputField = config.getString("inputField");
+    outputField = config.getString("outputField");
   }
 
   @Override

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/ChunkText.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/ChunkText.java
@@ -26,25 +26,25 @@
  * - source (String) : field of which Chunking Stage will chunk the text.
  * - dest (String, optional): the name of the field that will hold the chunk contents in the children documents.
  *   Defaults to "chunk".
- * - chunking_method (Type Enum, optional) : how to split contents in source. Defaults to Sentence chunking
+ * - chunkingMethod (Type Enum, optional) : how to split contents in source. Defaults to Sentence chunking
  *  1. fixed chunking ("fixed"): split by variable lengthToSplit
  *  2. paragraph chunking ("paragraph"): split by 2 consecutive line break sequence (\n, \r, \r\n) with optional whitespaces between,
  *     e.g. \n\n \n \n
  *  3. sentence chunking ("sentence"): use openNLP sentence model for splitting
  *  4. custom chunking ("custom"): regex option in config required, used to split content
  * - regex (String, only for custom chunking): regEx that will be used to split chunks
- * - length_to_split (Integer, only for fixed chunking): length of characters of each initial chunk before processing
- * - pre_merge_min_chunk_len (Integer, optional): removes and append chunk to the neighboring chunk if below given number of characters,
+ * - lengthToSplit (Integer, only for fixed chunking): length of characters of each initial chunk before processing
+ * - preMergeMinChunkLen (Integer, optional): removes a chunk and appends it to the neighboring chunk if it is below the given number of characters,
  *    defaults appending to next chunk.
- * - pre_merge_max_chunk_len (Integer, optional): truncates the chunks if over given amount, applies before merging and overlapping
- * - chunks_to_merge (Integer, optional) : how many chunks to merge into the final new Chunk before overlapping is taken place.
+ * - preMergeMaxChunkLen (Integer, optional): truncates the chunks if over given amount, applies before merging and overlapping
+ * - chunksToMerge (Integer, optional) : how many chunks to merge into the final new chunk before overlapping takes place.
  *    defaults to 1, keeping the chunks as they were after splitting.
- *    e.g. chunks_to_merge: 2 -> { chunk1/chunk2, chunk3/chunk4, chunk5/chunk6}
- * - overlap_percentage (Integer, optional) : adds on neighboring chunk's content based on percentage of current chunk, defaults to 0
- * - chunks_to_overlap (Integer, optional) : indicate the number of overlap of smaller chunks to overlap while merging into final chunk
- *    e.g. chunks_to_overlap: 1 -> { chunk1/chunk2/chunk3, chunk3/chunk4/chunk5, chunk5/chunk6/chunk7}
- *         chunks_to_overlap: 2 -> { chunk1/chunk2/chunk3, chunk2/chunk3/chunk4, chunk3/chunk4/chunk5}
- * - character_limit (Integer, optional) : hard limit number of characters in the final chunk. Truncate rest. Performed after
+ *    e.g. chunksToMerge: 2 -> { chunk1/chunk2, chunk3/chunk4, chunk5/chunk6}
+ * - overlapPercentage (Integer, optional) : adds on neighboring chunk's content based on percentage of current chunk, defaults to 0
+ * - chunksToOverlap (Integer, optional) : the number of smaller chunks to overlap while merging into the final chunk
+ *    e.g. chunksToOverlap: 1 -> { chunk1/chunk2/chunk3, chunk3/chunk4/chunk5, chunk5/chunk6/chunk7}
+ *         chunksToOverlap: 2 -> { chunk1/chunk2/chunk3, chunk2/chunk3/chunk4, chunk3/chunk4/chunk5}
+ * - characterLimit (Integer, optional) : hard limit number of characters in the final chunk. Truncate rest. Performed after
  *   merging & overlapping if they are set.
  *
  *  - child document fields:
@@ -53,25 +53,25 @@
  *       - "offset" : number of character offset from start of document
  *       - "length" : number of characters in this chunk
  *       - "chunk_number" : chunk number
- *       - "total_chunk_number" : total chunk number produced from parent document
+ *       - "total_chunks" : total number of chunks produced from the parent document
  *       - "chunk" : the chunk contents. field name can be changed with config option "dest"
  *
  *  e.g. of paragraph chunking configuration, with a minimum size of 50 characters per chunk
  *  {
  *   source: "text"
- *   chunking_method: "paragraph"
- *   pre_merge_min_chunk_len: 50
- *   clean_chunks: true
+ *   chunkingMethod: "paragraph"
+ *   preMergeMinChunkLen: 50
+ *   cleanChunks: true
  *  }
  *
  *  e.g. of sentence chunking configuration with 5 sentences per chunk and 1 sentence of overlap, with a limit of 2000 characters
  *  {
  *   source: "text"
- *   chunking_method: "sentence"
- *   chunks_to_merge: 5
- *   chunks_to_overlap: 1
- *   clean_chunks: true
- *   character_limit: 2000
+ *   chunkingMethod: "sentence"
+ *   chunksToMerge: 5
+ *   chunksToOverlap: 1
+ *   cleanChunks: true
+ *   characterLimit: 2000
  *  }
  */
 
@@ -94,25 +94,25 @@ public class ChunkText extends Stage {
 
   public ChunkText(Config config) throws StageException {
     super(config, new StageSpec()
-        .withOptionalProperties("chunking_method", "chunks_to_merge", "dest", "regex", "character_limit",
-            "clean_chunks", "overlap_percentage", "length_to_split", "pre_merge_min_chunk_len", "pre_merge_max_chunk_len",
-            "chunks_to_overlap")
+        .withOptionalProperties("chunkingMethod", "chunksToMerge", "dest", "regex", "characterLimit",
+            "cleanChunks", "overlapPercentage", "lengthToSplit", "preMergeMinChunkLen", "preMergeMaxChunkLen",
+            "chunksToOverlap")
         .withRequiredProperties("source"));
     this.source = config.getString("source");
     this.dest = config.hasPath("dest") ? config.getString("dest") : "chunk";
     this.method = ChunkingMethod.fromConfig(config);
     this.regEx = config.hasPath("regex") ? config.getString("regex") : "";
-    this.lengthToSplit = config.hasPath("length_to_split") && config.getInt("length_to_split") > 0
-        ? config.getInt("length_to_split") : null;
-    this.cleanChunks = config.hasPath("clean_chunks") ? config.getBoolean("clean_chunks") : false;
-    this.preMergeMinChunkLen = config.hasPath("pre_merge_min_chunk_len") && config.getInt("pre_merge_min_chunk_len") > 0
-        ? config.getInt("pre_merge_min_chunk_len") : -1;
-    this.preMergeMaxChunkLen = config.hasPath("pre_merge_max_chunk_len") && config.getInt("pre_merge_max_chunk_len") > 0
-        ? config.getInt("pre_merge_max_chunk_len") : -1;
-    this.chunksToMerge = config.hasPath("chunks_to_merge") ? config.getInt("chunks_to_merge") : 1;
-    this.chunksToOverlap = config.hasPath("chunks_to_overlap") ? config.getInt("chunks_to_overlap") : null;
-    this.overlapPercentage = config.hasPath("overlap_percentage") ? config.getInt("overlap_percentage") : 0;
-    this.characterLimit = config.hasPath("character_limit") ? config.getInt("character_limit") : -1;
+    this.lengthToSplit = config.hasPath("lengthToSplit") && config.getInt("lengthToSplit") > 0
+        ? config.getInt("lengthToSplit") : null;
+    this.cleanChunks = config.hasPath("cleanChunks") ? config.getBoolean("cleanChunks") : false;
+    this.preMergeMinChunkLen = config.hasPath("preMergeMinChunkLen") && config.getInt("preMergeMinChunkLen") > 0
+        ? config.getInt("preMergeMinChunkLen") : -1;
+    this.preMergeMaxChunkLen = config.hasPath("preMergeMaxChunkLen") && config.getInt("preMergeMaxChunkLen") > 0
+        ? config.getInt("preMergeMaxChunkLen") : -1;
+    this.chunksToMerge = config.hasPath("chunksToMerge") ? config.getInt("chunksToMerge") : 1;
+    this.chunksToOverlap = config.hasPath("chunksToOverlap") ? config.getInt("chunksToOverlap") : null;
+    this.overlapPercentage = config.hasPath("overlapPercentage") ? config.getInt("overlapPercentage") : 0;
+    this.characterLimit = config.hasPath("characterLimit") ? config.getInt("characterLimit") : -1;
     if (chunksToMerge < 1) {
       throw new StageException("Chunks to merge configuration must be greater than 1 if merging chunks is desired or equal to 1 if undesired.");
     }

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/Concatenate.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/Concatenate.java
@@ -19,11 +19,11 @@
  *
  *   - source (List&lt;String&gt;) : list of source field names
  *   - dest (String) : Destination field. This Stage only supports supplying a single destination field.
- *   - format_string (String) : The format String, which will have field values substituted into its placeholders
+ *   - formatString (String) : The format String, which will have field values substituted into its placeholders
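- *   - defualt_inputs (Map&lt;String, String&gt;, Optional) : Mapping of input fields to a default value. You do not have to
+ *   - defaultInputs (Map&lt;String, String&gt;, Optional) : Mapping of input fields to a default value. You do not have to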
  *   supply a default for every input field, if a default is not provided, the default behavior will be to leave the
  *   wildcard for the field in place. Defaults to an empty Map.
- *   - update_mode (String, Optional) : Determines how writing will be handling if the destination field is already populated.
+ *   - updateMode (String, Optional) : Determines how writing will be handled if the destination field is already populated.
  *       Can be 'overwrite', 'append' or 'skip'. Defaults to 'overwrite'.
  */
 public class Concatenate extends Stage {
@@ -37,14 +37,14 @@ public class Concatenate extends Stage {
 
   public Concatenate(Config config) {
     super(config, new StageSpec()
-        .withRequiredProperties("dest", "format_string")
-        .withOptionalProperties("update_mode")
-        .withOptionalParents("default_inputs"));
+        .withRequiredProperties("dest", "formatString")
+        .withOptionalProperties("updateMode")
+        .withOptionalParents("defaultInputs"));
 
     this.destField = config.getString("dest");
-    this.formatStr = config.getString("format_string");
-    this.defaultInputs = config.hasPath("default_inputs") ?
-        config.getConfig("default_inputs").root().unwrapped() : new HashMap<>();
+    this.formatStr = config.getString("formatString");
+    this.defaultInputs = config.hasPath("defaultInputs") ?
+        config.getConfig("defaultInputs").root().unwrapped() : new HashMap<>();
     // defaultInputs = set.stream().collect(Collectors.toMap(Entry::getKey, Entry::getValue));
     this.updateMode = UpdateMode.fromConfig(config);
     this.fields = new ArrayList<>();

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/CopyFields.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/CopyFields.java
@@ -21,7 +21,7 @@
  *   - source (List&lt;String&gt;) : list of source field names
  *   - dest (List&lt;String&gt;) : list of destination field names. You can either supply the same number of source and destination fields
  *       for a 1-1 mapping of results or supply one destination field for all of the source fields to be mapped into.
- *   - update_mode (String, Optional) : Determines how writing will be handling if the destination field is already populated.
+ *   - updateMode (String, Optional) : Determines how writing will be handled if the destination field is already populated.
  *      Can be 'overwrite', 'append' or 'skip'. Defaults to 'overwrite'.
  */
 public class CopyFields extends Stage {
@@ -33,7 +33,7 @@ public class CopyFields extends Stage {
   public CopyFields(Config config) {
     super(config, new StageSpec()
         .withRequiredProperties("source", "dest")
-        .withOptionalProperties("update_mode"));
+        .withOptionalProperties("updateMode"));
     this.sourceFields = config.getStringList("source");
     this.destFields = config.getStringList("dest");
     this.updateMode = UpdateMode.fromConfig(config);

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/CreateStaticTeaser.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/CreateStaticTeaser.java
@@ -21,7 +21,7 @@
  *   - dest (List&lt;String&gt;) : list of destination field names. You can either supply the same number of source and destination fields
  *       for a 1-1 mapping of results or supply one destination field for all of the source fields to be mapped into.
  *   - maxLength (Integer) : The maximum number of characters to include in the extracted teaser.
- *   - update_mode (String, Optional) : Determines how writing will be handling if the destination field is already populated.
+ *   - updateMode (String, Optional) : Determines how writing will be handled if the destination field is already populated.
  *      Can be 'overwrite', 'append' or 'skip'. Defaults to 'overwrite'.
  */
 public class CreateStaticTeaser extends Stage {
@@ -34,7 +34,7 @@ public class CreateStaticTeaser extends Stage {
   public CreateStaticTeaser(Config config) {
     super(config, new StageSpec()
         .withRequiredProperties("source", "dest", "maxLength")
-        .withOptionalProperties("update_mode"));
+        .withOptionalProperties("updateMode"));
 
     this.sourceFields = config.getStringList("source");
     this.destFields = config.getStringList("dest");

diff --git a/lucille-core/src/main/java/com/kmwllc/lucille/stage/DetectLanguage.java b/lucille-core/src/main/java/com/kmwllc/lucille/stage/DetectLanguage.java
@@ -27,9 +27,9 @@
  * - source (List&lt;String&gt;) : List of source field names.
  * - dest (List&lt;String&gt;) : List of destination field names. You can either supply the same number of source and destination fields
  * for a 1-1 mapping of results or supply one destination field for all of the source fields to be mapped into.
- * - min_length (Integer) : The min length of Strings to be considered for language detection. Shorter Strings will be ignored.
- * - max_length (Integer) : The max length of Strings to be considered for language detection. Longer Strings will be truncated.
- * - min_probability (Double) : The min probability for a language result to be considered valid. Results below this threshold
+ * - minLength (Integer) : The min length of Strings to be considered for language detection. Shorter Strings will be ignored.
+ * - maxLength (Integer) : The max length of Strings to be considered for language detection. Longer Strings will be truncated.
+ * - minProbability (Double) : The min probability for a language result to be considered valid. Results below this threshold
  * will be ignored.
  */
 public class DetectLanguage extends Stage {
@@ -52,17 +52,17 @@ public class DetectLanguage extends Stage {
   private Detector detector;
 
   public DetectLanguage(Config config) {
-    super(config, new StageSpec().withRequiredProperties("source", "language_field")
-        .withOptionalProperties("language_confidence_field", "min_length", "max_length",
-            "min_probability", "update_mode"));
+    super(config, new StageSpec().withRequiredProperties("source", "languageField")
+        .withOptionalProperties("languageConfidenceField", "minLength", "maxLength",
+            "minProbability", "updateMode"));
 
     this.sourceFields = config.getStringList("source");
-    this.languageField = config.getString("language_field");
-    this.languageConfidenceField = config.hasPath("language_confidence_field") ?
-        config.getString("language_confidence_field") : "languageConfidence";
-    this.minLength = config.hasPath("min_length") ? config.getInt("min_length") : 50;
-    this.maxLength = config.hasPath("max_length") ? config.getInt("max_length") : 10_000;
-    this.minProbability = config.hasPath("min_probability") ? config.getDouble("min_probability") : .95;
+    this.languageField = config.getString("languageField");
+    this.languageConfidenceField = config.hasPath("languageConfidenceField") ?
+        config.getString("languageConfidenceField") : "languageConfidence";
+    this.minLength = config.hasPath("minLength") ? config.getInt("minLength") : 50;
+    this.maxLength = config.hasPath("maxLength") ? config.getInt("maxLength") : 10_000;
+    this.minProbability = config.hasPath("minProbability") ? config.getDouble("minProbability") : .95;
     this.updateMode = UpdateMode.fromConfig(config);
   }