From d16bd40d5593c2737079518b5ccbf3e5cc06c70b Mon Sep 17 00:00:00 2001
From: oalam
Date: Mon, 17 Jun 2019 15:04:20 +0200
Subject: [PATCH 1/7] documentation: add category tag + YAML processors
---
.../commonlogs/gitlab/ParseGitlabLog.java | 8 +-
.../logisland/processor/AddFields.java | 5 +-
.../logisland/processor/ApplyRegexp.java | 12 +-
.../processor/ConvertFieldsType.java | 6 +-
.../ConvertSimpleDateFormatFields.java | 7 +-
.../logisland/processor/DebugStream.java | 7 +-
.../logisland/processor/EvaluateJsonPath.java | 6 +-
.../logisland/processor/ExpandMapFields.java | 5 +-
.../logisland/processor/FilterRecords.java | 15 +-
.../hurence/logisland/processor/FlatMap.java | 5 +-
.../processor/GenerateRandomRecord.java | 5 +-
.../hurence/logisland/processor/ModifyId.java | 6 +-
.../logisland/processor/NormalizeFields.java | 7 +-
.../logisland/processor/ParseProperties.java | 5 +-
.../logisland/processor/RemoveFields.java | 5 +-
.../processor/SelectDistinctRecords.java | 5 +-
.../hurence/logisland/processor/SendMail.java | 5 +-
.../logisland/processor/SetJsonAsFields.java | 5 +-
.../logisland/processor/SplitField.java | 6 +-
.../logisland/processor/SplitText.java | 7 +-
.../processor/SplitTextMultiline.java | 4 +
.../processor/SplitTextWithProperties.java | 3 +
.../processor/alerting/CheckAlerts.java | 5 +-
.../processor/alerting/CheckThresholds.java | 5 +-
.../processor/alerting/ComputeTags.java | 5 +-
.../processor/datastore/BulkPut.java | 5 +-
.../processor/datastore/EnrichRecords.java | 5 +-
.../processor/datastore/MultiGet.java | 6 +-
.../processor/bro/ParseBroEvent.java | 5 +-
.../processor/netflow/ParseNetflowEvent.java | 5 +-
.../networkpacket/ParseNetworkPacket.java | 3 +
.../elasticsearch/BulkAddElasticsearch.java | 6 +-
.../EnrichRecordsElasticsearch.java | 5 +-
.../elasticsearch/MultiGetElasticsearch.java | 5 +-
.../processor/enrichment/IpToFqdn.java | 5 +-
.../processor/enrichment/IpToGeo.java | 5 +-
.../processor/excel/ExcelExtract.java | 8 +-
.../excel/ExcelExtractProperties.java | 6 +-
.../processor/hbase/FetchHBaseRow.java | 5 +-
.../processor/hbase/PutHBaseCell.java | 6 +-
.../logisland/processor/DetectOutliers.java | 5 +-
.../hurence/logisland/processor/MatchIP.java | 6 +-
.../logisland/processor/MatchQuery.java | 5 +-
.../logisland/processor/SampleRecords.java | 5 +-
.../processor/scripting/python/RunPython.java | 7 +-
.../main/resources/nltk/sem/cooper_storage.py | 2 +-
.../processor/useragent/ParseUserAgent.java | 5 +-
.../webAnalytics/ConsolidateSession.java | 5 +-
.../webAnalytics/IncrementalWebSession.java | 5 +-
...OfTraffic.java => SetSourceOfTraffic.java} | 15 +-
.../processor/webAnalytics/URLDecoder.java | 5 +-
.../IncrementalWebSessionTest.java | 3 +-
.../webAnalytics/setSourceOfTrafficTest.java | 8 +-
.../processor/xml/EvaluateXPath.java | 5 +-
.../cassandra/CassandraControllerService.java | 4 +
.../Elasticsearch_2_4_0_ClientService.java | 3 +
.../Elasticsearch_5_4_0_ClientService.java | 3 +
.../Elasticsearch_6_6_2_ClientService.java | 3 +
.../hbase/HBase_1_1_2_ClientService.java | 3 +
.../influxdb/InfluxDBControllerService.java | 3 +
.../cache/CSVKeyValueCacheService.java | 3 +
.../cache/LRUKeyValueCacheService.java | 3 +
.../maxmind/MaxmindIpToGeoService.java | 3 +
.../mongodb/MongoDBControllerService.java | 3 +
.../StandardProxyConfigurationService.java | 3 +
.../service/RedisKeyValueCacheService.java | 3 +
.../service/lookup/RestLookupService.java | 3 +
.../solr/Solr_5_5_5_ClientService.java | 3 +
.../solr/Solr_6_6_2_ClientService.java | 3 +
.../solr/Solr_6_4_2_ChronixClientService.java | 3 +
.../annotation/documentation/Category.java | 37 +
.../documentation/ComponentCategory.java | 15 +
logisland-documentation/components.rst | 2 +-
logisland-documentation/pom.xml | 2 +
.../logisland/documentation/DocGenerator.java | 1 +
.../documentation/DocGeneratorUtils.java | 87 +-
.../yaml/YamlDocumentationWriter.java | 392 ++
.../documentation/yaml/YamlPrintWriter.java | 100 +
.../user/components/common-processors.rst | 1183 ++--
.../user/components/components.yaml | 678 ++
.../user/components/engines/components.yaml | 24 +
.../components/engines/engine-vanilla.yaml | 19 +
.../user/components/other-processors.rst | 5552 +++++++++++++++--
.../user/components/services.rst | 3472 ++++++++++-
84 files changed, 10772 insertions(+), 1156 deletions(-)
rename logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/{setSourceOfTraffic.java => SetSourceOfTraffic.java} (98%)
create mode 100644 logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/Category.java
create mode 100644 logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/ComponentCategory.java
create mode 100644 logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlDocumentationWriter.java
create mode 100644 logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlPrintWriter.java
create mode 100644 logisland-documentation/user/components/components.yaml
create mode 100644 logisland-documentation/user/components/engines/components.yaml
create mode 100644 logisland-documentation/user/components/engines/engine-vanilla.yaml
diff --git a/logisland-components/logisland-processors/logisland-processor-common-logs/src/main/java/com/hurence/logisland/processor/commonlogs/gitlab/ParseGitlabLog.java b/logisland-components/logisland-processors/logisland-processor-common-logs/src/main/java/com/hurence/logisland/processor/commonlogs/gitlab/ParseGitlabLog.java
index 71277a8c1..f321f6bd8 100644
--- a/logisland-components/logisland-processors/logisland-processor-common-logs/src/main/java/com/hurence/logisland/processor/commonlogs/gitlab/ParseGitlabLog.java
+++ b/logisland-components/logisland-processors/logisland-processor-common-logs/src/main/java/com/hurence/logisland/processor/commonlogs/gitlab/ParseGitlabLog.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.commonlogs.gitlab;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
import com.hurence.logisland.record.Field;
@@ -37,9 +35,7 @@
import java.util.List;
import java.util.Map;
-/**
- * Gitlab logs processor
- */
+@Category(ComponentCategory.PARSING)
@Tags({"logs", "gitlab"})
@CapabilityDescription(
"The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs."
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/AddFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/AddFields.java
index d82d35aaa..523db6c7e 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/AddFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/AddFields.java
@@ -17,9 +17,7 @@
import com.hurence.logisland.annotation.behavior.DynamicProperties;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.component.PropertyValue;
@@ -33,6 +31,7 @@
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "Add"})
@CapabilityDescription("Add one or more field to records")
@ExtraDetailFile("./details/common-processors/AddFields-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ApplyRegexp.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ApplyRegexp.java
index c83ee4f08..c9ee2863b 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ApplyRegexp.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ApplyRegexp.java
@@ -16,27 +16,19 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.SeeAlso;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
-import com.hurence.logisland.record.FieldDictionary;
-import com.hurence.logisland.record.FieldType;
import com.hurence.logisland.record.Record;
-import com.hurence.logisland.record.StandardRecord;
-import com.hurence.logisland.util.time.DateUtil;
import com.hurence.logisland.validator.StandardValidators;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.text.ParseException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
+@Category(ComponentCategory.PROCESSING)
@Tags({"parser", "regex", "log", "record"})
@CapabilityDescription("This processor is used to create a new set of fields from one field (using regexp).")
@SeeAlso(value = {ApplyRegexp.class}, classNames = {"com.hurence.logisland.processor.RegexpProcessor"})
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertFieldsType.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertFieldsType.java
index e2d4dcc1a..ff2461c1f 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertFieldsType.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertFieldsType.java
@@ -15,11 +15,8 @@
*/
package com.hurence.logisland.processor;
-import com.google.common.collect.Lists;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
import com.hurence.logisland.record.FieldType;
@@ -30,6 +27,7 @@
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"type", "fields", "update", "convert"})
@CapabilityDescription("Converts a field value into the given type. does nothing if conversion is not possible")
@DynamicProperty(name = "field",
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertSimpleDateFormatFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertSimpleDateFormatFields.java
index e5d77a47a..3ea38b652 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertSimpleDateFormatFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ConvertSimpleDateFormatFields.java
@@ -16,22 +16,19 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
-import com.hurence.logisland.record.Field;
import com.hurence.logisland.record.FieldType;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "Add"})
@CapabilityDescription("Convert one or more field representing a date into a Unix Epoch Time (time in milliseconds since &st January 1970, 00:00:00 GMT)...")
@DynamicProperty(name = "field name to add",
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/DebugStream.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/DebugStream.java
index c59a161f6..5bfabbc72 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/DebugStream.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/DebugStream.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Record;
@@ -25,8 +23,6 @@
import com.hurence.logisland.serializer.RecordSerializer;
import com.hurence.logisland.serializer.StringSerializer;
import com.hurence.logisland.validator.StandardValidators;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -37,6 +33,7 @@
import java.util.Collections;
import java.util.List;
+@Category(ComponentCategory.UTILS)
@Tags({"record", "debug"})
@CapabilityDescription("This is a processor that logs incoming records")
@ExtraDetailFile("./details/common-processors/DebugStream-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/EvaluateJsonPath.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/EvaluateJsonPath.java
index f15a139ed..f7b502239 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/EvaluateJsonPath.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/EvaluateJsonPath.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
import com.hurence.logisland.record.FieldType;
@@ -38,7 +36,7 @@
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicReference;
-
+@Category(ComponentCategory.PARSING)
@Tags({"JSON", "evaluate", "JsonPath"})
@CapabilityDescription("Evaluates one or more JsonPath expressions against the content of a FlowFile. "
+ "The results of those expressions are assigned to Records Fields "
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ExpandMapFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ExpandMapFields.java
index 7ece668a9..0b636dc6a 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ExpandMapFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ExpandMapFields.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
@@ -29,6 +27,7 @@
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "Expand", "Map"})
@CapabilityDescription("Expands the content of a MAP field to the root.")
@ExtraDetailFile("./details/common-processors/ExpandMapFields-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FilterRecords.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FilterRecords.java
index b7f36fb9c..344bd5f27 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FilterRecords.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FilterRecords.java
@@ -1,12 +1,12 @@
/**
* Copyright (C) 2016 Hurence (support@hurence.com)
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,13 +15,9 @@
*/
package com.hurence.logisland.processor;
-import com.google.common.collect.Lists;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
-import com.hurence.logisland.record.FieldType;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
import org.slf4j.Logger;
@@ -33,6 +29,7 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "remove", "delete"})
@CapabilityDescription("Keep only records based on a given field value")
@ExtraDetailFile("./details/common-processors/FilterRecords-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FlatMap.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FlatMap.java
index ee2aaf920..069845719 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FlatMap.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/FlatMap.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.*;
import com.hurence.logisland.validator.StandardValidators;
@@ -25,6 +23,7 @@
import java.util.*;
import java.util.stream.Collectors;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "flatmap", "flatten"})
@CapabilityDescription("Converts each field records into a single flatten record...")
@ExtraDetailFile("./details/common-processors/FlatMap-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/GenerateRandomRecord.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/GenerateRandomRecord.java
index dc1ff8728..a42272c73 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/GenerateRandomRecord.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/GenerateRandomRecord.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.util.avro.eventgenerator.DataGenerator;
@@ -31,6 +29,7 @@
import java.util.Collections;
import java.util.List;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "avro", "generator"})
@CapabilityDescription("This is a processor that make random records given an Avro schema")
@ExtraDetailFile("./details/common-processors/GenerateRandomRecord-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ModifyId.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ModifyId.java
index 5d69ce177..79d31e60e 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ModifyId.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ModifyId.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.google.common.collect.Lists;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
@@ -27,7 +25,6 @@
import com.hurence.logisland.validator.ValidationContext;
import com.hurence.logisland.validator.ValidationResult;
import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -36,6 +33,7 @@
import java.security.NoSuchAlgorithmException;
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "id", "idempotent", "generate", "modify"})
@CapabilityDescription("modify id of records or generate it following defined rules")
@ExtraDetailFile("./details/common-processors/ModifyId-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/NormalizeFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/NormalizeFields.java
index 3542a4943..29e534be5 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/NormalizeFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/NormalizeFields.java
@@ -16,19 +16,16 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.*;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "normalizer"})
@CapabilityDescription("Changes the name of a field according to a provided name mapping...")
@DynamicProperty(name = "alternative mapping",
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ParseProperties.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ParseProperties.java
index 4ef1e8339..00b511796 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ParseProperties.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/ParseProperties.java
@@ -17,9 +17,7 @@
import com.google.common.collect.Lists;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
import com.hurence.logisland.record.FieldType;
@@ -33,6 +31,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+@Category(ComponentCategory.PARSING)
@Tags({"record", "properties", "parser"})
@CapabilityDescription("Parse a field made of key=value fields separated by spaces\n" +
"a string like \"a=1 b=2 c=3\" will add a,b & c fields, respectively with values 1,2 & 3 to the current Record")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/RemoveFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/RemoveFields.java
index 7cd79eccf..678826a2c 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/RemoveFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/RemoveFields.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.google.common.collect.Lists;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.component.PropertyValue;
import com.hurence.logisland.record.FieldDictionary;
@@ -34,6 +32,7 @@
import java.util.Collections;
import java.util.List;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "remove", "delete", "keep"})
@CapabilityDescription("Removes a list of fields defined by a comma separated list of field names or keeps only fields " +
"defined by a comma separated list of field names.")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SelectDistinctRecords.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SelectDistinctRecords.java
index c62cd65a6..aa4fb0c39 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SelectDistinctRecords.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SelectDistinctRecords.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.google.common.collect.Lists;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
import com.hurence.logisland.record.Record;
@@ -35,6 +33,7 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;
+@Category(ComponentCategory.PROCESSING)
@Tags({"record", "fields", "remove", "delete"})
@CapabilityDescription("Keep only distinct records based on a given field")
@ExtraDetailFile("./details/common-processors/SelectDistinctRecords-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SendMail.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SendMail.java
index 5e3325bcd..72d4a214d 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SendMail.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SendMail.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
import com.hurence.logisland.record.Field;
@@ -64,6 +62,7 @@
* and the format of src attribute in the img html tag should be like:
*
*/
+@Category(ComponentCategory.ALERTING)
@Tags({"smtp", "email", "e-mail", "mail", "mailer", "sendmail", "message", "alert", "html"})
@CapabilityDescription(
"The SendMail processor is aimed at sending an email (like for instance an alert email) from an incoming record."
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SetJsonAsFields.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SetJsonAsFields.java
index 9a2c34d7e..fc04c5e02 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SetJsonAsFields.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SetJsonAsFields.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
import com.hurence.logisland.record.FieldDictionary;
@@ -30,6 +28,7 @@
import java.util.*;
+@Category(ComponentCategory.PARSING)
@Tags({"json"})
@CapabilityDescription(
"The SetJsonAsFields processor reads the content of a string field containing a json string and sets each " +
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitField.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitField.java
index 7d7fbdd7f..ad1271078 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitField.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitField.java
@@ -16,10 +16,7 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.SeeAlso;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldType;
@@ -32,6 +29,7 @@
import java.util.regex.Pattern;
+@Category(ComponentCategory.PARSING)
@Tags({"parser", "split", "log", "record"})
@CapabilityDescription("This processor is used to create a new set of fields from one field (using split).")
@SeeAlso(value = {SplitField.class}, classNames = {"com.hurence.logisland.processor.SplitField"})
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitText.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitText.java
index 146924028..5c8e4be63 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitText.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitText.java
@@ -16,10 +16,7 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.SeeAlso;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
import com.hurence.logisland.record.FieldType;
@@ -37,7 +34,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
+@Category(ComponentCategory.PARSING)
@Tags({"parser", "regex", "log", "record"})
@CapabilityDescription("This is a processor that is used to split a String into fields according to a given Record mapping")
@SeeAlso(value = {SplitTextMultiline.class}, classNames = {"com.hurence.logisland.processor.SplitTextMultiline"})
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextMultiline.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextMultiline.java
index dc233ab66..c17eeca17 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextMultiline.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextMultiline.java
@@ -15,6 +15,8 @@
*/
package com.hurence.logisland.processor;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
@@ -31,6 +33,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+@Category(ComponentCategory.PARSING)
@ExtraDetailFile("./details/common-processors/SplitTextMultiline-Detail.rst")
public class SplitTextMultiline extends AbstractProcessor {
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextWithProperties.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextWithProperties.java
index 3f6057220..4c70f6ea8 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextWithProperties.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/SplitTextWithProperties.java
@@ -15,6 +15,8 @@
*/
package com.hurence.logisland.processor;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Field;
@@ -30,6 +32,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+@Category(ComponentCategory.PARSING)
@ExtraDetailFile("./details/common-processors/SplitTextWithProperties-Detail.rst")
public class SplitTextWithProperties extends SplitText {
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckAlerts.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckAlerts.java
index 7191b6b6b..8383fb4fb 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckAlerts.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckAlerts.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.alerting;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.record.FieldDictionary;
@@ -32,6 +30,7 @@
import javax.script.ScriptException;
import java.util.*;
+@Category(ComponentCategory.ALERTING)
@Tags({"record", "alerting", "thresholds", "opc", "tag"})
@CapabilityDescription("Add one or more records representing alerts. Using a datastore.")
@ExtraDetailFile("./details/common-processors/CheckAlerts-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckThresholds.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckThresholds.java
index 707fd3337..badb956e0 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckThresholds.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/CheckThresholds.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.alerting;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.record.*;
@@ -29,6 +27,7 @@
import javax.script.ScriptException;
import java.util.*;
+@Category(ComponentCategory.ALERTING)
@Tags({"record", "threshold", "tag", "alerting"})
@CapabilityDescription("Compute threshold cross from given formulas.\n\n" +
"- each dynamic property will return a new record according to the formula definition\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/ComputeTags.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/ComputeTags.java
index 717da5aaa..e771971da 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/ComputeTags.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/alerting/ComputeTags.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.alerting;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.record.FieldDictionary;
@@ -34,6 +32,7 @@
import java.util.List;
import java.util.Map;
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"record", "fields", "Add"})
@CapabilityDescription("Compute tag cross from given formulas.\n\n" +
"- each dynamic property will return a new record according to the formula definition\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/BulkPut.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/BulkPut.java
index 49fdb0e8c..626600aa4 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/BulkPut.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/BulkPut.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.datastore;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
@@ -30,6 +28,7 @@
import java.text.SimpleDateFormat;
import java.util.*;
+@Category(ComponentCategory.DATASTORE)
@Tags({"datastore", "record", "put", "bulk"})
@CapabilityDescription("Indexes the content of a Record in a Datastore using bulk processor")
@ExtraDetailFile("./details/common-processors/BulkPut-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/EnrichRecords.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/EnrichRecords.java
index 45197a381..003c0e4cd 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/EnrichRecords.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/EnrichRecords.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.datastore;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.record.FieldDictionary;
@@ -36,6 +34,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"datastore", "enricher"})
@CapabilityDescription("Enrich input records with content indexed in datastore using multiget queries.\n" +
"Each incoming record must be possibly enriched with information stored in datastore. \n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/MultiGet.java b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/MultiGet.java
index 42ea6709b..d80869d6e 100644
--- a/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/MultiGet.java
+++ b/logisland-components/logisland-processors/logisland-processor-common/src/main/java/com/hurence/logisland/processor/datastore/MultiGet.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.datastore;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.processor.ProcessError;
@@ -32,6 +30,8 @@
import java.util.*;
+
+@Category(ComponentCategory.DATASTORE)
@Tags({"datastore","get", "multiget"})
@CapabilityDescription("Retrieves a content from datastore using datastore multiget queries.\n" +
"Each incoming record contains information regarding the datastore multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/bro/ParseBroEvent.java b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/bro/ParseBroEvent.java
index 870ca535d..1c05a19c3 100644
--- a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/bro/ParseBroEvent.java
+++ b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/bro/ParseBroEvent.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.bro;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
import com.hurence.logisland.record.Field;
@@ -40,6 +38,7 @@
/**
* Bro (https://www.bro.org/) processor
*/
+@Category(ComponentCategory.SECURITY)
@Tags({"bro", "security", "IDS", "NIDS"})
@CapabilityDescription(
"The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events."
diff --git a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/netflow/ParseNetflowEvent.java b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/netflow/ParseNetflowEvent.java
index 5c16b7a2f..22ed01f8e 100644
--- a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/netflow/ParseNetflowEvent.java
+++ b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/netflow/ParseNetflowEvent.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.netflow;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
import com.hurence.logisland.processor.ProcessContext;
@@ -34,6 +32,7 @@
/**
* Netflow (http://www.cisco.com/c/en/us/td/docs/ios/solutions_docs/netflow/nfwhite.html) processor
*/
+@Category(ComponentCategory.SECURITY)
@Tags({"netflow", "security"})
@CapabilityDescription(
"The `Netflow V5 `_ processor "
diff --git a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/networkpacket/ParseNetworkPacket.java b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/networkpacket/ParseNetworkPacket.java
index d73bb9c94..978a2036f 100644
--- a/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/networkpacket/ParseNetworkPacket.java
+++ b/logisland-components/logisland-processors/logisland-processor-cyber-security/src/main/java/com/hurence/logisland/processor/networkpacket/ParseNetworkPacket.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.processor.networkpacket;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
@@ -42,6 +44,7 @@
/**
* PCap processor
*/
+@Category(ComponentCategory.SECURITY)
@Tags({"PCap", "security", "IDS", "NIDS"})
@CapabilityDescription(
"The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). "
diff --git a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/BulkAddElasticsearch.java b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/BulkAddElasticsearch.java
index 4dbbef12e..2d9bb21b0 100644
--- a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/BulkAddElasticsearch.java
+++ b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/BulkAddElasticsearch.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.elasticsearch;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
@@ -30,6 +28,8 @@
import java.text.SimpleDateFormat;
import java.util.*;
+
+@Category(ComponentCategory.DATASTORE)
@Tags({"elasticsearch"})
@CapabilityDescription("Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor")
@ExtraDetailFile("./details/BulkAddElasticsearch-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/EnrichRecordsElasticsearch.java b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/EnrichRecordsElasticsearch.java
index 975566ad1..6e047a31b 100644
--- a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/EnrichRecordsElasticsearch.java
+++ b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/EnrichRecordsElasticsearch.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.elasticsearch;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.processor.ProcessError;
@@ -38,6 +36,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"elasticsearch"})
@CapabilityDescription("Enrich input records with content indexed in elasticsearch using multiget queries.\n" +
"Each incoming record must be possibly enriched with information stored in elasticsearch. \n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/MultiGetElasticsearch.java b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/MultiGetElasticsearch.java
index 1a9c681ae..67b86601a 100644
--- a/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/MultiGetElasticsearch.java
+++ b/logisland-components/logisland-processors/logisland-processor-elasticsearch/src/main/java/com/hurence/logisland/processor/elasticsearch/MultiGetElasticsearch.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.elasticsearch;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.processor.ProcessError;
@@ -30,6 +28,7 @@
import java.util.*;
+@Category(ComponentCategory.DATASTORE)
@Tags({"elasticsearch"})
@CapabilityDescription("Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.\n" +
"Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToFqdn.java b/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToFqdn.java
index ea57f53c9..b94ab8d2a 100644
--- a/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToFqdn.java
+++ b/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToFqdn.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.enrichment;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.logging.ComponentLog;
@@ -40,6 +38,7 @@
* An input field from the record has the IP as value. An new field is created and its value is the FQDN matching the
* IP address.
*/
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"dns", "ip", "fqdn", "domain", "address", "fqhn", "reverse", "resolution", "enrich"})
@CapabilityDescription("Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the" +
" record has the IP as value. An new field is created and its value is the FQDN matching the IP address. The" +
diff --git a/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToGeo.java b/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToGeo.java
index bc11bfa98..6a2f12add 100644
--- a/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToGeo.java
+++ b/logisland-components/logisland-processors/logisland-processor-enrichment/src/main/java/com/hurence/logisland/processor/enrichment/IpToGeo.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.enrichment;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.component.PropertyValue;
@@ -34,6 +32,7 @@
import static com.hurence.logisland.service.iptogeo.IpToGeoService.*;
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"geo", "enrich", "ip"})
@CapabilityDescription("Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **"
+ IpAbstractProcessor.PROP_IP_ADDRESS_FIELD + "** property. By default, the geo information are put in a hierarchical structure. " +
diff --git a/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtract.java b/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtract.java
index 904bb0901..cb51e62bd 100644
--- a/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtract.java
+++ b/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtract.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.excel;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
import com.hurence.logisland.processor.ProcessContext;
@@ -41,9 +39,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;
-/**
- * Consumes a Microsoft Excel document and converts each spreadsheet row to a {@link Record}.
- */
+@Category(ComponentCategory.PARSING)
@Tags({"excel", "processor", "poi"})
@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet's line to a structured " +
"record. The processor is assuming to receive raw excel file as input record.")
diff --git a/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtractProperties.java b/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtractProperties.java
index eb48daf46..3460af3b9 100644
--- a/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtractProperties.java
+++ b/logisland-components/logisland-processors/logisland-processor-excel/src/main/java/com/hurence/logisland/processor/excel/ExcelExtractProperties.java
@@ -15,6 +15,8 @@
*/
package com.hurence.logisland.processor.excel;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.validator.StandardValidators;
@@ -26,9 +28,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;
-/**
- * Common options for {@link ExcelExtract} processor.
- */
+@Category(ComponentCategory.PARSING)
public class ExcelExtractProperties implements Serializable {
public static final PropertyDescriptor RECORD_TYPE = new PropertyDescriptor.Builder()
diff --git a/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/FetchHBaseRow.java b/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/FetchHBaseRow.java
index 3acf08b8d..99e47e5db 100644
--- a/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/FetchHBaseRow.java
+++ b/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/FetchHBaseRow.java
@@ -17,9 +17,7 @@
import com.hurence.logisland.annotation.behavior.WritesAttribute;
import com.hurence.logisland.annotation.behavior.WritesAttributes;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
@@ -43,6 +41,7 @@
import java.util.*;
import java.util.regex.Pattern;
+@Category(ComponentCategory.DATASTORE)
@Tags({"hbase", "scan", "fetch", "get", "enrich"})
@CapabilityDescription("Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, " +
"or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the " +
diff --git a/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/PutHBaseCell.java b/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/PutHBaseCell.java
index d72fba81d..c2cf6cd4f 100644
--- a/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/PutHBaseCell.java
+++ b/logisland-components/logisland-processors/logisland-processor-hbase/src/main/java/com/hurence/logisland/processor/hbase/PutHBaseCell.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.hbase;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.service.hbase.put.PutColumn;
@@ -35,7 +33,7 @@
import java.util.Collections;
import java.util.List;
-
+@Category(ComponentCategory.DATASTORE)
@Tags({"hadoop", "hbase"})
@CapabilityDescription("Adds the Contents of a Record to HBase as the value of a single cell")
@ExtraDetailFile("./details/PutHBaseCell-Detail.rst")
diff --git a/logisland-components/logisland-processors/logisland-processor-outlier-detection/src/main/java/com/hurence/logisland/processor/DetectOutliers.java b/logisland-components/logisland-processors/logisland-processor-outlier-detection/src/main/java/com/hurence/logisland/processor/DetectOutliers.java
index 3e5229527..41c0d0a3a 100644
--- a/logisland-components/logisland-processors/logisland-processor-outlier-detection/src/main/java/com/hurence/logisland/processor/DetectOutliers.java
+++ b/logisland-components/logisland-processors/logisland-processor-outlier-detection/src/main/java/com/hurence/logisland/processor/DetectOutliers.java
@@ -26,9 +26,7 @@
import com.caseystella.analytics.outlier.streaming.OutlierConfig;
import com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMAD;
import com.hurence.logisland.annotation.behavior.Stateful;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.validator.ValidationContext;
import com.hurence.logisland.validator.ValidationResult;
@@ -43,6 +41,7 @@
import java.util.*;
+@Category(ComponentCategory.ANALYTICS)
@Stateful
@Tags({"analytic", "outlier", "record", "iot", "timeseries"})
@CapabilityDescription("Outlier Analysis: A Hybrid Approach\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchIP.java b/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchIP.java
index 3437da9e5..a3ca151a8 100644
--- a/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchIP.java
+++ b/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchIP.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
@@ -40,6 +38,8 @@
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
+
+@Category(ComponentCategory.ALERTING)
@Tags({"analytic", "percolator", "record", "record", "query", "lucene"})
@CapabilityDescription("IP address Query matching (using `Luwak )`_\n\n" +
"You can use this processor to handle custom events matching IP address (CIDR)\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchQuery.java b/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchQuery.java
index 341b2ae99..b286f5e16 100644
--- a/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchQuery.java
+++ b/logisland-components/logisland-processors/logisland-processor-querymatcher/src/main/java/com/hurence/logisland/processor/MatchQuery.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.validator.StandardValidators;
@@ -37,6 +35,7 @@
import java.util.*;
+@Category(ComponentCategory.ALERTING)
@Tags({"analytic", "percolator", "record", "record", "query", "lucene"})
@CapabilityDescription("Query matching based on `Luwak `_\n\n" +
"you can use this processor to handle custom events defined by lucene queries\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-sampling/src/main/java/com/hurence/logisland/processor/SampleRecords.java b/logisland-components/logisland-processors/logisland-processor-sampling/src/main/java/com/hurence/logisland/processor/SampleRecords.java
index 7b330b7e7..b1a570c2e 100644
--- a/logisland-components/logisland-processors/logisland-processor-sampling/src/main/java/com/hurence/logisland/processor/SampleRecords.java
+++ b/logisland-components/logisland-processors/logisland-processor-sampling/src/main/java/com/hurence/logisland/processor/SampleRecords.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.record.FieldDictionary;
@@ -37,6 +35,7 @@
import java.util.stream.Collectors;
+@Category(ComponentCategory.TIMESERIES)
@Tags({"analytic", "sampler", "record", "iot", "timeseries"})
@CapabilityDescription("Query matching based on `Luwak `_\n\n" +
"you can use this processor to handle custom events defined by lucene queries\n" +
diff --git a/logisland-components/logisland-processors/logisland-processor-scripting/src/main/java/com/hurence/logisland/processor/scripting/python/RunPython.java b/logisland-components/logisland-processors/logisland-processor-scripting/src/main/java/com/hurence/logisland/processor/scripting/python/RunPython.java
index 7a3b99052..e2d2a373c 100644
--- a/logisland-components/logisland-processors/logisland-processor-scripting/src/main/java/com/hurence/logisland/processor/scripting/python/RunPython.java
+++ b/logisland-components/logisland-processors/logisland-processor-scripting/src/main/java/com/hurence/logisland/processor/scripting/python/RunPython.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.scripting.python;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
import com.hurence.logisland.record.Record;
@@ -52,9 +50,10 @@
* - doc for tutorial (inline?, file? , both?)
*/
+@Category(ComponentCategory.PROCESSING)
@Tags({"scripting", "python"})
@CapabilityDescription(
- " !!!! WARNING !!!!\n\nThe RunPython processor is currently an experimental feature : it is delivered as is, with the"
+ "!!!! WARNING !!!!\n\nThe RunPython processor is currently an experimental feature : it is delivered as is, with the"
+ " current set of features and is subject to modifications in API or anything else in further logisland releases"
+ " without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml"
+ " example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end"
diff --git a/logisland-components/logisland-processors/logisland-processor-scripting/src/main/resources/nltk/sem/cooper_storage.py b/logisland-components/logisland-processors/logisland-processor-scripting/src/main/resources/nltk/sem/cooper_storage.py
index 3a1878ea6..5042d16ef 100644
--- a/logisland-components/logisland-processors/logisland-processor-scripting/src/main/resources/nltk/sem/cooper_storage.py
+++ b/logisland-components/logisland-processors/logisland-processor-scripting/src/main/resources/nltk/sem/cooper_storage.py
@@ -24,7 +24,7 @@ def __init__(self, featstruct):
self.featstruct = featstruct
self.readings = []
try:
- self.core = featstruct['CORE']
+            self.core = featstruct['CORE']
self.store = featstruct['STORE']
except KeyError:
print("%s is not a Cooper storage structure" % featstruct)
diff --git a/logisland-components/logisland-processors/logisland-processor-useragent/src/main/java/com/hurence/logisland/processor/useragent/ParseUserAgent.java b/logisland-components/logisland-processors/logisland-processor-useragent/src/main/java/com/hurence/logisland/processor/useragent/ParseUserAgent.java
index d2b5cb000..2cfa900d6 100644
--- a/logisland-components/logisland-processors/logisland-processor-useragent/src/main/java/com/hurence/logisland/processor/useragent/ParseUserAgent.java
+++ b/logisland-components/logisland-processors/logisland-processor-useragent/src/main/java/com/hurence/logisland/processor/useragent/ParseUserAgent.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.useragent;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginLoader;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
@@ -41,6 +39,7 @@
/**
* HTTP user-agent processor
*/
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"User-Agent", "clickstream", "DMP"})
@CapabilityDescription(
"The user-agent processor allows to decompose User-Agent value from an HTTP header into several attributes of interest."
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/ConsolidateSession.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/ConsolidateSession.java
index da53debc7..47c4b9e44 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/ConsolidateSession.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/ConsolidateSession.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.webAnalytics;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.*;
import com.hurence.logisland.record.*;
@@ -33,6 +31,7 @@
/**
* Consolidate session processor
*/
+@Category(ComponentCategory.ANALYTICS)
@Tags({"analytics", "web", "session"})
@CapabilityDescription(
value = "The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics."
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSession.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSession.java
index a2f625ea6..eebf08a08 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSession.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSession.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.webAnalytics;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
@@ -57,6 +55,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+@Category(ComponentCategory.ANALYTICS)
@Tags({"analytics", "web", "session"})
@CapabilityDescription(
value = "This processor creates and updates web-sessions based on incoming web-events." +
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTraffic.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/SetSourceOfTraffic.java
similarity index 98%
rename from logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTraffic.java
rename to logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/SetSourceOfTraffic.java
index e4b8fdc11..84d126d8a 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTraffic.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/SetSourceOfTraffic.java
@@ -15,9 +15,7 @@
*/
package com.hurence.logisland.processor.webAnalytics;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
@@ -40,8 +38,9 @@
import java.util.*;
import java.util.regex.Pattern;
-import static com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic.*;
+import static com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic.*;
+@Category(ComponentCategory.ANALYTICS)
@Tags({"session", "traffic", "source", "web", "analytics"})
@CapabilityDescription("Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources, \n" +
"including advertising/paying campaigns, search engines, social networks, referring sites or direct access. \n" +
@@ -50,13 +49,13 @@
"i-e: **" + PROP_UTM_SOURCE + "**, **" + PROP_UTM_MEDIUM + "**, **" + PROP_UTM_CAMPAIGN + "**, **" + PROP_UTM_CONTENT + "**, **" + PROP_UTM_TERM + "**)\n" +
", the referer (**" + PROP_REFERER + "** property) and the first visited page of the session (**" + PROP_FIRST_VISITED_PAGE + "** property).\n" +
"By default the source of traffic information are placed in a flat structure (specified by the **" + PROP_SOURCE_OF_TRAFFIC_SUFFIX + "** property\n" +
- "with a default value of " + SOURCE_OF_TRAFFIC_SUFFIX_NAME + "). To work properly the setSourceOfTraffic processor needs to have access to an \n" +
+ "with a default value of " + SOURCE_OF_TRAFFIC_SUFFIX_NAME + "). To work properly the SetSourceOfTraffic processor needs to have access to an \n" +
"Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **" + PROP_ES_INDEX + "** property) " +
"should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field " +
"(default being " + SEARCH_ENGINE_SITE + ") specified by the property **" + PROP_ES_SEARCH_ENGINE + "** with a value set to true. If the domain is a social network " +
", the related ES doc MUST have a boolean field (default being " + SOCIAL_NETWORK_SITE + ") specified by the property **" + PROP_ES_SOCIAL_NETWORK + "** with a value set to true. ")
-@ExtraDetailFile("./details/setSourceOfTraffic-Detail.rst")
-public class setSourceOfTraffic extends AbstractProcessor {
+@ExtraDetailFile("./details/SetSourceOfTraffic-Detail.rst")
+public class SetSourceOfTraffic extends AbstractProcessor {
protected static final String PROP_ES_INDEX = "es.index";
private static final String PROP_ES_TYPE = "es.type";
@@ -574,7 +573,7 @@ private boolean has_domain_flag(String domain, String flag, ProcessContext conte
/**
* Attempt to find domain related info from the cache
*/
- setSourceOfTraffic.CacheEntry cacheEntry = null;
+ SetSourceOfTraffic.CacheEntry cacheEntry = null;
try {
cacheEntry = cacheService.get(domain);
} catch (Exception e) {
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/URLDecoder.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/URLDecoder.java
index b762173ae..3162d93ce 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/URLDecoder.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/main/java/com/hurence/logisland/processor/webAnalytics/URLDecoder.java
@@ -16,9 +16,7 @@
package com.hurence.logisland.processor.webAnalytics;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.processor.AbstractProcessor;
import com.hurence.logisland.processor.ProcessContext;
@@ -31,6 +29,7 @@
import java.io.UnsupportedEncodingException;
import java.util.*;
+@Category(ComponentCategory.ANALYTICS)
@Tags({"record", "fields", "Decode"})
@CapabilityDescription("Decode one or more field containing an URL with possibly special chars encoded\n" +
"...")
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSessionTest.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSessionTest.java
index 8af489fdb..d13c1a66c 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSessionTest.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/IncrementalWebSessionTest.java
@@ -37,7 +37,6 @@
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;
-import java.io.IOException;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.time.Instant;
@@ -1054,7 +1053,7 @@ private TestRunner newTestRunner()
runner.addControllerService("elasticsearchClient", elasticsearchClient);
runner.enableControllerService(elasticsearchClient);
- runner.setProperty(setSourceOfTraffic.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient");
+ runner.setProperty(SetSourceOfTraffic.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient");
runner.setProperty(IncrementalWebSession.ES_SESSION_INDEX_FIELD, SESSION_INDEX);
runner.setProperty(IncrementalWebSession.ES_SESSION_TYPE_NAME, SESSION_TYPE);
diff --git a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTrafficTest.java b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTrafficTest.java
index d3f647cb4..5aa176deb 100644
--- a/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTrafficTest.java
+++ b/logisland-components/logisland-processors/logisland-processor-web-analytics/src/test/java/com/hurence/logisland/processor/webAnalytics/setSourceOfTrafficTest.java
@@ -27,7 +27,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic.ES_INDEX_FIELD;
+import static com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic.ES_INDEX_FIELD;
public class setSourceOfTrafficTest {
@@ -310,18 +310,18 @@ public void testAdwordsReferer() throws InitializationException {
private TestRunner getTestRunner() throws InitializationException {
- final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic");
+ final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic");
// create the controller service and link it to the test processor
final MockElasticsearchClientService elasticsearchClient = new MockElasticsearchClientService();
runner.addControllerService("elasticsearchClient", elasticsearchClient);
runner.enableControllerService(elasticsearchClient);
- runner.setProperty(setSourceOfTraffic.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient");
+ runner.setProperty(SetSourceOfTraffic.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient");
final MockCacheService cacheService = new MockCacheService();
runner.addControllerService("cacheService", cacheService);
runner.enableControllerService(cacheService);
- runner.setProperty(setSourceOfTraffic.CONFIG_CACHE_SERVICE, "cacheService");
+ runner.setProperty(SetSourceOfTraffic.CONFIG_CACHE_SERVICE, "cacheService");
runner.setProperty(ES_INDEX_FIELD.getName(), "index1");
diff --git a/logisland-components/logisland-processors/logisland-processor-xml/src/main/java/com/hurence/logisland/processor/xml/EvaluateXPath.java b/logisland-components/logisland-processors/logisland-processor-xml/src/main/java/com/hurence/logisland/processor/xml/EvaluateXPath.java
index ed36811d1..010afca1e 100644
--- a/logisland-components/logisland-processors/logisland-processor-xml/src/main/java/com/hurence/logisland/processor/xml/EvaluateXPath.java
+++ b/logisland-components/logisland-processors/logisland-processor-xml/src/main/java/com/hurence/logisland/processor/xml/EvaluateXPath.java
@@ -17,9 +17,7 @@
import com.hurence.logisland.annotation.behavior.DynamicProperty;
import com.hurence.logisland.annotation.behavior.WritesAttribute;
-import com.hurence.logisland.annotation.documentation.CapabilityDescription;
-import com.hurence.logisland.annotation.documentation.ExtraDetailFile;
-import com.hurence.logisland.annotation.documentation.Tags;
+import com.hurence.logisland.annotation.documentation.*;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.logging.ComponentLog;
@@ -53,6 +51,7 @@
import static javax.xml.xpath.XPathConstants.STRING;
+@Category(ComponentCategory.PARSING)
@Tags({"XML", "evaluate", "XPath"})
@CapabilityDescription("Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to "
+ "new attributes in the records, depending on configuration of the "
diff --git a/logisland-components/logisland-services/logisland-service-cassandra/logisland-service-cassandra-client/src/main/java/com/hurence/logisland/service/cassandra/CassandraControllerService.java b/logisland-components/logisland-services/logisland-service-cassandra/logisland-service-cassandra-client/src/main/java/com/hurence/logisland/service/cassandra/CassandraControllerService.java
index 1c7eddc0c..304f631ca 100644
--- a/logisland-components/logisland-services/logisland-service-cassandra/logisland-service-cassandra-client/src/main/java/com/hurence/logisland/service/cassandra/CassandraControllerService.java
+++ b/logisland-components/logisland-services/logisland-service-cassandra/logisland-service-cassandra-client/src/main/java/com/hurence/logisland/service/cassandra/CassandraControllerService.java
@@ -18,6 +18,8 @@
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Session;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnStopped;
@@ -36,6 +38,8 @@
import java.util.*;
import java.util.concurrent.*;
+
+@Category(ComponentCategory.DATASTORE)
@Tags({"cassandra", "service"})
@CapabilityDescription(
"Provides a controller service that for the moment only allows to bulkput records into cassandra."
diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_2_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_2_4_0_ClientService.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_2_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_2_4_0_ClientService.java
index 632204aac..39c301057 100644
--- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_2_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_2_4_0_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_2_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_2_4_0_ClientService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.elasticsearch;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -70,6 +72,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "elasticsearch", "client"})
@CapabilityDescription("Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.")
public class Elasticsearch_2_4_0_ClientService extends AbstractControllerService implements ElasticsearchClientService {
diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_5_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_5_4_0_ClientService.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_5_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_5_4_0_ClientService.java
index f8e341e0a..5e9240353 100644
--- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_5_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_5_4_0_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_5_4_0-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_5_4_0_ClientService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.elasticsearch;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -74,6 +76,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "elasticsearch", "client"})
@CapabilityDescription("Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.")
public class Elasticsearch_5_4_0_ClientService extends AbstractControllerService implements ElasticsearchClientService {
diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientService.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientService.java
index 4508af71b..e602dfa60 100644
--- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientService.java
@@ -17,6 +17,8 @@
package com.hurence.logisland.service.elasticsearch;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -72,6 +74,7 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiConsumer;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "elasticsearch", "client"})
@CapabilityDescription("Implementation of ElasticsearchClientService for Elasticsearch 6.6.2.")
public class Elasticsearch_6_6_2_ClientService extends AbstractControllerService implements ElasticsearchClientService {
diff --git a/logisland-components/logisland-services/logisland-service-hbase/logisland-service-hbase_1_1_2-client/src/main/java/com/hurence/logisland/service/hbase/HBase_1_1_2_ClientService.java b/logisland-components/logisland-services/logisland-service-hbase/logisland-service-hbase_1_1_2-client/src/main/java/com/hurence/logisland/service/hbase/HBase_1_1_2_ClientService.java
index 53a16a1b8..6dd484af1 100644
--- a/logisland-components/logisland-services/logisland-service-hbase/logisland-service-hbase_1_1_2-client/src/main/java/com/hurence/logisland/service/hbase/HBase_1_1_2_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-hbase/logisland-service-hbase_1_1_2-client/src/main/java/com/hurence/logisland/service/hbase/HBase_1_1_2_ClientService.java
@@ -17,6 +17,8 @@
import com.hurence.logisland.annotation.behavior.DynamicProperty;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -67,6 +69,7 @@
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "hbase", "client"})
@CapabilityDescription("Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing " +
"a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files " +
diff --git a/logisland-components/logisland-services/logisland-service-influxdb/logisland-service-influxdb-client/src/main/java/com/hurence/logisland/service/influxdb/InfluxDBControllerService.java b/logisland-components/logisland-services/logisland-service-influxdb/logisland-service-influxdb-client/src/main/java/com/hurence/logisland/service/influxdb/InfluxDBControllerService.java
index 1b6e91867..cbed28d2d 100644
--- a/logisland-components/logisland-services/logisland-service-influxdb/logisland-service-influxdb-client/src/main/java/com/hurence/logisland/service/influxdb/InfluxDBControllerService.java
+++ b/logisland-components/logisland-services/logisland-service-influxdb/logisland-service-influxdb-client/src/main/java/com/hurence/logisland/service/influxdb/InfluxDBControllerService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.influxdb;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnStopped;
@@ -41,6 +43,7 @@
import java.util.*;
import java.util.concurrent.*;
+@Category(ComponentCategory.DATASTORE)
@Tags({"influxdb", "service", "time series"})
@CapabilityDescription(
"Provides a controller service that for the moment only allows to bulkput records into influxdb."
diff --git a/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/CSVKeyValueCacheService.java b/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/CSVKeyValueCacheService.java
index 1c3440dcd..206f7ee43 100644
--- a/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/CSVKeyValueCacheService.java
+++ b/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/CSVKeyValueCacheService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.cache;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.component.AllowableValue;
import com.hurence.logisland.component.InitializationException;
@@ -45,6 +47,7 @@
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
+@Category(ComponentCategory.DATASTORE)
@Tags({"csv", "service", "cache"})
@CapabilityDescription("A cache that store csv lines as records loaded from a file")
public class CSVKeyValueCacheService extends LRUKeyValueCacheService implements DatastoreClientService {
diff --git a/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/LRUKeyValueCacheService.java b/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/LRUKeyValueCacheService.java
index b16ee07b8..a4858a717 100644
--- a/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/LRUKeyValueCacheService.java
+++ b/logisland-components/logisland-services/logisland-service-inmemory-cache/src/main/java/com/hurence/logisland/service/cache/LRUKeyValueCacheService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.cache;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
import com.hurence.logisland.component.InitializationException;
@@ -46,6 +48,7 @@
*
* @see LinkedHashMap
*/
+@Category(ComponentCategory.DATASTORE)
@Tags({"cache", "service", "key", "value", "pair", "LRU"})
@CapabilityDescription("A controller service for caching data by key value pair with LRU (last recently used) strategy. using LinkedHashMap")
public class LRUKeyValueCacheService extends AbstractControllerService implements CacheService {
diff --git a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java
index 356ee6fc7..86722f917 100644
--- a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java
+++ b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java
@@ -15,6 +15,8 @@
*/
package com.hurence.logisland.service.iptogeo.maxmind;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.service.iptogeo.IpToGeoService;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
import com.hurence.logisland.annotation.documentation.Tags;
@@ -41,6 +43,7 @@
import org.apache.hadoop.fs.Path;
import java.net.URI;
+@Category(ComponentCategory.ENRICHMENT)
@Tags({"ip", "service", "geo", "maxmind"})
@CapabilityDescription("Implementation of the IP 2 GEO Service using maxmind lite db file")
public class MaxmindIpToGeoService extends AbstractControllerService implements IpToGeoService {
diff --git a/logisland-components/logisland-services/logisland-service-mongodb/logisland-service-mongodb-client/src/main/java/com/hurence/logisland/service/mongodb/MongoDBControllerService.java b/logisland-components/logisland-services/logisland-service-mongodb/logisland-service-mongodb-client/src/main/java/com/hurence/logisland/service/mongodb/MongoDBControllerService.java
index 643ca3584..9246e6cf0 100644
--- a/logisland-components/logisland-services/logisland-service-mongodb/logisland-service-mongodb-client/src/main/java/com/hurence/logisland/service/mongodb/MongoDBControllerService.java
+++ b/logisland-components/logisland-services/logisland-service-mongodb/logisland-service-mongodb-client/src/main/java/com/hurence/logisland/service/mongodb/MongoDBControllerService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.mongodb;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -48,6 +50,7 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+@Category(ComponentCategory.DATASTORE)
@Tags({"mongo", "mongodb", "service"})
@CapabilityDescription(
"Provides a controller service that wraps most of the functionality of the MongoDB driver."
diff --git a/logisland-components/logisland-services/logisland-service-proxy/src/main/java/com/hurence/logisland/rest/service/proxy/StandardProxyConfigurationService.java b/logisland-components/logisland-services/logisland-service-proxy/src/main/java/com/hurence/logisland/rest/service/proxy/StandardProxyConfigurationService.java
index 0b1729157..9598ae3b8 100644
--- a/logisland-components/logisland-services/logisland-service-proxy/src/main/java/com/hurence/logisland/rest/service/proxy/StandardProxyConfigurationService.java
+++ b/logisland-components/logisland-services/logisland-service-proxy/src/main/java/com/hurence/logisland/rest/service/proxy/StandardProxyConfigurationService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.rest.service.proxy;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
import com.hurence.logisland.component.InitializationException;
@@ -40,6 +42,7 @@
* Processor inspired from StandardProxyConfigurationService nifi processor
*
*/
+@Category(ComponentCategory.UTILS)
@CapabilityDescription("Provides a set of configurations for different NiFi components to use a proxy server.")
@Tags({"Proxy"})
public class StandardProxyConfigurationService extends AbstractControllerService implements ProxyConfigurationService {
diff --git a/logisland-components/logisland-services/logisland-service-redis/src/main/java/com/hurence/logisland/redis/service/RedisKeyValueCacheService.java b/logisland-components/logisland-services/logisland-service-redis/src/main/java/com/hurence/logisland/redis/service/RedisKeyValueCacheService.java
index a03459552..7cfc038a1 100644
--- a/logisland-components/logisland-services/logisland-service-redis/src/main/java/com/hurence/logisland/redis/service/RedisKeyValueCacheService.java
+++ b/logisland-components/logisland-services/logisland-service-redis/src/main/java/com/hurence/logisland/redis/service/RedisKeyValueCacheService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.redis.service;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
import com.hurence.logisland.component.AllowableValue;
@@ -57,6 +59,7 @@
*
*
You specify default TTL
*/
+@Category(ComponentCategory.DATASTORE)
@Tags({"cache", "service", "key", "value", "pair", "redis"})
@CapabilityDescription("A controller service for caching records by key value pair with LRU (last recently used) strategy. using LinkedHashMap")
public class RedisKeyValueCacheService extends AbstractControllerService implements DatastoreClientService, CacheService {
diff --git a/logisland-components/logisland-services/logisland-service-rest/src/main/java/com/hurence/logisland/rest/service/lookup/RestLookupService.java b/logisland-components/logisland-services/logisland-service-rest/src/main/java/com/hurence/logisland/rest/service/lookup/RestLookupService.java
index f62560691..f70440b2f 100644
--- a/logisland-components/logisland-services/logisland-service-rest/src/main/java/com/hurence/logisland/rest/service/lookup/RestLookupService.java
+++ b/logisland-components/logisland-services/logisland-service-rest/src/main/java/com/hurence/logisland/rest/service/lookup/RestLookupService.java
@@ -22,6 +22,8 @@
import com.hurence.logisland.annotation.behavior.DynamicProperties;
import com.hurence.logisland.annotation.behavior.DynamicProperty;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnDisabled;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
@@ -59,6 +61,7 @@
* Processor inspired from RestLookupService nifi processor
*
*/
+@Category(ComponentCategory.DATASTORE)
@Tags({ "rest", "lookup", "json", "xml", "http" })
@CapabilityDescription("Use a REST service to look up values.")
@DynamicProperties({
diff --git a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_5_5_5-client/src/main/java/com/hurence/logisland/service/solr/Solr_5_5_5_ClientService.java b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_5_5_5-client/src/main/java/com/hurence/logisland/service/solr/Solr_5_5_5_ClientService.java
index 0ec2ef497..92ed67288 100644
--- a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_5_5_5-client/src/main/java/com/hurence/logisland/service/solr/Solr_5_5_5_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_5_5_5-client/src/main/java/com/hurence/logisland/service/solr/Solr_5_5_5_ClientService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.solr;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.service.solr.api.SolrClientService;
import org.apache.solr.client.solrj.SolrClient;
@@ -23,6 +25,7 @@
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.slf4j.LoggerFactory;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "solr", "client"})
@CapabilityDescription("Implementation of ElasticsearchClientService for Solr 5.5.5.")
public class Solr_5_5_5_ClientService extends SolrClientService {
diff --git a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_6_6_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_6_2_ClientService.java b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_6_6_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_6_2_ClientService.java
index ebd3f6acf..6346ef4f6 100644
--- a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_6_6_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_6_2_ClientService.java
+++ b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_6_6_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_6_2_ClientService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.solr;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.service.datastore.DatastoreClientServiceException;
import com.hurence.logisland.service.solr.api.SolrClientService;
@@ -31,6 +33,7 @@
import java.io.IOException;
import java.util.*;
+@Category(ComponentCategory.DATASTORE)
@Tags({ "solr", "client"})
@CapabilityDescription("Implementation of ElasticsearchClientService for Solr 5.5.5.")
public class Solr_6_6_2_ClientService extends SolrClientService {
diff --git a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_chronix_6_4_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_4_2_ChronixClientService.java b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_chronix_6_4_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_4_2_ChronixClientService.java
index 1df25599b..44da5175c 100644
--- a/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_chronix_6_4_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_4_2_ChronixClientService.java
+++ b/logisland-components/logisland-services/logisland-service-solr/logisland-service-solr_chronix_6_4_2-client/src/main/java/com/hurence/logisland/service/solr/Solr_6_4_2_ChronixClientService.java
@@ -16,6 +16,8 @@
package com.hurence.logisland.service.solr;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.annotation.lifecycle.OnEnabled;
import com.hurence.logisland.component.InitializationException;
@@ -47,6 +49,7 @@
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
+@Category(ComponentCategory.DATASTORE)
@Tags({"solr", "client"})
@CapabilityDescription("Implementation of ChronixClientService for Solr 6 4 2")
public class Solr_6_4_2_ChronixClientService extends AbstractControllerService implements DatastoreClientService {
diff --git a/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/Category.java b/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/Category.java
new file mode 100644
index 000000000..440629b73
--- /dev/null
+++ b/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/Category.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) 2016 Hurence (support@hurence.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.hurence.logisland.annotation.documentation;
+
+import com.hurence.logisland.controller.ControllerService;
+import com.hurence.logisland.processor.Processor;
+
+import java.lang.annotation.*;
+
+/**
+ * Annotation that can be applied to a {@link Processor} or {@link ControllerService} in order to
+ * associate a category keyword with the component. This annotation does not affect the
+ * component in any way but serves as additional documentation and can be used to
+ * sort/filter Processors/Services.
+ *
+ */
+@Documented
+@Target({ElementType.TYPE})
+@Retention(RetentionPolicy.RUNTIME)
+@Inherited
+public @interface Category {
+
+ String value();
+}
diff --git a/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/ComponentCategory.java b/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/ComponentCategory.java
new file mode 100644
index 000000000..dca71251b
--- /dev/null
+++ b/logisland-core/logisland-api/src/main/java/com/hurence/logisland/annotation/documentation/ComponentCategory.java
@@ -0,0 +1,15 @@
+package com.hurence.logisland.annotation.documentation;
+
+public class ComponentCategory {
+
+ public static final String MISC = "misc";
+ public static final String UTILS = "utils";
+ public static final String PROCESSING = "processing";
+ public static final String PARSING = "parsing";
+ public static final String DATASTORE = "datastore";
+ public static final String ALERTING = "alerting";
+ public static final String SECURITY = "security";
+ public static final String ENRICHMENT = "enrichment";
+ public static final String ANALYTICS = "analytics";
+ public static final String TIMESERIES = "timeseries";
+}
diff --git a/logisland-documentation/components.rst b/logisland-documentation/components.rst
index c97247ca6..11dc8afb6 100644
--- a/logisland-documentation/components.rst
+++ b/logisland-documentation/components.rst
@@ -2875,7 +2875,7 @@ com.hurence.logisland:logisland-processor-web-analytics:1.1.2
Class
_____
-com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
Tags
____
diff --git a/logisland-documentation/pom.xml b/logisland-documentation/pom.xml
index 4200962e2..b23ff5028 100644
--- a/logisland-documentation/pom.xml
+++ b/logisland-documentation/pom.xml
@@ -364,6 +364,7 @@ THIS MODULE DOCUMENTATION DEPENDENCIES
${generate-components-dir}
-f
other-processors
+ -a
@@ -460,6 +461,7 @@ THIS MODULE DOCUMENTATION DEPENDENCIES
${generate-components-dir}
-f
services
+ -a
diff --git a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGenerator.java b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGenerator.java
index d3ad7b169..8948d89ee 100644
--- a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGenerator.java
+++ b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGenerator.java
@@ -89,6 +89,7 @@ public static void main(String[] args) {
File rootDocDir = new File(dir);
DocGeneratorUtils.generate(rootDocDir, fileName, "rst", append);
+ DocGeneratorUtils.generate(rootDocDir, "components", "yaml", append);
}
private static void printUsage(Options options) {
diff --git a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGeneratorUtils.java b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGeneratorUtils.java
index 47761ad2f..328202203 100644
--- a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGeneratorUtils.java
+++ b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/DocGeneratorUtils.java
@@ -15,6 +15,8 @@
*/
package com.hurence.logisland.documentation;
+import com.hurence.logisland.annotation.documentation.Category;
+import com.hurence.logisland.annotation.documentation.ComponentCategory;
import com.hurence.logisland.classloading.PluginLoader;
import com.hurence.logisland.classloading.PluginProxy;
import com.hurence.logisland.component.ComponentContext;
@@ -25,6 +27,7 @@
import com.hurence.logisland.documentation.json.JsonDocumentationWriter;
import com.hurence.logisland.documentation.rst.RstDocumentationWriter;
import com.hurence.logisland.documentation.util.ClassFinder;
+import com.hurence.logisland.documentation.yaml.YamlDocumentationWriter;
import com.hurence.logisland.engine.ProcessingEngine;
import com.hurence.logisland.processor.Processor;
import com.hurence.logisland.stream.RecordStream;
@@ -37,7 +40,7 @@
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
-import java.util.stream.IntStream;
+import java.util.stream.Collectors;
/**
* Uses the ExtensionManager to get a list of Processor, ControllerService, and
@@ -46,6 +49,7 @@
public class DocGeneratorUtils {
private static final Logger logger = LoggerFactory.getLogger(DocGeneratorUtils.class);
+
/**
* Generates documentation into the work/docs dir specified from a specified set of class
*/
@@ -127,6 +131,31 @@ public static void generate(final File docsDirectory,
logger.warn(e.getMessage());
throw new RuntimeException(e);//so we know there is something wrong with doc generation
}
+ } else if (writerType.equals("yaml")) {
+ final File baseDocumenationFile = new File(docsDirectory, filename + "." + writerType);
+ if (baseDocumenationFile.exists() && !append)
+ baseDocumenationFile.delete();
+ if (!baseDocumenationFile.exists()) {
+
+ try (final PrintWriter writer = new PrintWriter(new FileOutputStream(baseDocumenationFile, true))) {
+ writer.println(
+ "--- # document start\n" +
+ "\n" +
+ "categories:\n" +
+ " - processing\n" +
+ " - parsing\n" +
+ " - datastore\n" +
+ " - alerting\n" +
+ " - security\n" +
+ " - enrichment\n" +
+ " - analytics\n" +
+ " - timeseries\n" +
+ "extensions:");
+ } catch (FileNotFoundException e) {
+ logger.warn(e.getMessage());
+ throw new RuntimeException(e);//so we know there is something wrong with doc generation
+ }
+ }
}
Class[] sortedExtensionsClasses = new Class[extensionClasses.size()];
@@ -153,20 +182,56 @@ public int compare(Class s1, Class s2) {
}
});
+
logger.info("Generating {} documentation for {} components in: {}",
writerType,
Arrays.stream(sortedExtensionsClasses).count(),
docsDirectory);
Arrays.stream(sortedExtensionsClasses)
- .forEach(extensionClass -> {
- final Class componentClass = extensionClass.asSubclass(ConfigurableComponent.class);
- try {
- document(docsDirectory, componentClass, writerType, filename);
- } catch (Exception e) {
- logger.error("Unexpected error for " + extensionClass, e);
- throw new RuntimeException(e);//so we know there is something wrong with doc generation
- }
+ .collect(Collectors.groupingBy(configurableComponent -> {
+ final Category categoryAnnot = (Category) configurableComponent.asSubclass(ConfigurableComponent.class).getAnnotation(
+ Category.class);
+
+ final String category;
+ if (categoryAnnot != null) {
+ category = categoryAnnot.value();
+ } else {
+ category = ComponentCategory.MISC;
+ }
+
+ return category;
+ }
+ ))
+ .entrySet()
+ .forEach(entry -> {
+
+ /* String category = entry.getKey();
+ if (writerType.equals("yaml")) {
+
+ final File baseDocumenationFile = new File(docsDirectory, filename + "." + writerType);
+ try (final PrintWriter writer = new PrintWriter(new FileOutputStream(baseDocumenationFile, true))) {
+ writer.println(
+ " - category: " + category + "\n" +
+ " extensions:\n");
+ } catch (FileNotFoundException e) {
+ logger.warn(e.getMessage());
+ throw new RuntimeException(e);//so we know there is something wrong with doc generation
+ }
+
+
+ }*/
+
+ entry.getValue().forEach(extensionClass -> {
+
+ final Class componentClass = extensionClass.asSubclass(ConfigurableComponent.class);
+ try {
+ document(docsDirectory, componentClass, writerType, filename);
+ } catch (Exception e) {
+ logger.error("Unexpected error for " + extensionClass, e);
+ throw new RuntimeException(e);//so we know there is something wrong with doc generation
+ }
+ });
});
@@ -218,7 +283,7 @@ private static void document(final File docsDir,
final DocumentationWriter writer = getDocumentWriter(componentClass, writerType);
- final File baseDocumenationFile = new File(docsDir,filename + "." + writerType);
+ final File baseDocumenationFile = new File(docsDir, filename + "." + writerType);
try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(baseDocumenationFile, true))) {
writer.write(component, output);
@@ -260,6 +325,8 @@ private static DocumentationWriter getDocumentWriter(final Class extends Confi
return new RstDocumentationWriter();
case "json":
return new JsonDocumentationWriter();
+ case "yaml":
+ return new YamlDocumentationWriter();
default:
return null;
}
diff --git a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlDocumentationWriter.java b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlDocumentationWriter.java
new file mode 100644
index 000000000..aeb33b1fd
--- /dev/null
+++ b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlDocumentationWriter.java
@@ -0,0 +1,392 @@
+/**
+ * Copyright (C) 2016 Hurence (support@hurence.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.hurence.logisland.documentation.yaml;
+
+import com.hurence.logisland.annotation.behavior.DynamicProperties;
+import com.hurence.logisland.annotation.behavior.DynamicProperty;
+import com.hurence.logisland.annotation.documentation.*;
+import com.hurence.logisland.classloading.PluginClassLoader;
+import com.hurence.logisland.classloading.PluginLoader;
+import com.hurence.logisland.component.AllowableValue;
+import com.hurence.logisland.component.ConfigurableComponent;
+import com.hurence.logisland.component.PropertyDescriptor;
+import com.hurence.logisland.documentation.DocumentationWriter;
+
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+
+/**
+ * Generates YAML documentation for a ConfigurableComponent.
+ *
+ * https://yaml.org/spec/1.2/spec.html
+ * https://yaml.org/spec/1.2/spec.html#id2760395
+ */
+public class YamlDocumentationWriter implements DocumentationWriter {
+
+ /**
+ * The filename where additional user specified information may be stored.
+ */
+ public static final String ADDITIONAL_DETAILS_RST = "additionalDetails.rst";
+
+ @Override
+ public void write(final ConfigurableComponent configurableComponent, final OutputStream streamToWriteTo) {
+
+ final YamlPrintWriter yamlPrintWriter = new YamlPrintWriter(streamToWriteTo, true);
+
+
+ writeDescription(configurableComponent, yamlPrintWriter);
+ writeTags(configurableComponent, yamlPrintWriter);
+ /* writeProperties(configurableComponent, yamlPrintWriter);
+ writeDynamicProperties(configurableComponent, yamlPrintWriter);
+ writeAdditionalBodyInfo(configurableComponent, yamlPrintWriter);
+ writeSeeAlso(configurableComponent, yamlPrintWriter);*/
+
+ yamlPrintWriter.close();
+ }
+
+
+
+ /**
+ * Gets the class name of the component.
+ *
+ * @param configurableComponent the component to describe
+ * @return the class name of the component
+ */
+ protected String getTitle(final ConfigurableComponent configurableComponent) {
+ return configurableComponent.getClass().getSimpleName();
+ }
+
+
+
+
+
+
+ private void writeTags(final ConfigurableComponent configurableComponent,
+ final YamlPrintWriter rstWriter) {
+ final Tags tags = configurableComponent.getClass().getAnnotation(Tags.class);
+
+ if (tags != null) {
+ final String tagString = "[" + join(tags.value(), ", ") + "]";
+ rstWriter.writeProperty(2, "tags",tagString);
+ }
+ }
+
+ static String join(final String[] toJoin, final String delimiter) {
+ final StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < toJoin.length; i++) {
+ sb.append(toJoin[i]);
+ if (i < toJoin.length - 1) {
+ sb.append(delimiter);
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Writes a description of the configurable component.
+ *
+ * @param configurableComponent the component to describe
+ * @param rstWriter the stream writer
+ */
+ protected void writeDescription(final ConfigurableComponent configurableComponent,
+ final YamlPrintWriter rstWriter) {
+ rstWriter.writeProperty(1, "- name", getTitle(configurableComponent));
+
+ rstWriter.writeProperty(2, "description", getDescription(configurableComponent));
+ rstWriter.writeProperty(2, "category", getCategory(configurableComponent));
+
+ PluginClassLoader cl = (PluginClassLoader) PluginLoader.getRegistry().get(configurableComponent.getClass().getCanonicalName());
+ if (cl != null) {
+ rstWriter.writeProperty(2,"module", cl.getModuleInfo().getArtifact());
+ }
+
+ rstWriter.writeProperty(2, "class", configurableComponent.getClass().getCanonicalName());
+ }
+
+
+ /**
+ * Gets a description of the ConfigurableComponent using the
+ * CapabilityDescription annotation.
+ *
+ * @param configurableComponent the component to describe
+ * @return a description of the configurableComponent
+ */
+ protected String getDescription(final ConfigurableComponent configurableComponent) {
+ final CapabilityDescription capabilityDescription = configurableComponent.getClass().getAnnotation(
+ CapabilityDescription.class);
+
+ final String description;
+ if (capabilityDescription != null) {
+ description = capabilityDescription.value();
+ } else {
+ description = "No description provided.";
+ }
+
+ return description;
+ }
+
+ /**
+ * Gets the category of the ConfigurableComponent using the
+ * Category annotation, falling back to ComponentCategory.MISC when absent.
+ *
+ * @param configurableComponent the component to describe
+ * @return the category of the configurableComponent
+ */
+ protected String getCategory(final ConfigurableComponent configurableComponent) {
+ final Category categoryAnnot = configurableComponent.getClass().getAnnotation(
+ Category.class);
+
+ final String category;
+ if (categoryAnnot != null) {
+ category = categoryAnnot.value();
+ } else {
+ category = ComponentCategory.MISC;
+ }
+
+ return category;
+ }
+
+ /**
+ * Writes the PropertyDescriptors out as a table.
+ *
+ * @param configurableComponent the component to describe
+ * @param rstWriter the stream writer
+ */
+ protected void writeProperties(final ConfigurableComponent configurableComponent,
+ final YamlPrintWriter rstWriter) {
+
+ /* final List properties = configurableComponent.getPropertyDescriptors();
+ rstWriter.writeSectionTitle(3, "Properties");
+
+ if (properties.size() > 0) {
+ final boolean containsExpressionLanguage = containsExpressionLanguage(configurableComponent);
+ final boolean containsSensitiveProperties = containsSensitiveProperties(configurableComponent);
+ rstWriter.print("In the list below, the names of required properties appear in ");
+ rstWriter.printStrong("bold");
+ rstWriter.print(". Any other properties (not in bold) are considered optional. " +
+ "The table also indicates any default values");
+ if (containsExpressionLanguage) {
+ if (!containsSensitiveProperties) {
+ rstWriter.print(", and ");
+ } else {
+ rstWriter.print(", ");
+ }
+ rstWriter.print("whether a property supports the ");
+ rstWriter.writeLink("Expression Language", "expression-language.html");
+ }
+ if (containsSensitiveProperties) {
+ rstWriter.print(", and whether a property is considered \"sensitive\".");
+// ", meaning that its value will be encrypted. Before entering a "
+// + "value in a sensitive property, ensure that the ");
+
+// rstWriter.printStrong("logisland.properties");
+// rstWriter.print(" file has " + "an entry for the property ");
+// rstWriter.printStrong("logisland.sensitive.props.key");
+ }
+ rstWriter.println(".");
+
+ rstWriter.printCsvTable("allowable-values",
+ new String[]{"Name", "Description", "Allowable Values", "Default Value", "Sensitive", "EL"},
+ new int[]{20, 60, 30, 20, 10, 10},
+ '\\');
+
+
+ // write the individual properties
+ for (PropertyDescriptor property : properties) {
+
+ rstWriter.print(" \"");
+ if (property.isRequired()) {
+ rstWriter.printStrong(property.getName().replace("\"", "\\\""));
+ } else {
+ rstWriter.print(property.getName().replace("\"", "\\\""));
+ }
+ rstWriter.print("\", ");
+
+ rstWriter.print("\"");
+ if (property.getDescription() != null && property.getDescription().trim().length() > 0) {
+ rstWriter.print(property.getDescription().replace("\n", "\n\n ").replace("\"", "\\\""));
+ } else {
+ rstWriter.print("No Description Provided.");
+ }
+ rstWriter.print("\", ");
+
+ rstWriter.print("\"");
+ writeValidValues(rstWriter, property);
+ rstWriter.print("\", ");
+
+
+ rstWriter.print("\"");
+ rstWriter.print(property.getDefaultValue() == null ? null : property.getDefaultValue().replace("\"", "\\\""));
+ rstWriter.print("\", ");
+
+
+ rstWriter.print("\"");
+ if (property.isSensitive()) {
+ rstWriter.printStrong("true");
+ } else {
+ rstWriter.print("false");
+ }
+ rstWriter.print("\", ");
+
+
+ rstWriter.print("\"");
+ if (property.isExpressionLanguageSupported()) {
+ rstWriter.printStrong("true");
+ } else {
+ rstWriter.print("false");
+ }
+
+
+ rstWriter.println("\"");
+
+ }
+
+
+ } else {
+ rstWriter.println("This component has no required or optional properties.");
+ }*/
+ }
+
+ /**
+ * Indicates whether or not the component contains at least one sensitive property.
+ *
+ * @param component the component to interrogate
+ * @return whether or not the component contains at least one sensitive property.
+ */
+ private boolean containsSensitiveProperties(final ConfigurableComponent component) {
+ for (PropertyDescriptor descriptor : component.getPropertyDescriptors()) {
+ if (descriptor.isSensitive()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Indicates whether or not the component contains at least one property that supports Expression Language.
+ *
+ * @param component the component to interrogate
+ * @return whether or not the component contains at least one property that supports Expression Language.
+ */
+ private boolean containsExpressionLanguage(final ConfigurableComponent component) {
+ for (PropertyDescriptor descriptor : component.getPropertyDescriptors()) {
+ if (descriptor.isExpressionLanguageSupported()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private void writeDynamicProperties(final ConfigurableComponent configurableComponent,
+ final YamlPrintWriter rstWriter) {
+
+ /* final List dynamicProperties = getDynamicProperties(configurableComponent);
+
+ if (dynamicProperties != null && dynamicProperties.size() > 0) {
+ rstWriter.writeSectionTitle(3, "Dynamic Properties");
+ rstWriter.println("Dynamic Properties allow the user to specify both the name and value of a property.");
+ rstWriter.printCsvTable("dynamic-properties",
+ new String[]{"Name", "Value", "Description", "Allowable Values", "Default Value", "EL"},
+ new int[]{20, 20, 40, 40, 20, 10},
+ '\\');
+
+ for (final DynamicProperty dynamicProperty : dynamicProperties) {
+
+ rstWriter.print(" \"");
+ rstWriter.print(dynamicProperty.name().replace("\"", "\\\""));
+ rstWriter.print("\", ");
+
+ rstWriter.print("\"");
+ rstWriter.print(dynamicProperty.value().replace("\"", "\\\""));
+ rstWriter.print("\", ");
+
+ rstWriter.print("\"");
+ rstWriter.print(dynamicProperty.description().replace("\"", "\\\""));
+ rstWriter.print("\", ");
+
+ final PropertyDescriptor descriptorExample = configurableComponent.getPropertyDescriptor(dynamicProperty.nameForDoc());
+
+ rstWriter.print("\"");
+ writeValidValues(rstWriter, descriptorExample);
+ rstWriter.print("\", ");
+
+ rstWriter.print("\"");
+ rstWriter.print(descriptorExample.getDefaultValue() == null ? null : descriptorExample.getDefaultValue().replace("\"", "\\\""));
+ rstWriter.print("\", ");
+
+ if (dynamicProperty.supportsExpressionLanguage()) {
+ rstWriter.printStrong("true");
+ } else
+ rstWriter.print("false");
+ rstWriter.println();
+ }
+
+ }*/
+ }
+
+ private List getDynamicProperties(ConfigurableComponent configurableComponent) {
+ final List dynamicProperties = new ArrayList<>();
+ final DynamicProperties dynProps = configurableComponent.getClass().getAnnotation(DynamicProperties.class);
+ if (dynProps != null) {
+ Collections.addAll(dynamicProperties, dynProps.value());
+ }
+
+ final DynamicProperty dynProp = configurableComponent.getClass().getAnnotation(DynamicProperty.class);
+ if (dynProp != null) {
+ dynamicProperties.add(dynProp);
+ }
+
+ return dynamicProperties;
+ }
+
+ private void writeValidValueDescription(YamlPrintWriter rstWriter, String description) {
+ rstWriter.print(description);
+// rstWriter.writeImage("_static/iconInfo.png", description, null, null, null, null);
+ }
+
+ /**
+ * Interrogates a PropertyDescriptor to get a list of AllowableValues, if
+ * there are none, nothing is written to the stream.
+ *
+ * @param rstWriter the stream writer to use
+ * @param property the property to describe
+ */
+ protected void writeValidValues(YamlPrintWriter rstWriter, PropertyDescriptor property) {
+ if (property.getAllowableValues() != null && property.getAllowableValues().size() > 0) {
+
+ boolean first = true;
+ for (AllowableValue value : property.getAllowableValues()) {
+ if (!first) {
+ rstWriter.print(", ");
+ } else {
+ first = false;
+ }
+ rstWriter.print(value.getValue() == null ? null : value.getValue().replace("\"", "\\\""));
+// rstWriter.print(value.getDisplayName());
+ if (value.getDescription() != null) {
+ writeValidValueDescription(rstWriter, " (" + value.getDescription().replace("\"", "\\\"") + ")");
+ }
+ }
+ }
+ }
+
+
+
+
+}
diff --git a/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlPrintWriter.java b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlPrintWriter.java
new file mode 100644
index 000000000..0518a8835
--- /dev/null
+++ b/logisland-documentation/src/main/java/com/hurence/logisland/documentation/yaml/YamlPrintWriter.java
@@ -0,0 +1,100 @@
+/**
+ * Copyright (C) 2016 Hurence (support@hurence.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.hurence.logisland.documentation.yaml;
+
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Arrays;
+
+public class YamlPrintWriter extends PrintWriter {
+
+ private static final Character[] SECTION_DELIMITERS = new Character[]{'=', '-', '_', '.', ':', '`', '\'', '\"', '~', '^', '*', '+', '#'};
+
+ public YamlPrintWriter(OutputStream out, boolean autoFlush) {
+ super(out, autoFlush);
+ }
+
+
+ public void printDescriptionString(String descriptionString) {
+ println(descriptionString);
+ }
+
+
+ public void writeProperty(final int sectionLevel, final String key, final String value) {
+ assert key != null;
+ assert !key.isEmpty();
+
+ char[] charArray = new char[2 * sectionLevel];
+ Arrays.fill(charArray, ' ');
+ String delimiter = ": ";
+
+ print(charArray);
+ print(key);
+ print(delimiter);
+
+ if (value.contains("\n")) {
+ String[] desc = value.split("\n");
+ println(" >");
+ for (String line : desc) {
+ print(charArray);
+ print(" ");
+ println(line);
+ }
+ } else {
+ println(value);
+ }
+
+
+ }
+
+
+ public void printCsvTable(final String title, final String[] headers, final int[] widths, final Character escape) {
+ println();
+ print(".. csv-table:: ");
+ println(title);
+
+
+ if (headers != null) {
+ StringBuilder strHeaders = new StringBuilder();
+ for (int i = 0; i < headers.length; i++) {
+ strHeaders.append('"');
+ strHeaders.append(headers[i]);
+ strHeaders.append('"');
+ if (i < headers.length - 1)
+ strHeaders.append(',');
+ }
+ print(" :header: ");
+ println(strHeaders.toString());
+ }
+
+ if (widths != null) {
+ StringBuilder strWidths = new StringBuilder();
+ for (int i = 0; i < widths.length; i++) {
+ strWidths.append(widths[i]);
+ if (i < widths.length - 1)
+ strWidths.append(',');
+ }
+ print(" :widths: ");
+ println(strWidths.toString());
+ }
+ if (escape != null) {
+ print(" :escape: ");
+ println(escape);
+ }
+
+ println();
+ }
+}
diff --git a/logisland-documentation/user/components/common-processors.rst b/logisland-documentation/user/components/common-processors.rst
index db2e37c66..407ded30a 100644
--- a/logisland-documentation/user/components/common-processors.rst
+++ b/logisland-documentation/user/components/common-processors.rst
@@ -9,147 +9,17 @@ Find below the list.
----------
-.. _com.hurence.logisland.processor.AddFields:
-
-AddFields
----------
-Add one or more field to records
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.AddFields
-
-Tags
-____
-record, fields, Add
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
-
-Dynamic Properties
-__________________
-Dynamic Properties allow the user to specify both the name and value of a property.
-
-.. csv-table:: dynamic-properties
- :header: "Name","Value","Description","Allowable Values","Default Value","EL"
- :widths: 20,20,40,40,20,10
- :escape: \
-
- "Name of the field to add", "Value of the field to add", "Add a field to the record with the specified value. Expression language can be used.You can not add a field that end with '.type' as this suffix is used to specify the type of fields to add", "", "null", **true**
- "Name of the field to add with the suffix '.field.type'", "Type of the field to add", "Add a field to the record with the specified type. These properties are only used if a correspondant property without the suffix '.field.type' is already defined. If this property is not defined, default type for adding fields is String.You can only use Logisland predefined type fields.", "NULL, STRING, INT, LONG, ARRAY, FLOAT, DOUBLE, BYTES, RECORD, MAP, ENUM, BOOLEAN, UNION, DATETIME", "STRING", false
- "Name of the field to add with the suffix '.field.name'", "Name of the field to add using expression language", "Add a field to the record with the specified name (which is evaluated using expression language). These properties are only used if a correspondant property without the suffix '.field.name' is already defined. If this property is not defined, the name of the field to add is the key of the first dynamic property (which is the main and only required dynamic property).", "", "null", **true**
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/AddFields-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.ApplyRegexp:
+.. _com.hurence.logisland.processor.alerting.ComputeTags:
-ApplyRegexp
+ComputeTags
-----------
-This processor is used to create a new set of fields from one field (using regexp).
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.ApplyRegexp
-
-Tags
-____
-parser, regex, log, record
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
-
-Dynamic Properties
-__________________
-Dynamic Properties allow the user to specify both the name and value of a property.
-
-.. csv-table:: dynamic-properties
- :header: "Name","Value","Description","Allowable Values","Default Value","EL"
- :widths: 20,20,40,40,20,10
- :escape: \
-
- "alternative regex & mapping", "another regex that could match", "This processor is used to create a new set of fields from one field (using regexp).", "", "null", **true**
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/ApplyRegexp-Detail.rst
-See Also:
-_________
-`com.hurence.logisland.processor.ApplyRegexp`_
-
-----------
-
-.. _com.hurence.logisland.processor.datastore.BulkPut:
-
-BulkPut
--------
-Indexes the content of a Record in a Datastore using bulk processor
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.datastore.BulkPut
-
-Tags
-____
-datastore, record, put, bulk
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
- "**default.collection**", "The name of the collection/index/table to insert into", "", "null", "false", "**true**"
- "default.type", "The type of this document (required by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
- "**timebased.collection**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
- "date.format", "simple date format for date suffix. default : yyyy.MM.dd", "", "yyyy.MM.dd", "false", "false"
- "collection.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "**true**"
- "type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/BulkPut-Detail.rst
-----------
+Compute tag cross from given formulas.
-.. _com.hurence.logisland.processor.alerting.CheckAlerts:
+- each dynamic property will return a new record according to the formula definition
+- the record name will be set to the property name
+- the record time will be set to the current timestamp
-CheckAlerts
------------
-Add one or more records representing alerts. Using a datastore.
+a threshold_cross has the following properties : count, sum, avg, time, duration, value
Module
______
@@ -157,11 +27,11 @@ com.hurence.logisland:logisland-processor-common:1.1.2
Class
_____
-com.hurence.logisland.processor.alerting.CheckAlerts
+com.hurence.logisland.processor.alerting.ComputeTags
Tags
____
-record, alerting, thresholds, opc, tag
+record, fields, Add
Properties
__________
@@ -276,8 +146,6 @@ In the list below, the names of required properties appear in **bold**. Any othe
"datastore.cache.collection", "The collection where to find cached objects", "", "test", "false", "false"
"js.cache.service", "The cache service to be used to store already sanitized JS expressions. If not specified a in-memory unlimited hash map will be used.", "", "null", "false", "false"
"output.record.type", "the type of the output record", "", "event", "false", "false"
- "profile.activation.condition", "A javascript expression that activates this alerting profile when true", "", "0==0", "false", "false"
- "alert.criticity", "from 0 to ...", "", "0", "false", "false"
Dynamic Properties
__________________
@@ -292,19 +160,24 @@ Dynamic Properties allow the user to specify both the name and value of a proper
Extra informations
__________________
-.. include:: ./details/common-processors/CheckAlerts-Detail.rst
+.. include:: ./details/common-processors/ComputeTags-Detail.rst
----------
-.. _com.hurence.logisland.processor.alerting.CheckThresholds:
+.. _com.hurence.logisland.processor.datastore.EnrichRecords:
-CheckThresholds
----------------
-Compute threshold cross from given formulas.
+EnrichRecords
+-------------
+Enrich input records with content indexed in datastore using multiget queries.
+Each incoming record must be possibly enriched with information stored in datastore.
+The plugin properties are :
-- each dynamic property will return a new record according to the formula definition
-- the record name will be set to the property name
-- the record time will be set to the current timestamp
+- es.index (String) : Name of the datastore index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+- record.key (String) : Name of the field in the input record containing the id to lookup document in elastic search. This field is mandatory.
+- es.key (String) : Name of the datastore key on which the multiget query will be performed. This field is mandatory.
+- includes (ArrayList) : List of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+- excludes (ArrayList) : List of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+Each outcoming record holds at least the input record plus potentially one or more fields coming from of one datastore document.
Module
______
@@ -312,127 +185,191 @@ com.hurence.logisland:logisland-processor-common:1.1.2
Class
_____
-com.hurence.logisland.processor.alerting.CheckThresholds
+com.hurence.logisland.processor.datastore.EnrichRecords
Tags
____
-record, threshold, tag, alerting
+datastore, enricher
Properties
__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
.. csv-table:: allowable-values
:header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
:widths: 20,60,30,20,10,10
:escape: \
- "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false"
- "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false"
- "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\".
-
- .. raw:: html
-
-
+ "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
+ "record.key", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+ "type.name", "The type of record to look for", "", "null", "false", "**true**"
+ "collection.name", "The name of the collection to look for", "", "null", "false", "**true**"
- Explanation: all loops (for, do-while, while, and if-else, and functions
+Extra informations
+__________________
+.. include:: ./details/common-processors/EnrichRecords-Detail.rst
+----------
- should use braces, because poison_pill() function will be inserted after
+.. _com.hurence.logisland.processor.DebugStream:
- each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple
+DebugStream
+-----------
+This is a processor that logs incoming records
- code like:
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
-
+Class
+_____
+com.hurence.logisland.processor.DebugStream
- while(true) while(true) {
+Tags
+____
+record, debug
- // do nothing
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
- }
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
-
+ "**event.serializer**", "the way to serialize event", "json (serialize events as json blocs), string (serialize events as toString() blocs)", "json", "false", "false"
- or even:
+Extra informations
+__________________
+.. include:: ./details/common-processors/DebugStream-Detail.rst
+----------
-
+.. _com.hurence.logisland.processor.datastore.BulkPut:
- while(true)
+BulkPut
+-------
+Indexes the content of a Record in a Datastore using bulk processor
-
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
- cause unbreakable loop, which force this sandbox to use {@link Thread#stop()}
+Class
+_____
+com.hurence.logisland.processor.datastore.BulkPut
- which make JVM unstable.
+Tags
+____
+datastore, record, put, bulk
-
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
-
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
- Properly writen code (even in bad intention) like:
-
-
-
- while(true) { while(true) {
-
- // do nothing
-
- }}
-
-
-
- will be changed into:
+ "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
+ "**default.collection**", "The name of the collection/index/table to insert into", "", "null", "false", "**true**"
+ "default.type", "The type of this document (required by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.collection**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "date.format", "simple date format for date suffix. default : yyyy.MM.dd", "", "yyyy.MM.dd", "false", "false"
+ "collection.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "**true**"
+ "type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
-
+Extra informations
+__________________
+.. include:: ./details/common-processors/BulkPut-Detail.rst
+----------
- while(true) {poison_pill();
+.. _com.hurence.logisland.processor.datastore.MultiGet:
- while(true) {poison_pill();
+MultiGet
+--------
+Retrieves a content from datastore using datastore multiget queries.
+Each incoming record contains information regarding the datastore multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
- // do nothing
+ - collection (String) : name of the datastore collection on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the datastore type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
- }
+Each outgoing record holds the data of one retrieved datastore document. This data is stored in these fields :
- }
+ - collection (same field name as the incoming record) : name of the datastore collection.
+ - type (same field name as the incoming record) : name of the datastore type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
-
+ - field name : the retrieved field name
+ - field value : the retrieved field value
- which finish nicely when interrupted.
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
-
+Class
+_____
+com.hurence.logisland.processor.datastore.MultiGet
- For legacy code, this check can be turned off, but with no guarantee, the
+Tags
+____
+datastore, get, multiget
- JS thread will gracefully finish when interrupted.
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
", "", "false", "false", "false"
- "max.prepared.statements", "The size of prepared statements LRU cache. If 0, this is disabled.
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
- .. raw:: html
+ "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
+ "**collection.field**", "the name of the incoming records field containing es collection name to use in multiget query. ", "", "null", "false", "false"
+ "**type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
-
+Extra informations
+__________________
+.. include:: ./details/common-processors/MultiGet-Detail.rst
+----------
- Each statements when setMaxCPUTime(long) is set is prepared to
+.. _com.hurence.logisland.processor.AddFields:
- quit itself when time exceeded. To execute only once this procedure per
+AddFields
+---------
+Add one or more field to records
- statement set this value.
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
-
+Class
+_____
+com.hurence.logisland.processor.AddFields
-
+Tags
+____
+record, fields, Add
- When setMaxCPUTime(long) is set 0, this value is ignored.
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
- ", "", "30", "false", "false"
- "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
- "datastore.cache.collection", "The collection where to find cached objects", "", "test", "false", "false"
- "js.cache.service", "The cache service to be used to store already sanitized JS expressions. If not specified a in-memory unlimited hash map will be used.", "", "null", "false", "false"
- "output.record.type", "the type of the output record", "", "event", "false", "false"
- "record.ttl", "How long (in ms) do the record will remain in cache", "", "30000", "false", "false"
- "min.update.time.ms", "The minimum amount of time (in ms) that we expect between two consecutive update of the same threshold record", "", "200", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
Dynamic Properties
__________________
@@ -443,24 +380,20 @@ Dynamic Properties allow the user to specify both the name and value of a proper
:widths: 20,20,40,40,20,10
:escape: \
- "field to add", "a default value", "Add a field to the record with the default value", "", "null", false
+   "Name of the field to add", "Value of the field to add", "Add a field to the record with the specified value. Expression language can be used. You can not add a field that ends with '.type' as this suffix is used to specify the type of fields to add", "", "null", **true**
+   "Name of the field to add with the suffix '.field.type'", "Type of the field to add", "Add a field to the record with the specified type. These properties are only used if a corresponding property without the suffix '.field.type' is already defined. If this property is not defined, the default type for added fields is String. You can only use Logisland predefined type fields.", "NULL, STRING, INT, LONG, ARRAY, FLOAT, DOUBLE, BYTES, RECORD, MAP, ENUM, BOOLEAN, UNION, DATETIME", "STRING", false
+   "Name of the field to add with the suffix '.field.name'", "Name of the field to add using expression language", "Add a field to the record with the specified name (which is evaluated using expression language). These properties are only used if a corresponding property without the suffix '.field.name' is already defined. If this property is not defined, the name of the field to add is the key of the first dynamic property (which is the main and only required dynamic property).", "", "null", **true**
Extra informations
__________________
-.. include:: ./details/common-processors/CheckThresholds-Detail.rst
+.. include:: ./details/common-processors/AddFields-Detail.rst
----------
-.. _com.hurence.logisland.processor.alerting.ComputeTags:
+.. _com.hurence.logisland.processor.ApplyRegexp:
-ComputeTags
+ApplyRegexp
-----------
-Compute tag cross from given formulas.
-
-- each dynamic property will return a new record according to the formula definition
-- the record name will be set to the property name
-- the record time will be set to the current timestamp
-
-a threshold_cross has the following properties : count, sum, avg, time, duration, value
+This processor is used to create a new set of fields from one field (using regexp).
Module
______
@@ -468,11 +401,11 @@ com.hurence.logisland:logisland-processor-common:1.1.2
Class
_____
-com.hurence.logisland.processor.alerting.ComputeTags
+com.hurence.logisland.processor.ApplyRegexp
Tags
____
-record, fields, Add
+parser, regex, log, record
Properties
__________
@@ -483,110 +416,7 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false"
- "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false"
- "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\".
-
- .. raw:: html
-
-
-
- Explanation: all loops (for, do-while, while, and if-else, and functions
-
- should use braces, because poison_pill() function will be inserted after
-
- each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple
-
- code like:
-
-
-
- while(true) while(true) {
-
- // do nothing
-
- }
-
-
-
- or even:
-
-
-
- while(true)
-
-
-
- cause unbreakable loop, which force this sandbox to use {@link Thread#stop()}
-
- which make JVM unstable.
-
-
-
-
-
- Properly writen code (even in bad intention) like:
-
-
-
- while(true) { while(true) {
-
- // do nothing
-
- }}
-
-
-
- will be changed into:
-
-
-
- while(true) {poison_pill();
-
- while(true) {poison_pill();
-
- // do nothing
-
- }
-
- }
-
-
-
- which finish nicely when interrupted.
-
-
-
- For legacy code, this check can be turned off, but with no guarantee, the
-
- JS thread will gracefully finish when interrupted.
-
-
", "", "false", "false", "false"
- "max.prepared.statements", "The size of prepared statements LRU cache. If 0, this is disabled.
-
- .. raw:: html
-
-
-
- Each statements when setMaxCPUTime(long) is set is prepared to
-
- quit itself when time exceeded. To execute only once this procedure per
-
- statement set this value.
-
-
-
-
-
- When setMaxCPUTime(long) is set 0, this value is ignored.
-
-
-
- ", "", "30", "false", "false"
- "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
- "datastore.cache.collection", "The collection where to find cached objects", "", "test", "false", "false"
- "js.cache.service", "The cache service to be used to store already sanitized JS expressions. If not specified a in-memory unlimited hash map will be used.", "", "null", "false", "false"
- "output.record.type", "the type of the output record", "", "event", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
Dynamic Properties
__________________
@@ -597,11 +427,15 @@ Dynamic Properties allow the user to specify both the name and value of a proper
:widths: 20,20,40,40,20,10
:escape: \
- "field to add", "a default value", "Add a field to the record with the default value", "", "null", false
+ "alternative regex & mapping", "another regex that could match", "This processor is used to create a new set of fields from one field (using regexp).", "", "null", **true**
Extra informations
__________________
-.. include:: ./details/common-processors/ComputeTags-Detail.rst
+.. include:: ./details/common-processors/ApplyRegexp-Detail.rst
+See Also:
+_________
+`com.hurence.logisland.processor.ApplyRegexp`_
+
----------
.. _com.hurence.logisland.processor.ConvertFieldsType:
@@ -689,138 +523,7 @@ __________________
.. include:: ./details/common-processors/ConvertSimpleDateFormatFields-Detail.rst
----------
-.. _com.hurence.logisland.processor.DebugStream:
-
-DebugStream
------------
-This is a processor that logs incoming records
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.DebugStream
-
-Tags
-____
-record, debug
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**event.serializer**", "the way to serialize event", "json (serialize events as json blocs), string (serialize events as toString() blocs)", "json", "false", "false"
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/DebugStream-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.datastore.EnrichRecords:
-
-EnrichRecords
--------------
-Enrich input records with content indexed in datastore using multiget queries.
-Each incoming record must be possibly enriched with information stored in datastore.
-The plugin properties are :
-
-- es.index (String) : Name of the datastore index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
-- record.key (String) : Name of the field in the input record containing the id to lookup document in elastic search. This field is mandatory.
-- es.key (String) : Name of the datastore key on which the multiget query will be performed. This field is mandatory.
-- includes (ArrayList) : List of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
-- excludes (ArrayList) : List of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
-
-Each outcoming record holds at least the input record plus potentially one or more fields coming from of one datastore document.
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.datastore.EnrichRecords
-
-Tags
-____
-datastore, enricher
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
- "record.key", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
- "includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
- "excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
- "type.name", "The typle of record to look for", "", "null", "false", "**true**"
- "collection.name", "The name of the collection to look for", "", "null", "false", "**true**"
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/EnrichRecords-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.EvaluateJsonPath:
-
-EvaluateJsonPath
-----------------
-Evaluates one or more JsonPath expressions against the content of a FlowFile. The results of those expressions are assigned to Records Fields depending on configuration of the Processor. JsonPaths are entered by adding user-defined properties; the name of the property maps to the Field Name into which the result will be placed. The value of the property must be a valid JsonPath expression. A Return Type of 'auto-detect' will make a determination based off the configured destination. If the JsonPath evaluates to a JSON array or JSON object and the Return Type is set to 'scalar' the Record will be routed to error. A Return Type of JSON can return scalar values if the provided JsonPath evaluates to the specified value. If the expression matches nothing, Fields will be created with empty strings as the value
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.EvaluateJsonPath
-
-Tags
-____
-JSON, evaluate, JsonPath
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**return.type**", "Indicates the desired return type of the JSON Path expressions. Selecting 'auto-detect' will set the return type to 'json' or 'scalar' ", "json, scalar", "scalar", "false", "false"
- "**path.not.found.behavior**", "Indicates how to handle missing JSON path expressions. Selecting 'warn' will generate a warning when a JSON path expression is not found.", "warn, ignore", "ignore", "false", "false"
- "**Null Value Representation**", "Indicates the desired representation of JSON Path expressions resulting in a null value.", "empty string, the string 'null'", "empty string", "false", "false"
- "**json.input.field.name**", "the name of the field containing the json string", "", "record_value", "false", "false"
-
-Dynamic Properties
-__________________
-Dynamic Properties allow the user to specify both the name and value of a property.
-
-.. csv-table:: dynamic-properties
- :header: "Name","Value","Description","Allowable Values","Default Value","EL"
- :widths: 20,20,40,40,20,10
- :escape: \
-
- "A Record field", "A JsonPath expression", "will be set to any JSON objects that match the JsonPath. ", "", "null", false
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/EvaluateJsonPath-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.ExpandMapFields:
+.. _com.hurence.logisland.processor.ExpandMapFields:
ExpandMapFields
---------------
@@ -1004,62 +707,6 @@ __________________
.. include:: ./details/common-processors/ModifyId-Detail.rst
----------
-.. _com.hurence.logisland.processor.datastore.MultiGet:
-
-MultiGet
---------
-Retrieves a content from datastore using datastore multiget queries.
-Each incoming record contains information regarding the datastore multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
-
- - collection (String) : name of the datastore collection on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
- - type (String) : name of the datastore type on which the multiget query will be performed. This field is not mandatory.
- - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
- - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
- - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
-
-Each outcoming record holds data of one datastore retrieved document. This data is stored in these fields :
-
- - collection (same field name as the incoming record) : name of the datastore collection.
- - type (same field name as the incoming record) : name of the datastore type.
- - id (same field name as the incoming record) : retrieved document id.
- - a list of String fields containing :
-
- - field name : the retrieved field name
- - field value : the retrieved field value
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.datastore.MultiGet
-
-Tags
-____
-datastore, get, multiget
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
- "**collection.field**", "the name of the incoming records field containing es collection name to use in multiget query. ", "", "null", "false", "false"
- "**type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
- "**ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
- "**includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
- "**excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/MultiGet-Detail.rst
-----------
-
.. _com.hurence.logisland.processor.NormalizeFields:
NormalizeFields
@@ -1105,41 +752,6 @@ __________________
.. include:: ./details/common-processors/NormalizeFields-Detail.rst
----------
-.. _com.hurence.logisland.processor.ParseProperties:
-
-ParseProperties
----------------
-Parse a field made of key=value fields separated by spaces
-a string like "a=1 b=2 c=3" will add a,b & c fields, respectively with values 1,2 & 3 to the current Record
-
-Module
-______
-com.hurence.logisland:logisland-processor-common:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.ParseProperties
-
-Tags
-____
-record, properties, parser
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**properties.field**", "the field containing the properties to split and treat", "", "null", "false", "false"
-
-Extra informations
-__________________
-.. include:: ./details/common-processors/ParseProperties-Detail.rst
-----------
-
.. _com.hurence.logisland.processor.RemoveFields:
RemoveFields
@@ -1209,21 +821,11 @@ __________________
.. include:: ./details/common-processors/SelectDistinctRecords-Detail.rst
----------
-.. _com.hurence.logisland.processor.SendMail:
-
-SendMail
---------
-The SendMail processor is aimed at sending an email (like for instance an alert email) from an incoming record. There are three ways an incoming record can generate an email according to the special fields it must embed. Here is a list of the record fields that generate a mail and how they work:
-
-- **mail_text**: this is the simplest way for generating a mail. If present, this field means to use its content (value) as the payload of the mail to send. The mail is sent in text format if there is only this special field in the record. Otherwise, used with either mail_html or mail_use_template, the content of mail_text is the aletrnative text to the HTML mail that is generated.
-
-- **mail_html**: this field specifies that the mail should be sent as HTML and the value of the field is mail payload. If mail_text is also present, its value is used as the alternative text for the mail. mail_html cannot be used with mail_use_template: only one of those two fields should be present in the record.
-
-- **mail_use_template**: If present, this field specifies that the mail should be sent as HTML and the HTML content is to be generated from the template in the processor configuration key **html.template**. The template can contain parameters which must also be present in the record as fields. See documentation of html.template for further explanations. mail_use_template cannot be used with mail_html: only one of those two fields should be present in the record.
-
- If **allow_overwrite** configuration key is true, any mail.* (dot format) configuration key may be overwritten with a matching field in the record of the form mail_* (underscore format). For instance if allow_overwrite is true and mail.to is set to config_address@domain.com, a record generating a mail with a mail_to field set to record_address@domain.com will send a mail to record_address@domain.com.
+.. _com.hurence.logisland.processor.EvaluateJsonPath:
- Apart from error records (when he is unable to process the incoming record or to send the mail), this processor is not expected to produce any output records.
+EvaluateJsonPath
+----------------
+Evaluates one or more JsonPath expressions against the content of a FlowFile. The results of those expressions are assigned to Records Fields depending on configuration of the Processor. JsonPaths are entered by adding user-defined properties; the name of the property maps to the Field Name into which the result will be placed. The value of the property must be a valid JsonPath expression. A Return Type of 'auto-detect' will make a determination based off the configured destination. If the JsonPath evaluates to a JSON array or JSON object and the Return Type is set to 'scalar' the Record will be routed to error. A Return Type of JSON can return scalar values if the provided JsonPath evaluates to the specified value. If the expression matches nothing, Fields will be created with empty strings as the value
Module
______
@@ -1231,11 +833,11 @@ com.hurence.logisland:logisland-processor-common:1.1.2
Class
_____
-com.hurence.logisland.processor.SendMail
+com.hurence.logisland.processor.EvaluateJsonPath
Tags
____
-smtp, email, e-mail, mail, mailer, sendmail, message, alert, html
+JSON, evaluate, JsonPath
Properties
__________
@@ -1246,24 +848,60 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "debug", "Enable debug. If enabled, debug information are written to stdout.", "", "false", "false", "false"
- "**smtp.server**", "FQDN, hostname or IP address of the SMTP server to use.", "", "null", "false", "false"
- "smtp.port", "TCP port number of the SMTP server to use.", "", "25", "false", "false"
- "smtp.security.username", "SMTP username.", "", "null", "false", "false"
- "smtp.security.password", "SMTP password.", "", "null", "false", "false"
- "smtp.security.ssl", "Use SSL under SMTP or not (SMTPS). Default is false.", "", "false", "false", "false"
- "**mail.from.address**", "Valid mail sender email address.", "", "null", "false", "false"
- "mail.from.name", "Mail sender name.", "", "null", "false", "false"
- "**mail.bounce.address**", "Valid bounce email address (where error mail is sent if the mail is refused by the recipient server).", "", "null", "false", "false"
- "mail.replyto.address", "Reply to email address.", "", "null", "false", "false"
- "mail.subject", "Mail subject.", "", "[LOGISLAND] Automatic email", "false", "false"
- "mail.to", "Comma separated list of email recipients. If not set, the record must have a mail_to field and allow_overwrite configuration key should be true.", "", "null", "false", "false"
- "allow_overwrite", "If true, allows to overwrite processor configuration with special record fields (mail_to, mail_from_address, mail_from_name, mail_bounce_address, mail_replyto_address, mail_subject). If false, special record fields are ignored and only processor configuration keys are used.", "", "true", "false", "false"
- "html.template", "HTML template to use. It is used when the incoming record contains a mail_use_template field. The template may contain some parameters. The parameter format in the template is of the form ${xxx}. For instance ${param_user} in the template means that a field named param_user must be present in the record and its value will replace the ${param_user} string in the HTML template when the mail will be sent. If some parameters are declared in the template, everyone of them must be present in the record as fields, otherwise the record will generate an error record. If an incoming record contains a mail_use_template field, a template must be present in the configuration and the HTML mail format will be used. If the record also contains a mail_text field, its content will be used as an alternative text message to be used in the mail reader program of the recipient if it does not supports HTML.", "", "null", "false", "false"
+ "**return.type**", "Indicates the desired return type of the JSON Path expressions. Selecting 'auto-detect' will set the return type to 'json' or 'scalar' ", "json, scalar", "scalar", "false", "false"
+ "**path.not.found.behavior**", "Indicates how to handle missing JSON path expressions. Selecting 'warn' will generate a warning when a JSON path expression is not found.", "warn, ignore", "ignore", "false", "false"
+ "**Null Value Representation**", "Indicates the desired representation of JSON Path expressions resulting in a null value.", "empty string, the string 'null'", "empty string", "false", "false"
+ "**json.input.field.name**", "the name of the field containing the json string", "", "record_value", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "A Record field", "A JsonPath expression", "will be set to any JSON objects that match the JsonPath. ", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/common-processors/EvaluateJsonPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.ParseProperties:
+
+ParseProperties
+---------------
+Parse a field made of key=value fields separated by spaces
+a string like "a=1 b=2 c=3" will add a,b & c fields, respectively with values 1,2 & 3 to the current Record
+
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.ParseProperties
+
+Tags
+____
+record, properties, parser
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**properties.field**", "the field containing the properties to split and treat", "", "null", "false", "false"
Extra informations
__________________
-.. include:: ./details/common-processors/SendMail-Detail.rst
+.. include:: ./details/common-processors/ParseProperties-Detail.rst
----------
.. _com.hurence.logisland.processor.SetJsonAsFields:
@@ -1500,3 +1138,366 @@ __________________
See Also:
_________
`com.hurence.logisland.processor.SplitTextMultiline`_
+
+----------
+
+.. _com.hurence.logisland.processor.alerting.CheckAlerts:
+
+CheckAlerts
+-----------
+Add one or more records representing alerts. Using a datastore.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.alerting.CheckAlerts
+
+Tags
+____
+record, alerting, thresholds, opc, tag
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false"
+ "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false"
+ "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\".
+
+ .. raw:: html
+
+
+
+ Explanation: all loops (for, do-while, while, and if-else, and functions
+
+ should use braces, because poison_pill() function will be inserted after
+
+ each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple
+
+ code like:
+
+
+
+ while(true) while(true) {
+
+ // do nothing
+
+ }
+
+
+
+ or even:
+
+
+
+ while(true)
+
+
+
+ cause unbreakable loop, which force this sandbox to use {@link Thread#stop()}
+
+ which make JVM unstable.
+
+
+
+
+
+	Properly written code (even in bad intention) like:
+
+
+
+ while(true) { while(true) {
+
+ // do nothing
+
+ }}
+
+
+
+ will be changed into:
+
+
+
+ while(true) {poison_pill();
+
+ while(true) {poison_pill();
+
+ // do nothing
+
+ }
+
+ }
+
+
+
+ which finish nicely when interrupted.
+
+
+
+ For legacy code, this check can be turned off, but with no guarantee, the
+
+ JS thread will gracefully finish when interrupted.
+
+
", "", "false", "false", "false"
+ "max.prepared.statements", "The size of prepared statements LRU cache. If 0, this is disabled.
+
+ .. raw:: html
+
+
+
+ Each statement when setMaxCPUTime(long) is set is prepared to
+
+ quit itself when time exceeded. To execute only once this procedure per
+
+ statement set this value.
+
+
+
+
+
+ When setMaxCPUTime(long) is set 0, this value is ignored.
+
+
+
+ ", "", "30", "false", "false"
+ "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
+ "datastore.cache.collection", "The collection where to find cached objects", "", "test", "false", "false"
+ "js.cache.service", "The cache service to be used to store already sanitized JS expressions. If not specified a in-memory unlimited hash map will be used.", "", "null", "false", "false"
+ "output.record.type", "the type of the output record", "", "event", "false", "false"
+ "profile.activation.condition", "A javascript expression that activates this alerting profile when true", "", "0==0", "false", "false"
+ "alert.criticity", "from 0 to ...", "", "0", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "field to add", "a default value", "Add a field to the record with the default value", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/common-processors/CheckAlerts-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.alerting.CheckThresholds:
+
+CheckThresholds
+---------------
+Compute threshold cross from given formulas.
+
+- each dynamic property will return a new record according to the formula definition
+- the record name will be set to the property name
+- the record time will be set to the current timestamp
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.alerting.CheckThresholds
+
+Tags
+____
+record, threshold, tag, alerting
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false"
+ "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false"
+ "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\".
+
+ .. raw:: html
+
+
+
+ Explanation: all loops (for, do-while, while, and if-else, and functions
+
+ should use braces, because poison_pill() function will be inserted after
+
+ each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple
+
+ code like:
+
+
+
+ while(true) while(true) {
+
+ // do nothing
+
+ }
+
+
+
+ or even:
+
+
+
+ while(true)
+
+
+
+ cause unbreakable loop, which force this sandbox to use {@link Thread#stop()}
+
+ which make JVM unstable.
+
+
+
+
+
+ Properly written code (even in bad intention) like:
+
+
+
+ while(true) { while(true) {
+
+ // do nothing
+
+ }}
+
+
+
+ will be changed into:
+
+
+
+ while(true) {poison_pill();
+
+ while(true) {poison_pill();
+
+ // do nothing
+
+ }
+
+ }
+
+
+
+ which finish nicely when interrupted.
+
+
+
+ For legacy code, this check can be turned off, but with no guarantee, the
+
+ JS thread will gracefully finish when interrupted.
+
+
", "", "false", "false", "false"
+ "max.prepared.statements", "The size of prepared statements LRU cache. If 0, this is disabled.
+
+ .. raw:: html
+
+
+
+ Each statement when setMaxCPUTime(long) is set is prepared to
+
+ quit itself when time exceeded. To execute only once this procedure per
+
+ statement set this value.
+
+
+
+
+
+ When setMaxCPUTime(long) is set 0, this value is ignored.
+
+
+
+ ", "", "30", "false", "false"
+ "**datastore.client.service**", "The instance of the Controller Service to use for accessing datastore.", "", "null", "false", "false"
+ "datastore.cache.collection", "The collection where to find cached objects", "", "test", "false", "false"
+ "js.cache.service", "The cache service to be used to store already sanitized JS expressions. If not specified a in-memory unlimited hash map will be used.", "", "null", "false", "false"
+ "output.record.type", "the type of the output record", "", "event", "false", "false"
+ "record.ttl", "How long (in ms) do the record will remain in cache", "", "30000", "false", "false"
+ "min.update.time.ms", "The minimum amount of time (in ms) that we expect between two consecutive update of the same threshold record", "", "200", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "field to add", "a default value", "Add a field to the record with the default value", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/common-processors/CheckThresholds-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.SendMail:
+
+SendMail
+--------
+The SendMail processor is aimed at sending an email (like for instance an alert email) from an incoming record. There are three ways an incoming record can generate an email according to the special fields it must embed. Here is a list of the record fields that generate a mail and how they work:
+
+- **mail_text**: this is the simplest way for generating a mail. If present, this field means to use its content (value) as the payload of the mail to send. The mail is sent in text format if there is only this special field in the record. Otherwise, used with either mail_html or mail_use_template, the content of mail_text is the alternative text to the HTML mail that is generated.
+
+- **mail_html**: this field specifies that the mail should be sent as HTML and the value of the field is mail payload. If mail_text is also present, its value is used as the alternative text for the mail. mail_html cannot be used with mail_use_template: only one of those two fields should be present in the record.
+
+- **mail_use_template**: If present, this field specifies that the mail should be sent as HTML and the HTML content is to be generated from the template in the processor configuration key **html.template**. The template can contain parameters which must also be present in the record as fields. See documentation of html.template for further explanations. mail_use_template cannot be used with mail_html: only one of those two fields should be present in the record.
+
+ If **allow_overwrite** configuration key is true, any mail.* (dot format) configuration key may be overwritten with a matching field in the record of the form mail_* (underscore format). For instance if allow_overwrite is true and mail.to is set to config_address@domain.com, a record generating a mail with a mail_to field set to record_address@domain.com will send a mail to record_address@domain.com.
+
+ Apart from error records (when it is unable to process the incoming record or to send the mail), this processor is not expected to produce any output records.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.SendMail
+
+Tags
+____
+smtp, email, e-mail, mail, mailer, sendmail, message, alert, html
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, debug information are written to stdout.", "", "false", "false", "false"
+ "**smtp.server**", "FQDN, hostname or IP address of the SMTP server to use.", "", "null", "false", "false"
+ "smtp.port", "TCP port number of the SMTP server to use.", "", "25", "false", "false"
+ "smtp.security.username", "SMTP username.", "", "null", "false", "false"
+ "smtp.security.password", "SMTP password.", "", "null", "false", "false"
+ "smtp.security.ssl", "Use SSL under SMTP or not (SMTPS). Default is false.", "", "false", "false", "false"
+ "**mail.from.address**", "Valid mail sender email address.", "", "null", "false", "false"
+ "mail.from.name", "Mail sender name.", "", "null", "false", "false"
+ "**mail.bounce.address**", "Valid bounce email address (where error mail is sent if the mail is refused by the recipient server).", "", "null", "false", "false"
+ "mail.replyto.address", "Reply to email address.", "", "null", "false", "false"
+ "mail.subject", "Mail subject.", "", "[LOGISLAND] Automatic email", "false", "false"
+ "mail.to", "Comma separated list of email recipients. If not set, the record must have a mail_to field and allow_overwrite configuration key should be true.", "", "null", "false", "false"
+ "allow_overwrite", "If true, allows to overwrite processor configuration with special record fields (mail_to, mail_from_address, mail_from_name, mail_bounce_address, mail_replyto_address, mail_subject). If false, special record fields are ignored and only processor configuration keys are used.", "", "true", "false", "false"
+ "html.template", "HTML template to use. It is used when the incoming record contains a mail_use_template field. The template may contain some parameters. The parameter format in the template is of the form ${xxx}. For instance ${param_user} in the template means that a field named param_user must be present in the record and its value will replace the ${param_user} string in the HTML template when the mail will be sent. If some parameters are declared in the template, everyone of them must be present in the record as fields, otherwise the record will generate an error record. If an incoming record contains a mail_use_template field, a template must be present in the configuration and the HTML mail format will be used. If the record also contains a mail_text field, its content will be used as an alternative text message to be used in the mail reader program of the recipient if it does not supports HTML.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/common-processors/SendMail-Detail.rst
\ No newline at end of file
diff --git a/logisland-documentation/user/components/components.yaml b/logisland-documentation/user/components/components.yaml
new file mode 100644
index 000000000..e1af3ce05
--- /dev/null
+++ b/logisland-documentation/user/components/components.yaml
@@ -0,0 +1,678 @@
+--- # document start
+
+categories:
+ - processing
+ - parsing
+ - datastore
+ - alerting
+ - security
+ - enrichment
+ - analytics
+ - timeseries
+extensions:
+ - name: ComputeTags
+ description: >
+ Compute tag cross from given formulas.
+
+ - each dynamic property will return a new record according to the formula definition
+ - the record name will be set to the property name
+ - the record time will be set to the current timestamp
+
+ a threshold_cross has the following properties : count, sum, avg, time, duration, value
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.alerting.ComputeTags
+ tags: [record, fields, Add]
+ - name: EnrichRecords
+ description: >
+ Enrich input records with content indexed in datastore using multiget queries.
+ Each incoming record must be possibly enriched with information stored in datastore.
+ The plugin properties are :
+
+ - es.index (String) : Name of the datastore index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - record.key (String) : Name of the field in the input record containing the id to lookup document in elastic search. This field is mandatory.
+ - es.key (String) : Name of the datastore key on which the multiget query will be performed. This field is mandatory.
+ - includes (ArrayList) : List of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (ArrayList) : List of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+ Each outcoming record holds at least the input record plus potentially one or more fields coming from of one datastore document.
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.datastore.EnrichRecords
+ tags: [datastore, enricher]
+ - name: DebugStream
+ description: This is a processor that logs incoming records
+ category: utils
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.DebugStream
+ tags: [record, debug]
+ - name: BulkPut
+ description: Indexes the content of a Record in a Datastore using bulk processor
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.datastore.BulkPut
+ tags: [datastore, record, put, bulk]
+ - name: MultiGet
+ description: >
+ Retrieves a content from datastore using datastore multiget queries.
+ Each incoming record contains information regarding the datastore multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - collection (String) : name of the datastore collection on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the datastore type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+ Each outcoming record holds data of one datastore retrieved document. This data is stored in these fields :
+
+ - collection (same field name as the incoming record) : name of the datastore collection.
+ - type (same field name as the incoming record) : name of the datastore type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ - field name : the retrieved field name
+ - field value : the retrieved field value
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.datastore.MultiGet
+ tags: [datastore, get, multiget]
+ - name: AddFields
+ description: Add one or more field to records
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.AddFields
+ tags: [record, fields, Add]
+ - name: ApplyRegexp
+ description: This processor is used to create a new set of fields from one field (using regexp).
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ApplyRegexp
+ tags: [parser, regex, log, record]
+ - name: ConvertFieldsType
+ description: Converts a field value into the given type. does nothing if conversion is not possible
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ConvertFieldsType
+ tags: [type, fields, update, convert]
+ - name: ConvertSimpleDateFormatFields
+    description: Convert one or more field representing a date into a Unix Epoch Time (time in milliseconds since 1st January 1970, 00:00:00 GMT)...
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ConvertSimpleDateFormatFields
+ tags: [record, fields, Add]
+ - name: ExpandMapFields
+ description: Expands the content of a MAP field to the root.
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ExpandMapFields
+ tags: [record, fields, Expand, Map]
+ - name: FilterRecords
+ description: Keep only records based on a given field value
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.FilterRecords
+ tags: [record, fields, remove, delete]
+ - name: FlatMap
+    description: Converts each field records into a single flattened record...
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.FlatMap
+ tags: [record, fields, flatmap, flatten]
+ - name: GenerateRandomRecord
+ description: This is a processor that make random records given an Avro schema
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.GenerateRandomRecord
+ tags: [record, avro, generator]
+ - name: ModifyId
+ description: modify id of records or generate it following defined rules
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ModifyId
+ tags: [record, id, idempotent, generate, modify]
+ - name: NormalizeFields
+ description: Changes the name of a field according to a provided name mapping...
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.NormalizeFields
+ tags: [record, fields, normalizer]
+ - name: RemoveFields
+ description: Removes a list of fields defined by a comma separated list of field names or keeps only fields defined by a comma separated list of field names.
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.RemoveFields
+ tags: [record, fields, remove, delete, keep]
+ - name: SelectDistinctRecords
+ description: Keep only distinct records based on a given field
+ category: processing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SelectDistinctRecords
+ tags: [record, fields, remove, delete]
+ - name: EvaluateJsonPath
+ description: Evaluates one or more JsonPath expressions against the content of a FlowFile. The results of those expressions are assigned to Records Fields depending on configuration of the Processor. JsonPaths are entered by adding user-defined properties; the name of the property maps to the Field Name into which the result will be placed. The value of the property must be a valid JsonPath expression. A Return Type of 'auto-detect' will make a determination based off the configured destination. If the JsonPath evaluates to a JSON array or JSON object and the Return Type is set to 'scalar' the Record will be routed to error. A Return Type of JSON can return scalar values if the provided JsonPath evaluates to the specified value. If the expression matches nothing, Fields will be created with empty strings as the value
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.EvaluateJsonPath
+ tags: [JSON, evaluate, JsonPath]
+ - name: ParseProperties
+ description: >
+ Parse a field made of key=value fields separated by spaces
+ a string like "a=1 b=2 c=3" will add a,b & c fields, respectively with values 1,2 & 3 to the current Record
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.ParseProperties
+ tags: [record, properties, parser]
+ - name: SetJsonAsFields
+ description: The SetJsonAsFields processor reads the content of a string field containing a json string and sets each json attribute as a field of the current record. Note that this could be achieved with the EvaluateJsonPath processor, but this implies to declare each json first level attribute in the configuration and also to know by advance every one of them. Whereas for this simple case, the SetJsonAsFields processor does not require such a configuration and will work with any incoming json, regardless of the list of first level attributes.
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SetJsonAsFields
+ tags: [json]
+ - name: SplitField
+ description: This processor is used to create a new set of fields from one field (using split).
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SplitField
+ tags: [parser, split, log, record]
+ - name: SplitText
+ description: This is a processor that is used to split a String into fields according to a given Record mapping
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SplitText
+ tags: [parser, regex, log, record]
+ - name: SplitTextMultiline
+ description: No description provided.
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SplitTextMultiline
+ - name: SplitTextWithProperties
+ description: This is a processor that is used to split a String into fields according to a given Record mapping
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SplitTextWithProperties
+ tags: [parser, regex, log, record]
+ - name: CheckAlerts
+ description: Add one or more records representing alerts. Using a datastore.
+ category: alerting
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.alerting.CheckAlerts
+ tags: [record, alerting, thresholds, opc, tag]
+ - name: CheckThresholds
+ description: >
+ Compute threshold cross from given formulas.
+
+ - each dynamic property will return a new record according to the formula definition
+ - the record name will be set to the property name
+ - the record time will be set to the current timestamp
+ category: alerting
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.alerting.CheckThresholds
+ tags: [record, threshold, tag, alerting]
+ - name: SendMail
+ description: >
+ The SendMail processor is aimed at sending an email (like for instance an alert email) from an incoming record. There are three ways an incoming record can generate an email according to the special fields it must embed. Here is a list of the record fields that generate a mail and how they work:
+
+      - **mail_text**: this is the simplest way for generating a mail. If present, this field means to use its content (value) as the payload of the mail to send. The mail is sent in text format if there is only this special field in the record. Otherwise, used with either mail_html or mail_use_template, the content of mail_text is the alternative text to the HTML mail that is generated.
+
+ - **mail_html**: this field specifies that the mail should be sent as HTML and the value of the field is mail payload. If mail_text is also present, its value is used as the alternative text for the mail. mail_html cannot be used with mail_use_template: only one of those two fields should be present in the record.
+
+ - **mail_use_template**: If present, this field specifies that the mail should be sent as HTML and the HTML content is to be generated from the template in the processor configuration key **html.template**. The template can contain parameters which must also be present in the record as fields. See documentation of html.template for further explanations. mail_use_template cannot be used with mail_html: only one of those two fields should be present in the record.
+
+ If **allow_overwrite** configuration key is true, any mail.* (dot format) configuration key may be overwritten with a matching field in the record of the form mail_* (underscore format). For instance if allow_overwrite is true and mail.to is set to config_address@domain.com, a record generating a mail with a mail_to field set to record_address@domain.com will send a mail to record_address@domain.com.
+
+      Apart from error records (when it is unable to process the incoming record or to send the mail), this processor is not expected to produce any output records.
+ category: alerting
+ module: com.hurence.logisland:logisland-processor-common:1.1.2
+ class: com.hurence.logisland.processor.SendMail
+ tags: [smtp, email, e-mail, mail, mailer, sendmail, message, alert, html]
+ - name: ParseUserAgent
+    description: The user-agent processor allows decomposing a User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexp to handle them. This processor relies on the `YAUAA library `_ to do the heavy work.
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-useragent:1.1.2
+ class: com.hurence.logisland.processor.useragent.ParseUserAgent
+ tags: [User-Agent, clickstream, DMP]
+ - name: ConsolidateSession
+ description: >
+ The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics.As an example here is an incoming event from the Web Analytics:
+
+ "fields": [{ "name": "timestamp", "type": "long" },{ "name": "remoteHost", "type": "string"},{ "name": "record_type", "type": ["null", "string"], "default": null },{ "name": "record_id", "type": ["null", "string"], "default": null },{ "name": "location", "type": ["null", "string"], "default": null },{ "name": "hitType", "type": ["null", "string"], "default": null },{ "name": "eventCategory", "type": ["null", "string"], "default": null },{ "name": "eventAction", "type": ["null", "string"], "default": null },{ "name": "eventLabel", "type": ["null", "string"], "default": null },{ "name": "localPath", "type": ["null", "string"], "default": null },{ "name": "q", "type": ["null", "string"], "default": null },{ "name": "n", "type": ["null", "int"], "default": null },{ "name": "referer", "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },{ "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },{ "name": "screenPixelWidth", "type": ["null", "int"], "default": null },{ "name": "screenPixelHeight", "type": ["null", "int"], "default": null },{ "name": "partyId", "type": ["null", "string"], "default": null },{ "name": "sessionId", "type": ["null", "string"], "default": null },{ "name": "pageViewId", "type": ["null", "string"], "default": null },{ "name": "is_newSession", "type": ["null", "boolean"],"default": null },{ "name": "userAgentString", "type": ["null", "string"], "default": null },{ "name": "pageType", "type": ["null", "string"], "default": null },{ "name": "UserId", "type": ["null", "string"], "default": null },{ "name": "B2Bunit", "type": ["null", "string"], "default": null },{ "name": "pointOfService", "type": ["null", "string"], "default": null },{ "name": "companyID", "type": ["null", "string"], "default": null },{ "name": "GroupCode", "type": ["null", "string"], "default": null },{ "name": "userRoles", "type": ["null", "string"], "default": null },{ "name": "is_PunchOut", 
"type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and compute the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed.The ConsolidateSession is building an aggregated session object for each active session.This aggregated object includes: - The actual session duration. - A boolean representing wether the session is considered active or closed. Note: it is possible to ressurect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field: Property name containing the session identifier (default: sessionId). - timestamp.field: Property name containing the timestamp of the event (default: timestamp). - session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). - visitedpage.field: Property name containing the page visited by the customer (default: location). - fields.to.return: List of fields to return in the aggregated object. (default: N/A)
+ category: analytics
+ module: com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+ class: com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+ tags: [analytics, web, session]
+ - name: DetectOutliers
+ description: >
+ Outlier Analysis: A Hybrid Approach
+
+ In order to function at scale, a two-phase approach is taken
+
+ For every data point
+
+ - Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+ - Gather a biased sample (biased by recency)
+ - Extremely deterministic in space and cheap in computation
+
+ For every outlier candidate
+
+ - Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+ - Expensive computationally, but run infrequently
+
+ This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (i.e. Storm, Spark, Flink, NiFi) to detect outliers.
+ category: analytics
+ module: com.hurence.logisland:logisland-processor-outlier-detection:1.1.2
+ class: com.hurence.logisland.processor.DetectOutliers
+ tags: [analytic, outlier, record, iot, timeseries]
+ - name: IncrementalWebSession
+ description: >
+ This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+ the duration between the web session and the web event must not exceed the specified time-out,
+ the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+ source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier whereas remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed with an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally when all web events were applied, all web events -potentially modified with a new session identifier- are saved in elasticsearch. And web sessions are passed to the next processor.
+
+ WebSession information are:
+ - first and last visited page
+ - first and last timestamp of processed event
+ - total number of processed events
+ - the userId
+ - a boolean denoting if the web-session is still active or not
+ - an integer denoting the duration of the web-sessions
+ - optional fields that may be retrieved from the processed events
+ category: analytics
+ module: com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+ class: com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+ tags: [analytics, web, session]
+ - name: SetSourceOfTraffic
+ description: >
+ Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+ including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+ When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+ To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+ i-e: **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+ , the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+ By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
+ with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+ Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+ category: analytics
+ module: com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+ class: com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+ tags: [session, traffic, source, web, analytics]
+ - name: URLDecoder
+ description: >
+ Decode one or more field containing an URL with possibly special chars encoded
+ ...
+ category: analytics
+ module: com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+ class: com.hurence.logisland.processor.webAnalytics.URLDecoder
+ tags: [record, fields, Decode]
+ - name: EnrichRecordsElasticsearch
+ description: >
+ Enrich input records with content indexed in elasticsearch using multiget queries.
+ Each incoming record must be possibly enriched with information stored in elasticsearch.
+ Each outcoming record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+ class: com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+ tags: [elasticsearch]
+ - name: IpToFqdn
+ description: Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, specially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to let the time to the underlying DNS system to be potentially updated.
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-enrichment:1.1.2
+ class: com.hurence.logisland.processor.enrichment.IpToFqdn
+ tags: [dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich]
+ - name: IpToGeo
+ description: Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as him with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependant on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+ category: enrichment
+ module: com.hurence.logisland:logisland-processor-enrichment:1.1.2
+ class: com.hurence.logisland.processor.enrichment.IpToGeo
+ tags: [geo, enrich, ip]
+ - name: ParseBroEvent
+ description: >
+ The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+ Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+ As an example here is an incoming event from Bro:
+
+ {
+
+ "conn": {
+
+ "id.resp_p": 9092,
+
+ "resp_pkts": 0,
+
+ "resp_ip_bytes": 0,
+
+ "local_orig": true,
+
+ "orig_ip_bytes": 0,
+
+ "orig_pkts": 0,
+
+ "missed_bytes": 0,
+
+ "history": "Cc",
+
+ "tunnel_parents": [],
+
+ "id.orig_p": 56762,
+
+ "local_resp": true,
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+ "conn_state": "OTH",
+
+ "id.orig_h": "172.17.0.2",
+
+ "proto": "tcp",
+
+ "id.resp_h": "172.17.0.3",
+
+ "ts": 1487596886.953917
+
+ }
+
+ }
+
+ It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+ "@timestamp": "2017-02-20T13:36:32Z"
+
+ "record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+ "record_time": 1487597792782
+
+ "record_type": "conn"
+
+ "id_resp_p": 9092
+
+ "resp_pkts": 0
+
+ "resp_ip_bytes": 0
+
+ "local_orig": true
+
+ "orig_ip_bytes": 0
+
+ "orig_pkts": 0
+
+ "missed_bytes": 0
+
+ "history": "Cc"
+
+ "tunnel_parents": []
+
+ "id_orig_p": 56762
+
+ "local_resp": true
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7"
+
+ "conn_state": "OTH"
+
+ "id_orig_h": "172.17.0.2"
+
+ "proto": "tcp"
+
+ "id_resp_h": "172.17.0.3"
+
+ "ts": 1487596886.953917
+ category: security
+ module: com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+ class: com.hurence.logisland.processor.bro.ParseBroEvent
+ tags: [bro, security, IDS, NIDS]
+ - name: ParseNetflowEvent
+ description: >
+ The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic.We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+ The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+ Netflow are sent to kafka in order to be processed by logisland.
+ In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on nifi to listen on that port for incoming netflow (V5) traffic and send them to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+ category: security
+ module: com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+ class: com.hurence.logisland.processor.netflow.ParseNetflowEvent
+ tags: [netflow, security]
+ - name: ParseNetworkPacket
+ description: The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+ category: security
+ module: com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+ class: com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+ tags: [PCap, security, IDS, NIDS]
+ - name: SampleRecords
+ description: >
+ Query matching based on `Luwak `_
+
+ you can use this processor to handle custom events defined by lucene queries
+ a new record is added to output each time a registered query is matched
+
+ A query is expressed as a lucene query against a field like for example:
+
+ .. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+ Please read the `Lucene syntax guide `_ for supported operations
+
+ .. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+ category: timeseries
+ module: com.hurence.logisland:logisland-processor-sampling:1.1.2
+ class: com.hurence.logisland.processor.SampleRecords
+ tags: [analytic, sampler, record, iot, timeseries]
+ - name: BulkAddElasticsearch
+ description: Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+ class: com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+ tags: [elasticsearch]
+ - name: FetchHBaseRow
+ description: Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on an interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-hbase:1.1.2
+ class: com.hurence.logisland.processor.hbase.FetchHBaseRow
+ tags: [hbase, scan, fetch, get, enrich]
+ - name: MultiGetElasticsearch
+ description: >
+ Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+ Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+ Each outcoming record holds data of one elasticsearch retrieved document. This data is stored in these fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ * field name : the retrieved field name
+ * field value : the retrieved field value
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+ class: com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+ tags: [elasticsearch]
+ - name: PutHBaseCell
+ description: Adds the Contents of a Record to HBase as the value of a single cell
+ category: datastore
+ module: com.hurence.logisland:logisland-processor-hbase:1.1.2
+ class: com.hurence.logisland.processor.hbase.PutHBaseCell
+ tags: [hadoop, hbase]
+ - name: RunPython
+ description: >
+ !!!! WARNING !!!!
+
+ The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+ This processor allows to implement and run a processor written in python. This can be done in 2 ways. Either directly defining the process method code in the **script.code.process** configuration property or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+ category: processing
+ module: com.hurence.logisland:logisland-processor-scripting:1.1.2
+ class: com.hurence.logisland.processor.scripting.python.RunPython
+ tags: [scripting, python]
+ - name: EvaluateXPath
+ description: Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attributes is added.
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-xml:1.1.2
+ class: com.hurence.logisland.processor.xml.EvaluateXPath
+ tags: [XML, evaluate, XPath]
+ - name: ExcelExtract
+ description: Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor is assuming to receive raw excel file as input record.
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-excel:1.1.2
+ class: com.hurence.logisland.processor.excel.ExcelExtract
+ tags: [excel, processor, poi]
+ - name: ParseGitlabLog
+ description: The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+ category: parsing
+ module: com.hurence.logisland:logisland-processor-common-logs:1.1.2
+ class: com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+ tags: [logs, gitlab]
+ - name: MatchIP
+ description: >
+ IP address Query matching (using `Luwak `_)
+
+ You can use this processor to handle custom events matching IP address (CIDR)
+ A record matching a registered IP address query is tagged appropriately.
+
+ A query is expressed as a lucene query against a field like for example:
+
+ .. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+ Please read the `Lucene syntax guide `_ for supported operations
+
+ .. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+ category: alerting
+ module: com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+ class: com.hurence.logisland.processor.MatchIP
+ tags: [analytic, percolator, record, query, lucene]
+ - name: MatchQuery
+ description: >
+ Query matching based on `Luwak `_
+
+ you can use this processor to handle custom events defined by lucene queries
+ a new record is added to output each time a registered query is matched
+
+ A query is expressed as a lucene query against a field like for example:
+
+ .. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+ Please read the `Lucene syntax guide `_ for supported operations
+
+ .. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+ category: alerting
+ module: com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+ class: com.hurence.logisland.processor.MatchQuery
+ tags: [analytic, percolator, record, query, lucene]
+ - name: MaxmindIpToGeoService
+ description: Implementation of the IP 2 GEO Service using maxmind lite db file
+ category: enrichment
+ module: com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+ class: com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+ tags: [ip, service, geo, maxmind]
+ - name: CSVKeyValueCacheService
+ description: A cache that store csv lines as records loaded from a file
+ category: datastore
+ module: com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+ class: com.hurence.logisland.service.cache.CSVKeyValueCacheService
+ tags: [csv, service, cache]
+ - name: CassandraControllerService
+ description: Provides a controller service that for the moment only allows to bulkput records into cassandra.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+ class: com.hurence.logisland.service.cassandra.CassandraControllerService
+ tags: [cassandra, service]
+ - name: Elasticsearch_2_4_0_ClientService
+ description: Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+ class: com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+ tags: [elasticsearch, client]
+ - name: Elasticsearch_5_4_0_ClientService
+ description: Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+ class: com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+ tags: [elasticsearch, client]
+ - name: HBase_1_1_2_ClientService
+ description: Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+ class: com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+ tags: [hbase, client]
+ - name: InfluxDBControllerService
+ description: Provides a controller service that for the moment only allows to bulkput records into influxdb.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+ class: com.hurence.logisland.service.influxdb.InfluxDBControllerService
+ tags: [influxdb, service, time series]
+ - name: LRUKeyValueCacheService
+ description: A controller service for caching data by key value pair with LRU (last recently used) strategy. using LinkedHashMap
+ category: datastore
+ module: com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+ class: com.hurence.logisland.service.cache.LRUKeyValueCacheService
+ tags: [cache, service, key, value, pair, LRU]
+ - name: MongoDBControllerService
+ description: Provides a controller service that wraps most of the functionality of the MongoDB driver.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+ class: com.hurence.logisland.service.mongodb.MongoDBControllerService
+ tags: [mongo, mongodb, service]
+ - name: RedisKeyValueCacheService
+ description: A controller service for caching records by key value pair with LRU (last recently used) strategy. using LinkedHashMap
+ category: datastore
+ module: com.hurence.logisland:logisland-service-redis:1.1.2
+ class: com.hurence.logisland.redis.service.RedisKeyValueCacheService
+ tags: [cache, service, key, value, pair, redis]
+ - name: Solr_5_5_5_ClientService
+ description: Implementation of SolrClientService for Solr 5.5.5.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+ class: com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+ tags: [solr, client]
+ - name: Solr_6_4_2_ChronixClientService
+ description: Implementation of ChronixClientService for Solr 6.4.2
+ category: datastore
+ module: com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+ class: com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+ tags: [solr, client]
+ - name: Solr_6_6_2_ClientService
+ description: Implementation of SolrClientService for Solr 6.6.2.
+ category: datastore
+ module: com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+ class: com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+ tags: [solr, client]
diff --git a/logisland-documentation/user/components/engines/components.yaml b/logisland-documentation/user/components/engines/components.yaml
new file mode 100644
index 000000000..b357cf598
--- /dev/null
+++ b/logisland-documentation/user/components/engines/components.yaml
@@ -0,0 +1,24 @@
+--- # document start
+
+categories:
+ - processing
+ - parsing
+ - datastore
+ - alerting
+ - security
+ - enrichment
+ - analytics
+ - timeseries
+extensions:
+ - name: AmqpClientPipelineStream
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.stream.amqp.AmqpClientPipelineStream
+ - name: KafkaStreamsPipelineStream
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.stream.kafka.KafkaStreamsPipelineStream
+ - name: PlainJavaEngine
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.PlainJavaEngine
diff --git a/logisland-documentation/user/components/engines/engine-vanilla.yaml b/logisland-documentation/user/components/engines/engine-vanilla.yaml
new file mode 100644
index 000000000..1d3bf4711
--- /dev/null
+++ b/logisland-documentation/user/components/engines/engine-vanilla.yaml
@@ -0,0 +1,19 @@
+--- # document start
+
+categories:
+
+ - category: misc
+ extensions:
+
+ - name: AmqpClientPipelineStream
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.stream.amqp.AmqpClientPipelineStream
+ - name: KafkaStreamsPipelineStream
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.stream.kafka.KafkaStreamsPipelineStream
+ - name: PlainJavaEngine
+ description: No description provided.
+ category: misc
+ class: com.hurence.logisland.engine.vanilla.PlainJavaEngine
diff --git a/logisland-documentation/user/components/other-processors.rst b/logisland-documentation/user/components/other-processors.rst
index 24150f514..9cc1043d8 100644
--- a/logisland-documentation/user/components/other-processors.rst
+++ b/logisland-documentation/user/components/other-processors.rst
@@ -50,45 +50,6 @@ __________________
.. include:: ./details/ParseUserAgent-Detail.rst
----------
-.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
-
-BulkAddElasticsearch
---------------------
-Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
-
-Module
-______
-com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
-
-Class
-_____
-com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
-
-Tags
-____
-elasticsearch
-
-Properties
-__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
-
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
- "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
- "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
- "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
- "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
- "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
-
-Extra informations
-__________________
-.. include:: ./details/BulkAddElasticsearch-Detail.rst
-----------
-
.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession:
ConsolidateSession
@@ -216,64 +177,112 @@ __________________
.. include:: ./details/DetectOutliers-Detail.rst
----------
-.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+ the duration between the web session and the web event must not exceed the specified time-out,
+ the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+ source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier whereas remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed with an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally when all web events have been applied, all web events -potentially modified with a new session identifier- are saved in elasticsearch. And web sessions are passed to the next processor.
+
+WebSession information are:
+- first and last visited page
+- first and last timestamp of processed event
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
-EnrichRecordsElasticsearch
---------------------------
-Enrich input records with content indexed in elasticsearch using multiget queries.
-Each incoming record must be possibly enriched with information stored in elasticsearch.
-Each outcoming record holds at least the input record plus potentially one or more fields coming from of one elasticsearch document.
Module
______
-com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
Class
_____
-com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
Tags
____
-elasticsearch
+analytics, web, session
Properties
__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
.. csv-table:: allowable-values
:header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
:widths: 20,60,30,20,10,10
:escape: \
+ "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+ "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+ "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+ "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+ "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+ "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the number of processed events => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+ "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
"**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
- "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
- "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
- "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
- "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
- "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
Extra informations
__________________
-.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+.. include:: ./details/IncrementalWebSession-Detail.rst
----------
-.. _com.hurence.logisland.processor.xml.EvaluateXPath:
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic:
-EvaluateXPath
--------------
-Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attributes is added.
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+i-e: **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network, the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
Module
______
-com.hurence.logisland:logisland-processor-xml:1.1.2
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
Class
_____
-com.hurence.logisland.processor.xml.EvaluateXPath
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
Tags
____
-XML, evaluate, XPath
+session, traffic, source, web, analytics
Properties
__________
@@ -284,9 +293,59 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
- "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
- "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+ "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+ "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+ "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+ "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+ "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+ "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+ "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "source_of_traffic.hierarchical", "Should the additional source of traffic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+ "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+ "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+ "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+ "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+ "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
+
+URLDecoder
+----------
+Decode one or more fields containing a URL with possibly special chars encoded
+...
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+ "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
Dynamic Properties
__________________
@@ -297,96 +356,4675 @@ Dynamic Properties allow the user to specify both the name and value of a proper
:widths: 20,20,40,40,20,10
:escape: \
- "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+ "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
Extra informations
__________________
-.. include:: ./details/EvaluateXPath-Detail.rst
+.. include:: ./details/URLDecoder-Detail.rst
----------
-.. _com.hurence.logisland.processor.excel.ExcelExtract:
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
-ExcelExtract
-------------
-Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor is assuming to receive raw excel file as input record.
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record must be possibly enriched with information stored in elasticsearch.
+Each outcoming record holds at least the input record plus potentially one or more fields coming from of one elasticsearch document.
Module
______
-com.hurence.logisland:logisland-processor-excel:1.1.2
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
Class
_____
-com.hurence.logisland.processor.excel.ExcelExtract
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
Tags
____
-excel, processor, poi
+elasticsearch
Properties
__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
.. csv-table:: allowable-values
:header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
:widths: 20,60,30,20,10,10
:escape: \
- "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
- "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
- "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
- "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
- "record.type", "Default type of record", "", "excel_record", "false", "false"
- "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+ "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+ "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
Extra informations
__________________
-.. include:: ./details/ExcelExtract-Detail.rst
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
----------
-.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
-FetchHBaseRow
--------------
-Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to let the time to the underlying DNS system to be potentially updated.
Module
______
-com.hurence.logisland:logisland-processor-hbase:1.1.2
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
Class
_____
-com.hurence.logisland.processor.hbase.FetchHBaseRow
+com.hurence.logisland.processor.enrichment.IpToFqdn
Tags
____
-hbase, scan, fetch, get, enrich
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
Properties
__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
.. csv-table:: allowable-values
:header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
:widths: 20,60,30,20,10,10
:escape: \
- "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
- "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
- "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
- "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
- "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
- "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
- "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+ "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+ "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+ "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
Extra informations
__________________
-.. include:: ./details/FetchHBaseRow-Detail.rst
+.. include:: ./details/IpToFqdn-Detail.rst
----------
-.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+.. _com.hurence.logisland.processor.enrichment.IpToGeo:
-IncrementalWebSession
----------------------
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available field. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+ "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependent on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+ "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+ "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+ "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent:
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+{
+
+ "conn": {
+
+ "id.resp_p": 9092,
+
+ "resp_pkts": 0,
+
+ "resp_ip_bytes": 0,
+
+ "local_orig": true,
+
+ "orig_ip_bytes": 0,
+
+ "orig_pkts": 0,
+
+ "missed_bytes": 0,
+
+ "history": "Cc",
+
+ "tunnel_parents": [],
+
+ "id.orig_p": 56762,
+
+ "local_resp": true,
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+ "conn_state": "OTH",
+
+ "id.orig_h": "172.17.0.2",
+
+ "proto": "tcp",
+
+ "id.resp_h": "172.17.0.3",
+
+ "ts": 1487596886.953917
+
+ }
+
+ }
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+Netflow packets are sent to Kafka in order to be processed by Logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on NiFi to listen on that port for incoming netflow (V5) traffic and send them to a Kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+ "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
   "enrich.record", "Enrich data. If enabled, the netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.SampleRecords:
+
+SampleRecords
+-------------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-sampling:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.SampleRecords
+
+Tags
+____
+analytic, sampler, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
+ "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
+ "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
   "**sampling.parameter**", "the parameter of the algorithm", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SampleRecords-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+ "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+ "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+ "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+ "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
+
+MultiGetElasticsearch
+---------------------
+Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outcoming record holds data of one elasticsearch retrieved document. This data is stored in these fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ * field name : the retrieved field name
+ * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+ "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell:
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+ "row.identifier.field", "Specifies field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+ "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+ "**column.family.field**", "The field containing the Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**column.qualifier.field**", "The field containing the Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
   "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+ "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+ "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython:
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+This processor allows to implement and run a processor written in python. This can be done in 2 ways. Either directly defining the process method code in the **script.code.process** configuration property or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+ "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
   "script.code.process", "The python code to be called to process the records. This is the python equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
   "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your python script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherits from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+ "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+ "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath:
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+ "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract:
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor is assuming to receive raw excel file as input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+ "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
   "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
   "skip.rows", "The row number of the first row to start processing. Use this to skip over rows of data at the top of your worksheet that are not part of the dataset. Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+ "record.type", "Default type of record", "", "excel_record", "false", "false"
   "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
+
+ParseGitlabLog
+--------------
+The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common-logs:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+
+Tags
+____
+logs, gitlab
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseGitlabLog-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP:
+
+MatchIP
+-------
+IP address Query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching IP address (CIDR)
+A record matching an IP address (CIDR) query is tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery:
+
+MatchQuery
+----------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent:
+
+ParseUserAgent
+--------------
+The user-agent processor allows to decompose User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexp to handle them. This processor rely on the `YAUAA library <http://github.com/nielsbasjes/yauaa>`_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+ "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+ "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+ "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+ "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+ "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+ "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession:
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics. As an example here is an incoming event from the Web Analytics:
+
+"fields": [{ "name": "timestamp", "type": "long" },{ "name": "remoteHost", "type": "string"},{ "name": "record_type", "type": ["null", "string"], "default": null },{ "name": "record_id", "type": ["null", "string"], "default": null },{ "name": "location", "type": ["null", "string"], "default": null },{ "name": "hitType", "type": ["null", "string"], "default": null },{ "name": "eventCategory", "type": ["null", "string"], "default": null },{ "name": "eventAction", "type": ["null", "string"], "default": null },{ "name": "eventLabel", "type": ["null", "string"], "default": null },{ "name": "localPath", "type": ["null", "string"], "default": null },{ "name": "q", "type": ["null", "string"], "default": null },{ "name": "n", "type": ["null", "int"], "default": null },{ "name": "referer", "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },{ "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },{ "name": "screenPixelWidth", "type": ["null", "int"], "default": null },{ "name": "screenPixelHeight", "type": ["null", "int"], "default": null },{ "name": "partyId", "type": ["null", "string"], "default": null },{ "name": "sessionId", "type": ["null", "string"], "default": null },{ "name": "pageViewId", "type": ["null", "string"], "default": null },{ "name": "is_newSession", "type": ["null", "boolean"],"default": null },{ "name": "userAgentString", "type": ["null", "string"], "default": null },{ "name": "pageType", "type": ["null", "string"], "default": null },{ "name": "UserId", "type": ["null", "string"], "default": null },{ "name": "B2Bunit", "type": ["null", "string"], "default": null },{ "name": "pointOfService", "type": ["null", "string"], "default": null },{ "name": "companyID", "type": ["null", "string"], "default": null },{ "name": "GroupCode", "type": ["null", "string"], "default": null },{ "name": "userRoles", "type": ["null", "string"], "default": null },{ "name": "is_PunchOut", 
"type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and compute the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed.The ConsolidateSession is building an aggregated session object for each active session.This aggregated object includes: - The actual session duration. - A boolean representing wether the session is considered active or closed. Note: it is possible to ressurect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field: Property name containing the session identifier (default: sessionId). - timestamp.field: Property name containing the timestamp of the event (default: timestamp). - session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). - visitedpage.field: Property name containing the page visited by the customer (default: location). - fields.to.return: List of fields to return in the aggregated object. (default: N/A)
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers:
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (i.e. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+ "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**rotation.policy.amount**", "...", "", "100", "false", "false"
+ "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**chunking.policy.amount**", "...", "", "100", "false", "false"
+ "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+ "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+ "global.statistics.min", "minimum value", "", "null", "false", "false"
+ "global.statistics.max", "maximum value", "", "null", "false", "false"
+ "global.statistics.mean", "mean value", "", "null", "false", "false"
+ "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+ "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+ "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+ "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+ "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+ "smooth", "do smoothing ?", "", "false", "false", "false"
+ "decay", "the decay", "", "0.1", "false", "false"
+ "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+ "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+ "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+ "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+ "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+ "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+ the duration between the web session and the web event must not exceed the specified time-out,
+ the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+ source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier whereas remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed with an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally when all web events were applied, all web events -potentially modified with a new session identifier- are saved in elasticsearch. And web sessions are passed to the next processor.
+
+WebSession information are:
+- first and last visited page
+- first and last timestamp of processed event
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+ "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+ "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+ "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+ "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+ "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+ "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+ "source_of_traffic.suffix", "Prefix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic:
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+i-e: **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+ "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+ "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+ "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+ "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+ "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+ "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+ "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+ "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+ "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+ "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+ "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
+
+URLDecoder
+----------
+Decode one or more field containing an URL with possibly special chars encoded
+...
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+ "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record must be possibly enriched with information stored in elasticsearch.
+Each outcoming record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language <expression-language.html>`_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+ "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+ "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, specially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to let the time to the underlying DNS system to be potentially updated.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+ "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+ "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+ "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo:
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add all available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+ "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependent on the underlying defined Ip to Geo service. The only currently supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+ "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+ "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+ "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent:
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+{
+
+ "conn": {
+
+ "id.resp_p": 9092,
+
+ "resp_pkts": 0,
+
+ "resp_ip_bytes": 0,
+
+ "local_orig": true,
+
+ "orig_ip_bytes": 0,
+
+ "orig_pkts": 0,
+
+ "missed_bytes": 0,
+
+ "history": "Cc",
+
+ "tunnel_parents": [],
+
+ "id.orig_p": 56762,
+
+ "local_resp": true,
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+ "conn_state": "OTH",
+
+ "id.orig_h": "172.17.0.2",
+
+ "proto": "tcp",
+
+ "id.resp_h": "172.17.0.3",
+
+ "ts": 1487596886.953917
+
+ }
+
+ }
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+Netflow are sent to kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on nifi to listen on that port for incoming netflow (V5) traffic and send it to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+ "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+ "enrich.record", "Enrich data. If enabled, the netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.SampleRecords:
+
+SampleRecords
+-------------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-sampling:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.SampleRecords
+
+Tags
+____
+analytic, sampler, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
+ "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
+ "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
+ "**sampling.parameter**", "the parameter of the algorithm", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SampleRecords-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+ "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+ "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+ "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+ "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
+
+MultiGetElasticsearch
+---------------------
+Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outcoming record holds data of one elasticsearch retrieved document. This data is stored in these fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ * field name : the retrieved field name
+ * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+ "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell:
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+ "row.identifier.field", "Specifies field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+ "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+ "**column.family.field**", "The field containing the Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**column.qualifier.field**", "The field containing the Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+ "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+ "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython:
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+This processor allows to implement and run a processor written in python. This can be done in 2 ways. Either directly defining the process method code in the **script.code.process** configuration property or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+ "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+ "script.code.process", "The python code to be called to process the records. This is the python equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+ "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your python script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherit from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+ "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+ "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath:
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+ "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract:
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor assumes it receives a raw excel file as input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+ "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+ "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+ "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+ "record.type", "Default type of record", "", "excel_record", "false", "false"
+ "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
+
+ParseGitlabLog
+--------------
+The Gitlab logs processor is the Logisland entry point to get and process `Gitlab <https://www.gitlab.com>`_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat <https://github.com/edenhill/kafkacat>`_ command to inject those logs into kafka and thus Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common-logs:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+
+Tags
+____
+logs, gitlab
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseGitlabLog-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP:
+
+MatchIP
+-------
+IP address Query matching (using `Luwak <https://github.com/flaxsearch/luwak>`_)
+
+You can use this processor to handle custom events matching IP address (CIDR)
+The record matching an IP address is tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide <https://lucene.apache.org/core/2_9_4/queryparsersyntax.html>`_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery:
+
+MatchQuery
+----------
+Query matching based on `Luwak <https://github.com/flaxsearch/luwak>`_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide <https://lucene.apache.org/core/2_9_4/queryparsersyntax.html>`_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent:
+
+ParseUserAgent
+--------------
+The user-agent processor allows to decompose User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexp to handle them. This processor relies on the `YAUAA library <https://github.com/nielsbasjes/yauaa>`_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+ "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+ "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+ "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+ "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+ "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+ "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession:
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics.As an example here is an incoming event from the Web Analytics:
+
+"fields": [{ "name": "timestamp", "type": "long" },{ "name": "remoteHost", "type": "string"},{ "name": "record_type", "type": ["null", "string"], "default": null },{ "name": "record_id", "type": ["null", "string"], "default": null },{ "name": "location", "type": ["null", "string"], "default": null },{ "name": "hitType", "type": ["null", "string"], "default": null },{ "name": "eventCategory", "type": ["null", "string"], "default": null },{ "name": "eventAction", "type": ["null", "string"], "default": null },{ "name": "eventLabel", "type": ["null", "string"], "default": null },{ "name": "localPath", "type": ["null", "string"], "default": null },{ "name": "q", "type": ["null", "string"], "default": null },{ "name": "n", "type": ["null", "int"], "default": null },{ "name": "referer", "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },{ "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },{ "name": "screenPixelWidth", "type": ["null", "int"], "default": null },{ "name": "screenPixelHeight", "type": ["null", "int"], "default": null },{ "name": "partyId", "type": ["null", "string"], "default": null },{ "name": "sessionId", "type": ["null", "string"], "default": null },{ "name": "pageViewId", "type": ["null", "string"], "default": null },{ "name": "is_newSession", "type": ["null", "boolean"],"default": null },{ "name": "userAgentString", "type": ["null", "string"], "default": null },{ "name": "pageType", "type": ["null", "string"], "default": null },{ "name": "UserId", "type": ["null", "string"], "default": null },{ "name": "B2Bunit", "type": ["null", "string"], "default": null },{ "name": "pointOfService", "type": ["null", "string"], "default": null },{ "name": "companyID", "type": ["null", "string"], "default": null },{ "name": "GroupCode", "type": ["null", "string"], "default": null },{ "name": "userRoles", "type": ["null", "string"], "default": null },{ "name": "is_PunchOut", 
+"type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and computes the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed.The ConsolidateSession is building an aggregated session object for each active session.This aggregated object includes: - The actual session duration. - A boolean representing whether the session is considered active or closed. Note: it is possible to resurrect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field: Property name containing the session identifier (default: sessionId). - timestamp.field: Property name containing the timestamp of the event (default: timestamp). - session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). - visitedpage.field: Property name containing the page visited by the customer (default: location). - fields.to.return: List of fields to return in the aggregated object. (default: N/A)
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the number of events => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers:
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (i.e. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+ "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**rotation.policy.amount**", "...", "", "100", "false", "false"
+ "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**chunking.policy.amount**", "...", "", "100", "false", "false"
+ "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+ "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+ "global.statistics.min", "minimum value", "", "null", "false", "false"
+ "global.statistics.max", "maximum value", "", "null", "false", "false"
+ "global.statistics.mean", "mean value", "", "null", "false", "false"
+ "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+ "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+ "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+ "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+ "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+ "smooth", "do smoothing ?", "", "false", "false", "false"
+ "decay", "the decay", "", "0.1", "false", "false"
+ "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+ "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+ "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+ "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+ "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+ "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+ the duration between the web session and the web event must not exceed the specified time-out,
+ the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+ source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier whereas remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed with an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally when all web events were applied, all web events -potentially modified with a new session identifier- are saved in elasticsearch. And web sessions are passed to the next processor.
+
+WebSession information are:
+- first and last visited page
+- first and last timestamp of processed event
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+ "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+ "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+ "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+ "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+ "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the number of events => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+ "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic:
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information is placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+ "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+ "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+ "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+ "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+ "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+ "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+ "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+ "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+ "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+ "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+ "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
+
+URLDecoder
+----------
+Decode one or more field containing an URL with possibly special chars encoded
+...
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+ "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may be enriched with information stored in elasticsearch.
+Each outcoming record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+ "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+ "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to give the underlying DNS system time to be potentially updated.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+ "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+   "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland or in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+   "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo:
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+   "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependent on the underlying defined Ip to Geo service. The only currently supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+ "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+ "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+ "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent:
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+{
+
+ "conn": {
+
+ "id.resp_p": 9092,
+
+ "resp_pkts": 0,
+
+ "resp_ip_bytes": 0,
+
+ "local_orig": true,
+
+ "orig_ip_bytes": 0,
+
+ "orig_pkts": 0,
+
+ "missed_bytes": 0,
+
+ "history": "Cc",
+
+ "tunnel_parents": [],
+
+ "id.orig_p": 56762,
+
+ "local_resp": true,
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+ "conn_state": "OTH",
+
+ "id.orig_h": "172.17.0.2",
+
+ "proto": "tcp",
+
+ "id.resp_h": "172.17.0.3",
+
+ "ts": 1487596886.953917
+
+ }
+
+ }
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic.We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+Netflow are sent to kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on NiFi to listen on that port for incoming netflow (V5) traffic and send it to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+ "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+   "enrich.record", "Enrich data. If enabled, the netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.SampleRecords:
+
+SampleRecords
+-------------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-sampling:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.SampleRecords
+
+Tags
+____
+analytic, sampler, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
+ "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
+ "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
+   "**sampling.parameter**", "the parameter of the algorithm", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SampleRecords-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+ "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+ "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on an interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+ "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+ "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
+
+MultiGetElasticsearch
+---------------------
+Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outcoming record holds data of one elasticsearch retrieved document. This data is stored in these fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ * field name : the retrieved field name
+ * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+ "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell:
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+ "row.identifier.field", "Specifies field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+ "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+ "**column.family.field**", "The field containing the Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**column.qualifier.field**", "The field containing the Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "table.name.default", "The table name to use if table name field is not set", "", "null", "false", "false"
+ "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+ "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython:
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+This processor allows to implement and run a processor written in python. This can be done in 2 ways. Either directly defining the process method code in the **script.code.process** configuration property or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+ "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+   "script.code.process", "The python code to be called to process the records. This is the python equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+   "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your python script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherit from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+ "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+ "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath:
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+ "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract:
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor expects to receive a raw Excel file as the input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+ "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+   "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+   "skip.rows", "The row number of the first row to start processing. Use this to skip over rows of data at the top of your worksheet that are not part of the dataset. Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+ "record.type", "Default type of record", "", "excel_record", "false", "false"
+   "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
+
+ParseGitlabLog
+--------------
+The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common-logs:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+
+Tags
+____
+logs, gitlab
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseGitlabLog-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP:
+
+MatchIP
+-------
+IP address Query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching IP address (CIDR)
+Records matching an IP address (CIDR) query are tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery:
+
+MatchQuery
+----------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent:
+
+ParseUserAgent
+--------------
+The user-agent processor allows to decompose User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexp to handle them. This processor relies on the `YAUAA library `_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+ "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+ "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+ "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+ "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+ "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+ "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession:
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics.As an example here is an incoming event from the Web Analytics:
+
+"fields": [{ "name": "timestamp", "type": "long" },{ "name": "remoteHost", "type": "string"},{ "name": "record_type", "type": ["null", "string"], "default": null },{ "name": "record_id", "type": ["null", "string"], "default": null },{ "name": "location", "type": ["null", "string"], "default": null },{ "name": "hitType", "type": ["null", "string"], "default": null },{ "name": "eventCategory", "type": ["null", "string"], "default": null },{ "name": "eventAction", "type": ["null", "string"], "default": null },{ "name": "eventLabel", "type": ["null", "string"], "default": null },{ "name": "localPath", "type": ["null", "string"], "default": null },{ "name": "q", "type": ["null", "string"], "default": null },{ "name": "n", "type": ["null", "int"], "default": null },{ "name": "referer", "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },{ "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },{ "name": "screenPixelWidth", "type": ["null", "int"], "default": null },{ "name": "screenPixelHeight", "type": ["null", "int"], "default": null },{ "name": "partyId", "type": ["null", "string"], "default": null },{ "name": "sessionId", "type": ["null", "string"], "default": null },{ "name": "pageViewId", "type": ["null", "string"], "default": null },{ "name": "is_newSession", "type": ["null", "boolean"],"default": null },{ "name": "userAgentString", "type": ["null", "string"], "default": null },{ "name": "pageType", "type": ["null", "string"], "default": null },{ "name": "UserId", "type": ["null", "string"], "default": null },{ "name": "B2Bunit", "type": ["null", "string"], "default": null },{ "name": "pointOfService", "type": ["null", "string"], "default": null },{ "name": "companyID", "type": ["null", "string"], "default": null },{ "name": "GroupCode", "type": ["null", "string"], "default": null },{ "name": "userRoles", "type": ["null", "string"], "default": null },{ "name": "is_PunchOut", 
"type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and computes the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed. The ConsolidateSession is building an aggregated session object for each active session. This aggregated object includes: - The actual session duration. - A boolean representing whether the session is considered active or closed. Note: it is possible to resurrect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field: Property name containing the session identifier (default: sessionId). - timestamp.field: Property name containing the timestamp of the event (default: timestamp). - session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). - visitedpage.field: Property name containing the page visited by the customer (default: location). - fields.to.return: List of fields to return in the aggregated object. (default: N/A)
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the number of events => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers:
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (i.e. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+ "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**rotation.policy.amount**", "...", "", "100", "false", "false"
+ "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**chunking.policy.amount**", "...", "", "100", "false", "false"
+ "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+ "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+ "global.statistics.min", "minimum value", "", "null", "false", "false"
+ "global.statistics.max", "maximum value", "", "null", "false", "false"
+ "global.statistics.mean", "mean value", "", "null", "false", "false"
+ "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+ "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+ "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+ "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+ "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+ "smooth", "do smoothing ?", "", "false", "false", "false"
+ "decay", "the decay", "", "0.1", "false", "false"
+ "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+ "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+ "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+ "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+ "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+ "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+ the duration between the web session and the web event must not exceed the specified time-out,
+ the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+ source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier whereas remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed by an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally, when all web events have been applied, all web events -potentially modified with a new session identifier- are saved in elasticsearch. And web sessions are passed to the next processor.
+
+WebSession information are:
+- first and last visited page
+- first and last timestamp of processed event
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+ "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+ "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+ "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+ "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+ "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the number of processed events => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+ "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+   "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic:
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+ "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+ "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+ "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+ "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+ "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+ "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "source_of_traffic.hierarchical", "Should the additional source of traffic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+ "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+ "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+ "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+ "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+ "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
+
+URLDecoder
+----------
+Decode one or more fields containing a URL with possibly encoded special characters
+...
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+ "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may be enriched with information stored in elasticsearch.
+Each outgoing record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+ "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+ "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to give the underlying DNS system time to be potentially updated.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+ "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+ "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+   "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo:
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as him with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependant on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+ "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+ "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+ "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+ "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent:
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+{
+
+ "conn": {
+
+ "id.resp_p": 9092,
+
+ "resp_pkts": 0,
+
+ "resp_ip_bytes": 0,
+
+ "local_orig": true,
+
+ "orig_ip_bytes": 0,
+
+ "orig_pkts": 0,
+
+ "missed_bytes": 0,
+
+ "history": "Cc",
+
+ "tunnel_parents": [],
+
+ "id.orig_p": 56762,
+
+ "local_resp": true,
+
+ "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+ "conn_state": "OTH",
+
+ "id.orig_h": "172.17.0.2",
+
+ "proto": "tcp",
+
+ "id.resp_h": "172.17.0.3",
+
+ "ts": 1487596886.953917
+
+ }
+
+ }
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+Netflow are sent to kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on NiFi to listen on that port for incoming netflow (V5) traffic and send them to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+ "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+   "enrich.record", "Enrich data. If enabled, the netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.SampleRecords:
+
+SampleRecords
+-------------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-sampling:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.SampleRecords
+
+Tags
+____
+analytic, sampler, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
+ "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
+ "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
+   "**sampling.parameter**", "the parameter of the algorithm", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SampleRecords-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+ "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+ "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on an interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+ "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+ "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
+
+MultiGetElasticsearch
+---------------------
+Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outgoing record holds data of one elasticsearch retrieved document. This data is stored in these fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+ * field name : the retrieved field name
+ * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+ "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell:
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+ "row.identifier.field", "Specifies field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+ "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+ "**column.family.field**", "The field containing the Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**column.qualifier.field**", "The field containing the Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+ "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+ "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+ "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython:
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+This processor allows to implement and run a processor written in python. This can be done in 2 ways. Either directly defining the process method code in the **script.code.process** configuration property or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+ "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+ "script.code.process", "The python code to be called to process the records. This is the python equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+ "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your python script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherit from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+ "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+ "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath:
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+ "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract:
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor assumes it receives a raw Excel file as the input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+ "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+ "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+ "skip.rows", "The row number of the first row to start processing. Use this to skip over rows of data at the top of your worksheet that are not part of the dataset. Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+ "record.type", "Default type of record", "", "excel_record", "false", "false"
+ "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
+
+ParseGitlabLog
+--------------
+The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common-logs:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+
+Tags
+____
+logs, gitlab
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseGitlabLog-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP:
+
+MatchIP
+-------
+IP address Query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching an IP address (CIDR).
+A record matching one of the registered IP address queries is tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery:
+
+MatchQuery
+----------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent:
+
+ParseUserAgent
+--------------
+The user-agent processor allows to decompose User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexp to handle them. This processor rely on the `YAUAA library `_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+ "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+ "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+ "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+ "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+ "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+ "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession:
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics. As an example here is an incoming event from the Web Analytics:
+
+"fields": [{ "name": "timestamp", "type": "long" },{ "name": "remoteHost", "type": "string"},{ "name": "record_type", "type": ["null", "string"], "default": null },{ "name": "record_id", "type": ["null", "string"], "default": null },{ "name": "location", "type": ["null", "string"], "default": null },{ "name": "hitType", "type": ["null", "string"], "default": null },{ "name": "eventCategory", "type": ["null", "string"], "default": null },{ "name": "eventAction", "type": ["null", "string"], "default": null },{ "name": "eventLabel", "type": ["null", "string"], "default": null },{ "name": "localPath", "type": ["null", "string"], "default": null },{ "name": "q", "type": ["null", "string"], "default": null },{ "name": "n", "type": ["null", "int"], "default": null },{ "name": "referer", "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },{ "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },{ "name": "screenPixelWidth", "type": ["null", "int"], "default": null },{ "name": "screenPixelHeight", "type": ["null", "int"], "default": null },{ "name": "partyId", "type": ["null", "string"], "default": null },{ "name": "sessionId", "type": ["null", "string"], "default": null },{ "name": "pageViewId", "type": ["null", "string"], "default": null },{ "name": "is_newSession", "type": ["null", "boolean"],"default": null },{ "name": "userAgentString", "type": ["null", "string"], "default": null },{ "name": "pageType", "type": ["null", "string"], "default": null },{ "name": "UserId", "type": ["null", "string"], "default": null },{ "name": "B2Bunit", "type": ["null", "string"], "default": null },{ "name": "pointOfService", "type": ["null", "string"], "default": null },{ "name": "companyID", "type": ["null", "string"], "default": null },{ "name": "GroupCode", "type": ["null", "string"], "default": null },{ "name": "userRoles", "type": ["null", "string"], "default": null },{ "name": "is_PunchOut", 
"type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and computes the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed. The ConsolidateSession is building an aggregated session object for each active session. This aggregated object includes: - The actual session duration. - A boolean representing whether the session is considered active or closed. Note: it is possible to resurrect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field: Property name containing the session identifier (default: sessionId). - timestamp.field: Property name containing the timestamp of the event (default: timestamp). - session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). - visitedpage.field: Property name containing the page visited by the customer (default: location). - fields.to.return: List of fields to return in the aggregated object. (default: N/A)
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+ "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+ "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+ "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+ "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+ "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+ "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+ "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+ "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+ "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+ "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+ "eventsCounter.out.field", "the name of the field containing the number of events in the session => will override default value if set", "", "eventsCounter", "false", "false"
+ "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+ "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+ "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers:
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (i.e. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+ "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**rotation.policy.amount**", "...", "", "100", "false", "false"
+ "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+ "**chunking.policy.amount**", "...", "", "100", "false", "false"
+ "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+ "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+ "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+ "global.statistics.min", "minimum value", "", "null", "false", "false"
+ "global.statistics.max", "maximum value", "", "null", "false", "false"
+ "global.statistics.mean", "mean value", "", "null", "false", "false"
+ "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+ "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+ "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+ "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+ "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+ "smooth", "do smoothing ?", "", "false", "false", "false"
+ "decay", "the decay", "", "0.1", "false", "false"
+ "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+ "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+ "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+ "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+ "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+ "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+ "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession:
+
+IncrementalWebSession
+---------------------
This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
Firstly, web-events are grouped by their session identifier and processed in chronological order.
Then each web-session associated to each group is retrieved from elasticsearch.
@@ -464,23 +5102,31 @@ __________________
.. include:: ./details/IncrementalWebSession-Detail.rst
----------
-.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic:
-IpToFqdn
---------
-Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. An new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, specially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to let the time to the underlying DNS system to be potentially updated.
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
+, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
Module
______
-com.hurence.logisland:logisland-processor-enrichment:1.1.2
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
Class
_____
-com.hurence.logisland.processor.enrichment.IpToFqdn
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
Tags
____
-dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+session, traffic, source, web, analytics
Properties
__________
@@ -491,36 +5137,47 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
- "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
- "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
- "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
- "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
- "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
- "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled.", "", "false", "false", "false"
+ "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+ "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+ "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+ "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+ "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+ "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+ "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+ "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+ "source_of_traffic.hierarchical", "Should the additional source of traffic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+ "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+ "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+ "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+ "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+ "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
Extra informations
__________________
-.. include:: ./details/IpToFqdn-Detail.rst
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
----------
-.. _com.hurence.logisland.processor.enrichment.IpToGeo:
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
-IpToGeo
--------
-Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as him with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependant on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+URLDecoder
+----------
+Decode one or more fields containing a URL with possibly special chars encoded
+...
Module
______
-com.hurence.logisland:logisland-processor-enrichment:1.1.2
+com.hurence.logisland:logisland-processor-web-analytics:1.1.2
Class
_____
-com.hurence.logisland.processor.enrichment.IpToGeo
+com.hurence.logisland.processor.webAnalytics.URLDecoder
Tags
____
-geo, enrich, ip
+record, fields, Decode
Properties
__________
@@ -531,188 +5188,123 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
- "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
- "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
- "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
- "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
- "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
- "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
- "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+ "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+ "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
-Extra informations
+Dynamic Properties
__________________
-.. include:: ./details/IpToGeo-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.MatchIP:
-
-MatchIP
--------
-IP address Query matching (using `Luwak )`_
-
-You can use this processor to handle custom events matching IP address (CIDR)
-The record sent from a matching an IP address record is tagged appropriately.
-
-A query is expressed as a lucene query against a field like for example:
+Dynamic Properties allow the user to specify both the name and value of a property.
-.. code::
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
- message:'bad exception'
- error_count:[10 TO *]
- bytes_out:5000
- user_name:tom*
+ "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
-Please read the `Lucene syntax guide `_ for supported operations
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
-.. warning::
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch:
- don't forget to set numeric fields property to handle correctly numeric ranges queries
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may be enriched with information stored in elasticsearch.
+Each outgoing record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
Module
______
-com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
Class
_____
-com.hurence.logisland.processor.MatchIP
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
Tags
____
-analytic, percolator, record, record, query, lucene
+elasticsearch
Properties
__________
-In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
.. csv-table:: allowable-values
:header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
:widths: 20,60,30,20,10,10
:escape: \
- "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
- "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
- "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
- "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
- "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
-
-Dynamic Properties
-__________________
-Dynamic Properties allow the user to specify both the name and value of a property.
-
-.. csv-table:: dynamic-properties
- :header: "Name","Value","Description","Allowable Values","Default Value","EL"
- :widths: 20,20,40,40,20,10
- :escape: \
-
- "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+ "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+ "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+ "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+ "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
Extra informations
__________________
-.. include:: ./details/MatchIP-Detail.rst
-----------
-
-.. _com.hurence.logisland.processor.MatchQuery:
-
-MatchQuery
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
----------
-Query matching based on `Luwak `_
-
-you can use this processor to handle custom events defined by lucene queries
-a new record is added to output each time a registered query is matched
-
-A query is expressed as a lucene query against a field like for example:
-
-.. code::
-
- message:'bad exception'
- error_count:[10 TO *]
- bytes_out:5000
- user_name:tom*
-Please read the `Lucene syntax guide `_ for supported operations
-
-.. warning::
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn:
- don't forget to set numeric fields property to handle correctly numeric ranges queries
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that once a resolution occurs or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours to let the time to the underlying DNS system to be potentially updated.
Module
______
-com.hurence.logisland:logisland-processor-querymatcher:1.1.2
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
Class
_____
-com.hurence.logisland.processor.MatchQuery
+com.hurence.logisland.processor.enrichment.IpToFqdn
Tags
____
-analytic, percolator, record, record, query, lucene
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
Properties
__________
In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-.. csv-table:: allowable-values
- :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
- :widths: 20,60,30,20,10,10
- :escape: \
-
- "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
- "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
- "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
- "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
- "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
-
-Dynamic Properties
-__________________
-Dynamic Properties allow the user to specify both the name and value of a property.
-
-.. csv-table:: dynamic-properties
- :header: "Name","Value","Description","Allowable Values","Default Value","EL"
- :widths: 20,20,40,40,20,10
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
:escape: \
- "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+ "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated. A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+ "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+ "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled.", "", "false", "false", "false"
Extra informations
__________________
-.. include:: ./details/MatchQuery-Detail.rst
+.. include:: ./details/IpToFqdn-Detail.rst
----------
-.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
-
-MultiGetElasticsearch
----------------------
-Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
-Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
-
- - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
- - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
- - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
- - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
- - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
-
-Each outcoming record holds data of one elasticsearch retrieved document. This data is stored in these fields :
-
- - index (same field name as the incoming record) : name of the elasticsearch index.
- - type (same field name as the incoming record) : name of the elasticsearch type.
- - id (same field name as the incoming record) : retrieved document id.
- - a list of String fields containing :
+.. _com.hurence.logisland.processor.enrichment.IpToGeo:
- * field name : the retrieved field name
- * field value : the retrieved field value
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information are put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and then the geo attributes are created at the same level as him with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependant on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available fields. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
Module
______
-com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+com.hurence.logisland:logisland-processor-enrichment:1.1.2
Class
_____
-com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+com.hurence.logisland.processor.enrichment.IpToGeo
Tags
____
-elasticsearch
+geo, enrich, ip
Properties
__________
@@ -723,16 +5315,18 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
- "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
- "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
- "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
- "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
- "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+ "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+ "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+ "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependent on the underlying defined Ip to Geo service. The only currently supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following: **continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+ "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+ "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+ "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+ "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+ "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
Extra informations
__________________
-.. include:: ./details/MultiGetElasticsearch-Detail.rst
+.. include:: ./details/IpToGeo-Detail.rst
----------
.. _com.hurence.logisland.processor.bro.ParseBroEvent:
@@ -859,23 +5453,31 @@ __________________
.. include:: ./details/ParseBroEvent-Detail.rst
----------
-.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
-ParseGitlabLog
---------------
-The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic.We can distinguish 2 components:
+
+ - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+ - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
+Netflow are sent to kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. Then we rely on nifi to listen on that port for incoming netflow (V5) traffic and send it to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
Module
______
-com.hurence.logisland:logisland-processor-common-logs:1.1.2
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
Class
_____
-com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
Tags
____
-logs, gitlab
+netflow, security
Properties
__________
@@ -887,37 +5489,84 @@ In the list below, the names of required properties appear in **bold**. Any othe
:escape: \
"debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+ "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+ "enrich.record", "Enrich data. If enabled, the netflow record is enriched with inferred data", "", "false", "false", "false"
Extra informations
__________________
-.. include:: ./details/ParseGitlabLog-Detail.rst
+.. include:: ./details/ParseNetflowEvent-Detail.rst
----------
-.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent:
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
-ParseNetflowEvent
------------------
-The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic.We can distinguish 2 components:
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
- - Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.1.2
- - Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
-The collected data are then available for analysis purpose (intrusion detection, traffic analysis...)
-Netflow are sent to kafka in order to be processed by logisland.
-In the tutorial we will simulate Netflow traffic using `nfgen `_. this traffic will be sent to port 2055. The we rely on nifi to listen of that port for incoming netflow (V5) traffic and send them to a kafka topic. The Netflow processor could thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug.", "", "false", "false", "false"
+ "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.SampleRecords:
+
+SampleRecords
+-------------
+Query matching based on `Luwak `_
+
+you can use this processor to handle custom events defined by lucene queries
+a new record is added to output each time a registered query is matched
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
Module
______
-com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+com.hurence.logisland:logisland-processor-sampling:1.1.2
Class
_____
-com.hurence.logisland.processor.netflow.ParseNetflowEvent
+com.hurence.logisland.processor.SampleRecords
Tags
____
-netflow, security
+analytic, sampler, record, iot, timeseries
Properties
__________
@@ -928,32 +5577,129 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
- "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
- "enrich.record", "Enrich data. If enabledthe netflow record is enriched with inferred data", "", "false", "false", "false"
+ "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
+ "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
+ "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
+ "**sampling.parameter**", "the parameter of the algorithm", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SampleRecords-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch:
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+ "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+ "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+ "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+ "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow:
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+ "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+ "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+ "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+ "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+ "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+ "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch:
+
+MultiGetElasticsearch
+---------------------
+Retrieves content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below) :
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
-Extra informations
-__________________
-.. include:: ./details/ParseNetflowEvent-Detail.rst
-----------
+Each outgoing record holds data of one elasticsearch retrieved document. This data is stored in these fields :
-.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket:
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
-ParseNetworkPacket
-------------------
-The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+ * field name : the retrieved field name
+ * field value : the retrieved field value
Module
______
-com.hurence.logisland:logisland-processor-cyber-security:1.1.2
+com.hurence.logisland:logisland-processor-elasticsearch:1.1.2
Class
_____
-com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
Tags
____
-PCap, security, IDS, NIDS
+elasticsearch
Properties
__________
@@ -964,13 +5710,16 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "debug", "Enable debug.", "", "false", "false", "false"
- "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+ "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+ "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+ "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+ "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+ "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+ "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
Extra informations
__________________
-No additional information is provided
-
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
----------
.. _com.hurence.logisland.processor.hbase.PutHBaseCell:
@@ -1061,40 +5810,70 @@ __________________
.. include:: ./details/RunPython-Detail.rst
----------
-.. _com.hurence.logisland.processor.SampleRecords:
+.. _com.hurence.logisland.processor.xml.EvaluateXPath:
-SampleRecords
+EvaluateXPath
-------------
-Query matching based on `Luwak `_
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
-you can use this processor to handle custom events defined by lucene queries
-a new record is added to output each time a registered query is matched
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.1.2
-A query is expressed as a lucene query against a field like for example:
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
-.. code::
+Tags
+____
+XML, evaluate, XPath
- message:'bad exception'
- error_count:[10 TO *]
- bytes_out:5000
- user_name:tom*
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
-Please read the `Lucene syntax guide `_ for supported operations
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
-.. warning::
- don't forget to set numeric fields property to handle correctly numeric ranges queries
+ "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+ "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+ "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract:
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor assumes it receives a raw Excel file as input record.
Module
______
-com.hurence.logisland:logisland-processor-sampling:1.1.2
+com.hurence.logisland:logisland-processor-excel:1.1.2
Class
_____
-com.hurence.logisland.processor.SampleRecords
+com.hurence.logisland.processor.excel.ExcelExtract
Tags
____
-analytic, sampler, record, iot, timeseries
+excel, processor, poi
Properties
__________
@@ -1105,34 +5884,87 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "record.value.field", "the name of the numeric field to sample", "", "record_value", "false", "false"
- "record.time.field", "the name of the time field to sample", "", "record_time", "false", "false"
- "**sampling.algorithm**", "the implementation of the algorithm", "none, lttb, average, first_item, min_max, mode_median", "null", "false", "false"
- "**sampling.parameter**", "the parmater of the algorithm", "", "null", "false", "false"
+ "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+ "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+ "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
+ "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+ "record.type", "Default type of record", "", "excel_record", "false", "false"
+ "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names or field.row.header but not both together.", "", "null", "false", "false"
Extra informations
__________________
-.. include:: ./details/SampleRecords-Detail.rst
+.. include:: ./details/ExcelExtract-Detail.rst
----------
-.. _com.hurence.logisland.processor.webAnalytics.URLDecoder:
+.. _com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog:
-URLDecoder
+ParseGitlabLog
+--------------
+The Gitlab logs processor is the Logisland entry point to get and process `Gitlab `_ logs. This allows for instance to monitor activities in your Gitlab server. The expected input of this processor are records from the production_json.log log file of Gitlab which contains JSON records. You can for instance use the `kafkacat `_ command to inject those logs into kafka and thus Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-common-logs:1.1.2
+
+Class
+_____
+com.hurence.logisland.processor.commonlogs.gitlab.ParseGitlabLog
+
+Tags
+____
+logs, gitlab
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseGitlabLog-Detail.rst
----------
-Decode one or more field containing an URL with possibly special chars encoded
-...
+
+.. _com.hurence.logisland.processor.MatchIP:
+
+MatchIP
+-------
+IP address Query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching IP addresses (CIDR)
+Records matching an IP address query are tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+ don't forget to set numeric fields property to handle correctly numeric ranges queries
Module
______
-com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
Class
_____
-com.hurence.logisland.processor.webAnalytics.URLDecoder
+com.hurence.logisland.processor.MatchIP
Tags
____
-record, fields, Decode
+analytic, percolator, record, record, query, lucene
Properties
__________
@@ -1143,8 +5975,11 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
- "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
Dynamic Properties
__________________
@@ -1155,38 +5990,48 @@ Dynamic Properties allow the user to specify both the name and value of a proper
:widths: 20,20,40,40,20,10
:escape: \
- "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
Extra informations
__________________
-.. include:: ./details/URLDecoder-Detail.rst
+.. include:: ./details/MatchIP-Detail.rst
----------
-.. _com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic:
+.. _com.hurence.logisland.processor.MatchQuery:
-setSourceOfTraffic
-------------------
-Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
-including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
-When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
-To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
-i-e: **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**)
-, the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
-By default the source of traffic information are placed in a flat structure (specified by the **source_of_traffic.suffix** property
-with a default value of source_of_traffic). To work properly the setSourceOfTraffic processor needs to have access to an
-Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network , the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+MatchQuery
+----------
+Query matching based on `Luwak <https://github.com/flaxsearch/luwak>`_
+
+You can use this processor to handle custom events defined by lucene queries.
+A new record is added to the output each time a registered query is matched.
+
+A query is expressed as a lucene query against a field like for example:
+
+.. code::
+
+ message:'bad exception'
+ error_count:[10 TO *]
+ bytes_out:5000
+ user_name:tom*
+
+Please read the `Lucene syntax guide <https://lucene.apache.org/core/5_5_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description>`_ for supported operations
+
+.. warning::
+
+   don't forget to set the numeric fields property to correctly handle numeric range queries
Module
______
-com.hurence.logisland:logisland-processor-web-analytics:1.1.2
+com.hurence.logisland:logisland-processor-querymatcher:1.1.2
Class
_____
-com.hurence.logisland.processor.webAnalytics.setSourceOfTraffic
+com.hurence.logisland.processor.MatchQuery
Tags
____
-session, traffic, source, web, analytics
+analytic, percolator, record, query, lucene
Properties
__________
@@ -1197,24 +6042,23 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
- "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
- "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
- "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
- "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
- "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
- "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
- "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
- "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
- "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
- "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
- "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
- "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
- "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
- "es.type", "Name of the ES type to use.", "", "default", "false", "false"
- "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
- "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+ "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+ "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+ "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+ "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+ "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
Extra informations
__________________
-.. include:: ./details/setSourceOfTraffic-Detail.rst
\ No newline at end of file
+.. include:: ./details/MatchQuery-Detail.rst
\ No newline at end of file
diff --git a/logisland-documentation/user/components/services.rst b/logisland-documentation/user/components/services.rst
index 3736e66fe..c20a7e454 100644
--- a/logisland-documentation/user/components/services.rst
+++ b/logisland-documentation/user/components/services.rst
@@ -7,6 +7,44 @@ Services
Find below the list.
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
----------
.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
@@ -325,23 +363,120 @@ No additional information is provided
----------
-.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
-MaxmindIpToGeoService
----------------------
-Implementation of the IP 2 GEO Service using maxmind lite db file
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
Module
______
-com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
Class
_____
-com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+com.hurence.logisland.service.mongodb.MongoDBControllerService
Tags
____
-ip, service, geo, maxmind
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the `Expression Language `_ .
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with LRU (last recently used) strategy. using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive"..
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+ "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of ElasticsearchClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
Properties
__________
@@ -352,10 +487,3325 @@ In the list below, the names of required properties appear in **bold**. Any othe
:widths: 20,60,30,20,10,10
:escape: \
- "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
- "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
- "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
- "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6 4 2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of ElasticsearchClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+   "**row.key**", "the primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive"..
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "amount of time to wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of times to try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "amount of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: <measurement>:<tag>[,<tag>]...[;<measurement>:<tag>[,<tag>]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: <measurement>:<field>[,<field>]...[;<measurement>:<field>[,<field>]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+ "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUMand ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: <measurement>:<field>,<format>...[;<measurement>:<field>,<format>]... With format being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+ "sentinel.master", "The name of the sentinel master, required when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of SolrClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6.4.2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+ "**row.key**", "the primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "amount of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "amount of time to wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: <measurement>:<tag>[,<tag>]...[;<measurement>:<tag>[,<tag>]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: <measurement>:<field>[,<field>]...[;<measurement>:<field>[,<field>]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+ "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUM and ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: <measurement>:<field>,<format>[;<measurement>:<field>,<format>]... With format being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with LRU (least recently used) strategy, using LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with LRU (least recently used) strategy, using LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+ "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of SolrClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6.4.2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+ "**row.key**", "the primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+ "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUMand ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: :,...[;:,]... With format being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+ "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of SolrClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6.4.2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+ "**row.key**", "th primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that, for the moment, only allows bulk-put of records into Cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "amount of time to wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "amount of time to wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that, for the moment, only allows bulk-put of records into InfluxDB.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+   "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUM and ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: :,...[;:,]... With format being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+   "sentinel.master", "The name of the sentinel master, required when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of SolrClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6.4.2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+   "**row.key**", "the primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+   "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUM and ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: :,...[;:,]... With format being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService:
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+ "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+ "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+ "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService:
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.1.2
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+ "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+ "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+ "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+ "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+ "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+ "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+ "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+ "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+ "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+ "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+ "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+ "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+ "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+ "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+ "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+ "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+ "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+ "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+ "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_5_5_5_ClientService:
+
+Solr_5_5_5_ClientService
+------------------------
+Implementation of ElasticsearchClientService for Solr 5.5.5.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_5_5_5-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_5_5_5_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService:
+
+Solr_6_4_2_ChronixClientService
+-------------------------------
+Implementation of ChronixClientService for Solr 6.4.2
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_chronix_6.4.2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_4_2_ChronixClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "group.by", "The field the chunk should be grouped by", "", "", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService:
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of ElasticsearchClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is solr cloud enabled", "", "false", "false", "false"
+ "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+ "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+ "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+ "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService:
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+ "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+ "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService:
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+ "csv.header", "comma separated header values", "", "null", "false", "false"
+ "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+ "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+ "**row.key**", "the primary key of this db", "", "null", "false", "false"
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+ "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+ "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService:
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+ "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+ "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+ "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+ "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService:
+
+Elasticsearch_2_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 2.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_2_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_2_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fails there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grows exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grows exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "amount of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService:
+
+Elasticsearch_5_4_0_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 5.4.0.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_5_4_0-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_5_4_0_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fails there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grows exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grows exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+ "**throttling.delay**", "amount of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+ "**num.retry**", "number of times we should try to inject a bulk into es", "", "3", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "bulk.size", "bulk size in MB", "", "5", "false", "false"
+ "flush.interval", "flush interval in sec", "", "5", "false", "false"
+ "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+ "**cluster.name**", "Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'", "", "elasticsearch", "false", "false"
+ "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+ "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+ "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+ "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+ "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+ "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.", "", "null", "false", "false"
+ "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+ "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService:
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+ "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+ "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+ "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+ :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+ :widths: 20,20,40,40,20,10
+ :escape: \
+
+ "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService:
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+ "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+ "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+ "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+ "influxdb.tags", "List of tags for each supported measurement. Syntax: <measurement>:<tag>[,<tag>]...[;<measurement>:<tag>[,<tag>]...]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+ "influxdb.fields", "List of fields for each supported measurement. Syntax: <measurement>:<field>[,<field>]...[;<measurement>:<field>[,<field>]...]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+ "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+ "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUM and ALL. Default value is ANY. This is only useful when using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+ "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+ "influxdb.timefield", "Time field for each supported measurement. Syntax: <measurement>:<field>,<format>[;<measurement>:<field>,<format>]... With <format> being any constant defined in java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds. Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+ "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+ "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService:
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key value pair with LRU (least recently used) strategy, using LinkedHashMap
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.1.2
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+ :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+ :widths: 20,60,30,20,10,10
+ :escape: \
+
+ "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
Extra informations
__________________
From 9dc91e8ad66e80642f3ee2e1e6bd14d206576f91 Mon Sep 17 00:00:00 2001
From: oalam
Date: Tue, 18 Jun 2019 14:42:38 +0200
Subject: [PATCH 2/7] documentation : remove old docs
---
.../release.rst => RELEASING.rst | 0
logisland-documentation/api.rst | 370 ----------
logisland-documentation/architecture.rst | 33 -
logisland-documentation/concepts.rst | 51 --
logisland-documentation/connectors.rst | 190 -----
logisland-documentation/faq.rst | 160 ----
logisland-documentation/index.rst | 7 -
logisland-documentation/intro.rst | 8 -
logisland-documentation/monitoring.rst | 357 ---------
logisland-documentation/overview-slides.md | 684 -----------------
logisland-documentation/overview-slides.pdf | Bin 8954686 -> 0 bytes
logisland-documentation/plugins.rst | 158 ----
logisland-documentation/plugins_old.rst | 255 -------
logisland-documentation/rest-api.rst | 689 ------------------
logisland-documentation/workflow.rst | 3 -
15 files changed, 2965 deletions(-)
rename logisland-documentation/release.rst => RELEASING.rst (100%)
delete mode 100644 logisland-documentation/api.rst
delete mode 100644 logisland-documentation/architecture.rst
delete mode 100644 logisland-documentation/concepts.rst
delete mode 100644 logisland-documentation/connectors.rst
delete mode 100644 logisland-documentation/faq.rst
delete mode 100644 logisland-documentation/intro.rst
delete mode 100644 logisland-documentation/monitoring.rst
delete mode 100644 logisland-documentation/overview-slides.md
delete mode 100644 logisland-documentation/overview-slides.pdf
delete mode 100644 logisland-documentation/plugins.rst
delete mode 100644 logisland-documentation/plugins_old.rst
delete mode 100644 logisland-documentation/rest-api.rst
delete mode 100644 logisland-documentation/workflow.rst
diff --git a/logisland-documentation/release.rst b/RELEASING.rst
similarity index 100%
rename from logisland-documentation/release.rst
rename to RELEASING.rst
diff --git a/logisland-documentation/api.rst b/logisland-documentation/api.rst
deleted file mode 100644
index d083c0caf..000000000
--- a/logisland-documentation/api.rst
+++ /dev/null
@@ -1,370 +0,0 @@
-
-
-API design
-==========
-logisland is a framework that you can extend through its API,
-you can use it to build your own ``Processors`` or to build data processing apps over it.
-
-
-Java API
-++++++++
-You can extend logisland with the Java low-level API as described below.
-
-
-The primary material : Records
-------------------------------
-The basic unit of processing is the Record.
-A ``Record`` is a collection of ``Field``, while a ``Field`` has a ``name``, a ``type`` and a ``value``.
-
-You can instanciate a ``Record`` like in the following code snipet:
-
-.. code-block:: java
-
- String id = "firewall_record1";
- String type = "cisco";
- Record record = new Record(type).setId(id);
-
- assertTrue(record.isEmpty());
- assertEquals(record.size(), 0);
-
-A record is defined by its type and a collection of fields. there are three special fields:
-
-.. code-block:: java
-
- // shortcut for id
- assertEquals(record.getId(), id);
- assertEquals(record.getField(FieldDictionary.RECORD_ID).asString(), id);
-
- // shortcut for time
- assertEquals(record.getTime().getTime(), record.getField(FieldDictionary.RECORD_TIME).asLong().longValue());
-
- // shortcut for type
- assertEquals(record.getType(), type);
- assertEquals(record.getType(), record.getField(FieldDictionary.RECORD_TYPE).asString());
- assertEquals(record.getType(), record.getField(FieldDictionary.RECORD_TYPE).getRawValue());
-
-
-And the other fields have generic setters, getters and removers
-
-.. code-block:: java
-
- record.setStringField("url_host", "origin-www.20minutes.fr")
- .setField("method", FieldType.STRING, "GET")
- .setField("response_size", FieldType.INT, 452)
- .setField("is_outside_office_hours", FieldType.BOOLEAN, false)
- .setField("tags", FieldType.ARRAY, Arrays.asList("spam", "filter", "mail"));
-
- assertFalse(record.hasField("unkown_field"));
- assertTrue(record.hasField("method"));
- assertEquals(record.getField("method").asString(), "GET");
- assertTrue(record.getField("response_size").asInteger() - 452 == 0);
- assertTrue(record.getField("is_outside_office_hours").asBoolean());
- record.removeField("is_outside_office_hours");
- assertFalse(record.hasField("is_outside_office_hours"));
-
-Fields are strongly typed, you can validate them
-
-.. code-block:: java
-
- Record record = new StandardRecord();
- record.setField("request_size", FieldType.INT, 1399);
- assertTrue(record.isValid());
- record.setField("request_size", FieldType.INT, "zer");
- assertFalse(record.isValid());
- record.setField("request_size", FieldType.INT, 45L);
- assertFalse(record.isValid());
- record.setField("request_size", FieldType.LONG, 45L);
- assertTrue(record.isValid());
- record.setField("request_size", FieldType.DOUBLE, 45.5d);
- assertTrue(record.isValid());
- record.setField("request_size", FieldType.DOUBLE, 45.5);
- assertTrue(record.isValid());
- record.setField("request_size", FieldType.DOUBLE, 45L);
- assertFalse(record.isValid());
- record.setField("request_size", FieldType.FLOAT, 45.5f);
- assertTrue(record.isValid());
- record.setField("request_size", FieldType.STRING, 45L);
- assertFalse(record.isValid());
- record.setField("request_size", FieldType.FLOAT, 45.5d);
- assertFalse(record.isValid());
-
-The tools to handle processing : Processor
-------------------------------------------
-
-logisland is designed as a component centric framework, so there's a layer of abstraction to build configurable components.
-Basically a component can be Configurable and Configured.
-
-The most common component you'll use is the ``Processor``
-
-Let's explain the code of a basic ``MockProcessor``, that doesn't acheive a really useful work but which is really self-explanatory
-we first need to extend ``AbstractProcessor`` class (or to implement ``Processor`` interface).
-
-.. code-block:: java
-
- public class MockProcessor extends AbstractProcessor {
-
- private static Logger logger = LoggerFactory.getLogger(MockProcessor.class);
- private static String EVENT_TYPE_NAME = "mock";
-
-Then we have to define a list of supported ``PropertyDescriptor``. All theses properties and validation stuff are handled by
-``Configurable`` interface.
-
-.. code-block:: java
-
- public static final PropertyDescriptor FAKE_MESSAGE
- = new PropertyDescriptor.Builder()
- .name("fake.message")
- .description("a fake message")
- .required(true)
- .addValidator(StandardPropertyValidators.NON_EMPTY_VALIDATOR)
- .defaultValue("yoyo")
- .build();
-
- @Override
- public final List getSupportedPropertyDescriptors() {
- final List