diff --git a/.travis.yml b/.travis.yml index 08f4ab560..dfd7138dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ script: # Build # use travis_wait so it does not time_out after 10 minutes without output (unfortunately that seems to not work) # use -q so there's not too much output for travis (4Mb max) - - travis_wait mvn clean install -Pintegration-tests -q + - travis_wait 30 mvn clean install -Pintegration-tests -q # build assembly (there is currently missing jars in assembly when using mvn clean install...) - mvn clean package -DskipTests -q # Integrations tests diff --git a/logisland-assembly/src/assembly/shared-dependencies.xml b/logisland-assembly/src/assembly/shared-dependencies.xml index cb3f1322d..229a51ab8 100644 --- a/logisland-assembly/src/assembly/shared-dependencies.xml +++ b/logisland-assembly/src/assembly/shared-dependencies.xml @@ -26,8 +26,10 @@ + com.hurence.logisland:logisland-engine-spark_2_1 com.hurence.logisland:logisland-engine-spark_2_3 + com.hurence.logisland:logisland-engine-spark_2_4 com.hurence.logisland:logisland-engine-spark_1_6 com.hurence.logisland:logisland-engine-vanilla diff --git a/logisland-components/logisland-processors/logisland-processor-enrichment/pom.xml b/logisland-components/logisland-processors/logisland-processor-enrichment/pom.xml index b1c52772f..f06889d65 100644 --- a/logisland-components/logisland-processors/logisland-processor-enrichment/pom.xml +++ b/logisland-components/logisland-processors/logisland-processor-enrichment/pom.xml @@ -65,13 +65,13 @@ com.fasterxml.jackson.core jackson-databind - 2.9.3 + ${jackson.version} test com.maxmind.geoip2 geoip2 - 2.11.0 + 2.13.1 test diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch-api/src/main/java/com/hurence/logisland/service/elasticsearch/ElasticsearchClientService.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch-api/src/main/java/com/hurence/logisland/service/elasticsearch/ElasticsearchClientService.java index 9a52cc701..2a2c0fa18 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch-api/src/main/java/com/hurence/logisland/service/elasticsearch/ElasticsearchClientService.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch-api/src/main/java/com/hurence/logisland/service/elasticsearch/ElasticsearchClientService.java @@ -15,7 +15,6 @@ */ package com.hurence.logisland.service.elasticsearch; - import com.hurence.logisland.annotation.documentation.CapabilityDescription; import com.hurence.logisland.annotation.documentation.Tags; import com.hurence.logisland.component.AllowableValue; @@ -31,7 +30,6 @@ import java.util.Map; import java.util.Optional; - @Tags({"elasticsearch", "client"}) @CapabilityDescription("A controller service for accessing an elasticsearch client.") public interface ElasticsearchClientService extends DatastoreClientService { @@ -154,6 +152,16 @@ public ValidationResult validate(final String subject, final String input) { .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) .build(); + PropertyDescriptor ENABLE_SSL = new PropertyDescriptor.Builder() + .name("enable.ssl") + .description("Whether to enable (true) TLS/SSL connections or not (false). This can for instance be used" + + " with opendistro. Defaults to false. 
Note that the current implementation does not try to validate" + " the server certificate.") + .required(false) + .addValidator(StandardValidators.BOOLEAN_VALIDATOR) + .defaultValue("false") + .build(); + PropertyDescriptor USERNAME = new PropertyDescriptor.Builder() .name("username") .description("Username to access the Elasticsearch cluster") diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java index 59b604daf..f8792bf22 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java @@ -36,6 +36,7 @@ public class ESRule implements TestRule { * The internal-transport client that talks to the local node. */ private RestHighLevelClient client; + private ElasticsearchContainer container; /** * Return a closure which starts an embedded ES docker container, executes the unit-test, then shuts down the @@ -46,7 +47,7 @@ public Statement apply(Statement base, Description description) { return new Statement() { @Override public void evaluate() throws Throwable { - ElasticsearchContainer container = new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:6.6.2"); + container = new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:6.6.2"); container.start(); client = new RestHighLevelClient(RestClient.builder(HttpHost.create(container.getHttpHostAddress()))); @@ -60,6 +61,10 @@ public void evaluate() throws Throwable { }; } + public String getHostPortString() { + return container.getHttpHostAddress(); + } + /** * Return the object through which operations can be performed on the ES cluster.
*/ diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientServiceIT.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientServiceIT.java index f2104d133..6a772e0af 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientServiceIT.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_6_6_2-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_6_6_2_ClientServiceIT.java @@ -48,7 +48,8 @@ import java.io.IOException; import java.util.*; -import java.util.function.BiConsumer; + +import static com.hurence.logisland.service.elasticsearch.ElasticsearchClientService.HOSTS; public class Elasticsearch_6_6_2_ClientServiceIT { @@ -74,84 +75,13 @@ public void clean() throws IOException { } } - private class MockElasticsearchClientService extends Elasticsearch_6_6_2_ClientService { - - @Override - protected void createElasticsearchClient(ControllerServiceInitializationContext context) throws ProcessException { - if (esClient != null) { - return; - } - esClient = esRule.getClient(); - } - - @Override - protected void createBulkProcessor(ControllerServiceInitializationContext context) { - - if (bulkProcessor != null) { - return; - } - - // create the bulk processor - - BulkProcessor.Listener listener = - new BulkProcessor.Listener() { - @Override - public void beforeBulk(long l, BulkRequest bulkRequest) { - getLogger().debug("Going to execute bulk [id:{}] composed of {} actions", new Object[]{l, bulkRequest.numberOfActions()}); - } - - @Override - public void afterBulk(long l, BulkRequest bulkRequest, BulkResponse bulkResponse) { - getLogger().debug("Executed bulk [id:{}] composed of {} actions", new Object[]{l, bulkRequest.numberOfActions()}); - if (bulkResponse.hasFailures()) { - getLogger().warn("There was failures while executing bulk [id:{}]," + - " done bulk request in {} ms with failure = {}", - new Object[]{l, bulkResponse.getTook().getMillis(), bulkResponse.buildFailureMessage()}); - for (BulkItemResponse item : bulkResponse.getItems()) { - if (item.isFailed()) { - errors.put(item.getId(), item.getFailureMessage()); - } - } - } - } - - @Override - public void afterBulk(long l, BulkRequest bulkRequest, Throwable throwable) { - getLogger().error("something went wrong while bulk loading events to es : {}", new Object[]{throwable.getMessage()}); - } - - }; - - BiConsumer> bulkConsumer = - (request, bulkListener) -> esClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener); - bulkProcessor = BulkProcessor.builder(bulkConsumer, listener) - .setBulkActions(1000) - .setBulkSize(new ByteSizeValue(10, ByteSizeUnit.MB)) - .setFlushInterval(TimeValue.timeValueSeconds(1)) - .setConcurrentRequests(2) - //.setBackoffPolicy(getBackOffPolicy(context)) - .build(); - - } - - @Override - public List getSupportedPropertyDescriptors() { - - List props = new ArrayList<>(); - - return Collections.unmodifiableList(props); - } - - } - private ElasticsearchClientService configureElasticsearchClientService(final TestRunner 
runner) throws InitializationException { - final MockElasticsearchClientService elasticsearchClientService = new MockElasticsearchClientService(); + final Elasticsearch_6_6_2_ClientService elasticsearchClientService = new Elasticsearch_6_6_2_ClientService(); runner.addControllerService("elasticsearchClient", elasticsearchClientService); - - runner.enableControllerService(elasticsearchClientService); runner.setProperty(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient"); - runner.assertValid(elasticsearchClientService); + runner.setProperty(elasticsearchClientService, HOSTS, esRule.getHostPortString()); + runner.enableControllerService(elasticsearchClientService); // TODO : is this necessary ? final ElasticsearchClientService service = PluginProxy.unwrap(runner.getProcessContext().getPropertyValue(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE).asControllerService()); diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/pom.xml b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/pom.xml index baada52f7..4598a76d3 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/pom.xml +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/pom.xml @@ -17,6 +17,7 @@ 7.1.1 + 1.12.5 @@ -70,14 +71,14 @@ org.testcontainers testcontainers - 1.10.7 + ${testcontainers.version} test org.testcontainers elasticsearch - 1.10.7 + ${testcontainers.version} test diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESOpenDistroRule.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESOpenDistroRule.java new file mode 100644 index 000000000..70190c817 --- /dev/null +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESOpenDistroRule.java @@ -0,0 +1,150 @@ +/** + * Copyright (C) 2020 Hurence (support@hurence.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.hurence.logisland.service.elasticsearch; + +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.RestClientBuilder; +import org.elasticsearch.client.RestHighLevelClient; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import java.security.cert.X509Certificate; + +/** +* A JUnit rule which starts an embedded opendistro elasticsearch docker container to test security features +*/ +public class ESOpenDistroRule implements TestRule { + + /** + * The internal-transport client that talks to the local node. + */ + private RestHighLevelClient client; + private ElasticsearchOpenDistroContainer container; + private String opendistroUsername; + private String opendistroPassword; + + private static Logger logger = LoggerFactory.getLogger(ESOpenDistroRule.class); + + public ESOpenDistroRule(String opendistroUsername, String opendistroPassword) { + this.opendistroUsername = opendistroUsername; + this.opendistroPassword = opendistroPassword; + } + + /** + * Return a closure which starts an embedded ES OpenDistro docker container, executes the unit-test, then shuts down the + * ES instance. + */ + @Override + public Statement apply(Statement base, Description description) { + return new Statement() { + @Override + public void evaluate() throws Throwable { + container = new ElasticsearchOpenDistroContainer("amazon/opendistro-for-elasticsearch:1.4.0", + opendistroUsername, opendistroPassword); + container.start(); + + // TODO: if testcontainers supports disabling SSL server validation one can use the wait strategy + // in ElasticsearchOpenDistroContainer instead. See inside ElasticsearchOpenDistroContainer.
+ long wait = 10000L; + logger.info("Waiting for ES open distro container to start for " + wait/1000 + " seconds"); + Thread.sleep(wait); + + /** + * Inspired from https://github.com/opendistro-for-elasticsearch/community/issues/64 + */ + + RestClientBuilder builder = RestClient.builder( + new HttpHost(container.getHostAddress(), container.getPort(), "https")) + .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() { + @Override + public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) { + + // Set user/password basic auth credentials + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials(AuthScope.ANY, + new UsernamePasswordCredentials(opendistroUsername, opendistroPassword)); + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); + + // Set SSL trust manager and context + // Create and use a trust manager accepting all server certificates + TrustManager[] acceptAllTrustManager = new TrustManager[] { new X509TrustManager() { + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + public void checkClientTrusted(X509Certificate[] certs, String authType) { + } + + public void checkServerTrusted(X509Certificate[] certs, String authType) { + } + } }; + + SSLContext sslContext = null; + try { + sslContext = SSLContext.getInstance("SSL"); + sslContext.init(null, acceptAllTrustManager, new java.security.SecureRandom()); + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + httpClientBuilder.setSSLContext(sslContext); + + return httpClientBuilder; + } + }); + client = new RestHighLevelClient(builder); + + try { + base.evaluate(); // execute the unit test + } finally { + client.close(); + container.stop(); + } + } + }; + } + + public String getHostPortString() { + return container.getHostPortString(); + } + + public String getHostAddress() { + return container.getHostAddress(); + } + + public int getPort() { + return container.getPort(); + } + + /** + * Return the object through which operations can be performed on the ES cluster. + */ + public RestHighLevelClient getClient() { + return client; + } + +} \ No newline at end of file diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java index 28a023c2c..ae1de5241 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ESRule.java @@ -36,6 +36,7 @@ public class ESRule implements TestRule { * The internal-transport client that talks to the local node. 
*/ private RestHighLevelClient client; + private ElasticsearchContainer container; /** * Return a closure which starts an embedded ES docker container, executes the unit-test, then shuts down the @@ -46,7 +47,7 @@ public Statement apply(Statement base, Description description) { return new Statement() { @Override public void evaluate() throws Throwable { - ElasticsearchContainer container = new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:7.1.1"); + container = new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:7.1.1"); container.start(); client = new RestHighLevelClient(RestClient.builder(HttpHost.create(container.getHttpHostAddress()))); @@ -60,6 +61,10 @@ public void evaluate() throws Throwable { }; } + public String getHostPortString() { + return container.getHttpHostAddress(); + } + /** * Return the object through which operations can be performed on the ES cluster. */ diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistroContainer.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistroContainer.java new file mode 100644 index 000000000..87fc2d410 --- /dev/null +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistroContainer.java @@ -0,0 +1,112 @@ +/** + * Copyright (C) 2020 Hurence (support@hurence.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.hurence.logisland.service.elasticsearch; + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; +import org.testcontainers.utility.Base58; + +import java.net.InetSocketAddress; +import java.time.Duration; + +import static java.net.HttpURLConnection.HTTP_OK; + +/** + * Represents an opendistro elasticsearch docker instance which exposes by default ports 9200 and 9300 (transport.tcp.port) + * The docker image is by default fetched from amazon/opendistro-for-elasticsearch + */ +public class ElasticsearchOpenDistroContainer extends GenericContainer { + + /** + * Elasticsearch Default HTTP port + */ + private static final int ELASTICSEARCH_OPENDISTRO_DEFAULT_PORT = 9200; + + /** + * Elasticsearch Default Transport port + */ + private static final int ELASTICSEARCH_OPENDISTRO_DEFAULT_TCP_PORT = 9300; + + /** + * Elasticsearch Docker base URL + */ + private static final String ELASTICSEARCH_OPENDISTRO_DEFAULT_IMAGE = "amazon/opendistro-for-elasticsearch"; + + /** + * Elasticsearch Default version + */ + protected static final String ELASTICSEARCH_OPENDISTRO_DEFAULT_VERSION = "1.4.0"; + + public ElasticsearchOpenDistroContainer() { + this(ELASTICSEARCH_OPENDISTRO_DEFAULT_IMAGE + ":" + ELASTICSEARCH_OPENDISTRO_DEFAULT_VERSION, null, null); + } + + /** + * Create an OpenDistro Elasticsearch Container by passing the full docker image name + * @param dockerImageName Full docker image name, like: amazon/opendistro-for-elasticsearch:1.4.0 + */ + public ElasticsearchOpenDistroContainer(String dockerImageName, String user, String password) { + super(dockerImageName); + + logger().info("Starting an opendistro elasticsearch container using [{}]", dockerImageName); + withNetworkAliases("elasticsearch-opendistro-" + Base58.randomString(6)); + withEnv("discovery.type", "single-node"); + // With enforce_hostname_verification enabled, the Security plugin verifies that the hostname of the + // communication partner matches the hostname in the certificate +// withEnv("opendistro_security.ssl.transport.enforce_hostname_verification", "false"); + // Do the clients (typically the browser or the proxy) have to authenticate themselves to the http server, + // default is OPTIONAL. To enforce authentication use REQUIRE, to completely disable client certificates use + // NONE. + withEnv("opendistro_security.ssl.http.clientauth_mode", "NONE"); +// withEnv("opendistro_security.ssl.http.enabled", "false"); // Disable https +// withEnv("opendistro_security.disabled", "true"); // Completely disable security (https; authentication...) + addExposedPorts(ELASTICSEARCH_OPENDISTRO_DEFAULT_PORT, ELASTICSEARCH_OPENDISTRO_DEFAULT_TCP_PORT); + HttpWaitStrategy httpWaitStrategy = new HttpWaitStrategy() + .forPort(ELASTICSEARCH_OPENDISTRO_DEFAULT_PORT) + .forStatusCodeMatching(response -> response == HTTP_OK) + .usingTls(); + + // Ideally we would like to be able to setup the user with the passed one. For the moment we only support the + // out of the box admin/admin user + if ( (user != null) && (password != null) ) { + httpWaitStrategy.withBasicCredentials(user, password); + } + // TODO: if we use the wait strategy then this fails as it not only connects with SSL but it + // also tries to validate the server SSL certificate. We do not want that and there is currently no option to + // remove that offered by the testcontainers API. We could maybe use system properties but this would impact + // the whole VM in which the IT test runs.
We prefer for the moment just not use the wait strategy and replace + // it with a dummy sleep in the caller ESOpenDistroRule to let the docker container initialize. That is why it + // is commented here after. +// setWaitStrategy(httpWaitStrategy.withStartupTimeout(Duration.ofMinutes(2))); +// setWaitStrategy(httpWaitStrategy.withStartupTimeout(Duration.ofSeconds(10))); + } + + public String getHostPortString() { + return getContainerIpAddress() + ":" + getMappedPort(ELASTICSEARCH_OPENDISTRO_DEFAULT_PORT); + } + + public String getHostAddress() { + return getContainerIpAddress(); + } + + public int getPort() { + return getMappedPort(ELASTICSEARCH_OPENDISTRO_DEFAULT_PORT); + } + + public InetSocketAddress getTcpHost() { + return new InetSocketAddress(getContainerIpAddress(), getMappedPort(ELASTICSEARCH_OPENDISTRO_DEFAULT_TCP_PORT)); + } +} diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistro_7_x_ClientServiceIT.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistro_7_x_ClientServiceIT.java new file mode 100644 index 000000000..536067766 --- /dev/null +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/ElasticsearchOpenDistro_7_x_ClientServiceIT.java @@ -0,0 +1,523 @@ +/** + * Copyright (C) 2020 Hurence (support@hurence.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.hurence.logisland.service.elasticsearch; + +import com.hurence.logisland.classloading.PluginProxy; +import com.hurence.logisland.component.InitializationException; +import com.hurence.logisland.record.FieldType; +import com.hurence.logisland.record.Record; +import com.hurence.logisland.record.StandardRecord; +import com.hurence.logisland.service.datastore.InvalidMultiGetQueryRecordException; +import com.hurence.logisland.service.datastore.MultiGetQueryRecord; +import com.hurence.logisland.service.datastore.MultiGetResponseRecord; +import com.hurence.logisland.util.runner.TestRunner; +import com.hurence.logisland.util.runner.TestRunners; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.indices.GetIndexRequest; +import org.elasticsearch.client.indices.GetIndexResponse; +import org.elasticsearch.common.unit.TimeValue; +import org.junit.After; +import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.*; + +import static com.hurence.logisland.service.elasticsearch.ElasticsearchClientService.*; + +/** + * The current implementation uses HTTPS with no server certificate validation (like the ES service does) as well as + * user/password http basic auth, which is currently only admin/admin as it is by default configured in the opendistro + * ES docker image we currently use. + */ +public class ElasticsearchOpenDistro_7_x_ClientServiceIT { + + private static final String MAPPING1 = "{'properties':{'name':{'type': 'text'},'val':{'type':'integer'}}}"; + private static final String MAPPING2 = "{'properties':{'name':{'type': 'text'},'val':{'type': 'text'}}}"; + private static final String MAPPING3 = + "{'dynamic':'strict','properties':{'name':{'type': 'text'},'xyz':{'type': 'text'}}}"; + + private static Logger logger = LoggerFactory.getLogger(ElasticsearchOpenDistro_7_x_ClientServiceIT.class); + + // For the moment, the ES opendistro container does not support configuring and using another user/password than + // admin/admin. To be allowed to changed that, the ElasticsearchOpenDistroContainer constructor must find a way + // to configure a new user/password starting the opendistro container. + public static final String OPENDISTRO_USERNAME = "admin"; + public static final String OPENDISTRO_PASSWORD = "admin"; + + @ClassRule + public static final ESOpenDistroRule esOpenDistroRule = new ESOpenDistroRule(OPENDISTRO_USERNAME, OPENDISTRO_PASSWORD); + + @After + public void clean() throws IOException { +// ClusterHealthRequest is returning nothing... 
So we are using IndexRequest here + GetIndexRequest request = new GetIndexRequest("*"); + GetIndexResponse response; + try { + response = esOpenDistroRule.getClient().indices().get(request, RequestOptions.DEFAULT); + } catch (ElasticsearchStatusException ex) { + return;//should be index not found + } + String[] indices = response.getIndices(); + List indicesToClean = new ArrayList(); + // Do not remove .opendistro_security mandatory index + Arrays.stream(indices).forEach(index -> { + if (!index.equals(".opendistro_security")) { + indicesToClean.add(index); + } + }); + if (indicesToClean.size() > 0) { + logger.info("Cleaning indices:" + indicesToClean); + DeleteIndexRequest deleteRequest = new DeleteIndexRequest(indicesToClean.toArray(new String[0])); + Assert.assertTrue(esOpenDistroRule.getClient().indices().delete(deleteRequest, RequestOptions.DEFAULT).isAcknowledged()); + } else { + logger.info("No index to clean"); + } + } + + private ElasticsearchClientService configureElasticsearchOpenDistroClientService(final TestRunner runner) throws InitializationException { + final Elasticsearch_7_x_ClientService elasticsearchClientService = new Elasticsearch_7_x_ClientService(); + + runner.addControllerService("elasticsearchClient", elasticsearchClientService); + runner.setProperty(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient"); + runner.setProperty(elasticsearchClientService, HOSTS, esOpenDistroRule.getHostPortString()); + runner.setProperty(elasticsearchClientService, USERNAME, OPENDISTRO_USERNAME); + runner.setProperty(elasticsearchClientService, PASSWORD, OPENDISTRO_PASSWORD); + runner.setProperty(elasticsearchClientService, ENABLE_SSL, "true"); + runner.enableControllerService(elasticsearchClientService); + + // TODO : is this necessary ? 
+ final ElasticsearchClientService service = PluginProxy.unwrap(runner.getProcessContext().getPropertyValue(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE).asControllerService()); + return service; + } + + @Test + public void testBasics() throws Exception { + + Map document1 = new HashMap<>(); + document1.put("name", "fred"); + document1.put("val", 33); + + boolean result; + + final TestRunner runner = TestRunners.newTestRunner(new TestProcessor()); + + final ElasticsearchClientService elasticsearchClientService = configureElasticsearchOpenDistroClientService(runner); + + + // Verify the index does not exist + Assert.assertEquals(false, elasticsearchClientService.existsCollection("foo")); + + // Define the index + elasticsearchClientService.createCollection("foo", 2, 1); + Assert.assertEquals(true, elasticsearchClientService.existsCollection("foo")); + + // Define another index + elasticsearchClientService.createCollection("bar", 2, 1); + Assert.assertEquals(true, elasticsearchClientService.existsCollection("foo")); + + // Add a mapping to foo + result = elasticsearchClientService.putMapping("foo", null, MAPPING1.replace('\'', '"')); + Assert.assertEquals(true, result); + + // Add the same mapping again + result = elasticsearchClientService.putMapping("foo", null, MAPPING1.replace('\'', '"')); + Assert.assertEquals(true, result); + + // create alias + elasticsearchClientService.createAlias("foo", "aliasFoo"); + Assert.assertEquals(true, elasticsearchClientService.existsCollection("aliasFoo")); + + // Insert a record into foo and count foo + Assert.assertEquals(0, elasticsearchClientService.countCollection("foo")); + elasticsearchClientService.saveSync("foo", null, document1); + Assert.assertEquals(1, elasticsearchClientService.countCollection("foo")); + + // copy index foo to bar - should work + Assert.assertEquals(0, elasticsearchClientService.countCollection("bar")); + elasticsearchClientService.copyCollection(TimeValue.timeValueMinutes(2).toString(), "foo", "bar"); + elasticsearchClientService.bulkFlush(); + Thread.sleep(2000); + elasticsearchClientService.refreshCollection("bar"); + Assert.assertEquals(1, elasticsearchClientService.countCollection("bar")); + + // Define incompatible mappings in two different indexes, then try to copy - should fail + // as a document registered in index foo cannot be written in index baz. + // + // Note: MAPPING2 cannot be added to index foo or bar at all, even under a different doctype, as ES (lucene) + // does not allow two types for the same field-name in different mappings of the same index. However if + // MAPPING2 is added to index baz, then the copyCollection succeeds - because by default ES automatically converts + // integers into strings when necessary. Interestingly, this means MAPPING1 and MAPPING2 are not compatible + // at the "put mapping" level, but are compatible at the "reindex" level.. + // + // The document (doc1) of type "type1" already in index "foo" cannot be inserted into index "baz" as type1 + // because that means applying its source to MAPPING3 - but MAPPING3 is strict and does not define property + // "val", so the insert fails. 
+ elasticsearchClientService.createCollection("baz",2, 1); + elasticsearchClientService.putMapping("baz", null, MAPPING2.replace('\'', '"')); + +// try { +// elasticsearchClientService.copyCollection(TimeValue.timeValueMinutes(2).toString(), "foo", "baz"); +// Assert.fail("Exception not thrown when expected"); +// } catch(DatastoreClientServiceException e) { +// Assert.assertTrue(e.getMessage().contains("Reindex failed")); +// } + elasticsearchClientService.refreshCollection("baz"); + Assert.assertEquals(0, elasticsearchClientService.countCollection("baz")); + + // Drop index foo + elasticsearchClientService.dropCollection("foo"); + Assert.assertEquals(false, elasticsearchClientService.existsCollection("foo")); + Assert.assertEquals(false, elasticsearchClientService.existsCollection("aliasFoo")); // alias for foo disappears too + Assert.assertEquals(true, elasticsearchClientService.existsCollection("bar")); + } + + @Test + public void testBulkPut() throws InitializationException, IOException, InterruptedException { + final String index = "foo"; + final String docId = "id1"; + final String nameKey = "name"; + final String nameValue = "fred"; + final String ageKey = "age"; + final int ageValue = 33; + + Map document1 = new HashMap<>(); + document1.put(nameKey, nameValue); + document1.put(ageKey, ageValue); + + final TestRunner runner = TestRunners.newTestRunner(new TestProcessor()); + + // create the controller service and link it to the test processor : + final ElasticsearchClientService elasticsearchClientService = configureElasticsearchOpenDistroClientService(runner); + + // Verify the index does not exist + Assert.assertEquals(false, elasticsearchClientService.existsCollection(index)); + + // Create the index + elasticsearchClientService.createCollection(index,2, 1); + Assert.assertEquals(true, elasticsearchClientService.existsCollection(index)); + + // Put a document in the bulk processor : + elasticsearchClientService.bulkPut(index, null, document1, Optional.of(docId)); + // Flush the bulk processor : + elasticsearchClientService.bulkFlush(); + Thread.sleep(2000); + try { + // Refresh the index : + elasticsearchClientService.refreshCollection(index); + } catch (Exception e) { + logger.error("Error while refreshing the index : " + e.toString()); + } + + long documentsNumber = 0; + + try { + documentsNumber = elasticsearchClientService.countCollection(index); + } catch (Exception e) { + logger.error("Error while counting the number of documents in the index : " + e.toString()); + } + + Assert.assertEquals(1, documentsNumber); + + try { + elasticsearchClientService.saveSync(index, null, document1); + } catch (Exception e) { + logger.error("Error while saving the document in the index : " + e.toString()); + } + + try { + documentsNumber = elasticsearchClientService.countCollection(index); + } catch (Exception e) { + logger.error("Error while counting the number of documents in the index : " + e.toString()); + } + + Assert.assertEquals(2, documentsNumber); + + long numberOfHits = elasticsearchClientService.searchNumberOfHits(index, null, nameKey, nameValue); + + Assert.assertEquals(2, numberOfHits); + + } + + + @Test + public void testBulkPutGeopoint() throws InitializationException, InterruptedException { + final String index = "future_factory"; + final String docId = "modane_factory"; + Record record = new StandardRecord("factory") + .setId(docId) + .setStringField("address", "rue du Frejus") + .setField("latitude", FieldType.FLOAT, 45.4f) + .setField("longitude", FieldType.FLOAT, 45.4f); 
+ + final TestRunner runner = TestRunners.newTestRunner(new TestProcessor()); + + // create the controller service and link it to the test processor : + final ElasticsearchClientService elasticsearchClientService = configureElasticsearchOpenDistroClientService(runner); + + // Verify the index does not exist + Assert.assertEquals(false, elasticsearchClientService.existsCollection(index)); + + // Create the index + elasticsearchClientService.createCollection(index, 2, 1); + Assert.assertEquals(true, elasticsearchClientService.existsCollection(index)); + + // Put a document in the bulk processor : + String document1 = ElasticsearchRecordConverter.convertToString(record); + elasticsearchClientService.bulkPut(index, null, document1, Optional.of(docId)); + // Flush the bulk processor : + elasticsearchClientService.bulkFlush(); + Thread.sleep(2000); + try { + // Refresh the index : + elasticsearchClientService.refreshCollection(index); + } catch (Exception e) { + logger.error("Error while refreshing the index : " + e.toString()); + } + + long documentsNumber = 0; + + try { + documentsNumber = elasticsearchClientService.countCollection(index); + } catch (Exception e) { + logger.error("Error while counting the number of documents in the index : " + e.toString()); + } + + Assert.assertEquals(1, documentsNumber); + + List multiGetQueryRecords = new ArrayList<>(); + ArrayList documentIds = new ArrayList<>(); + List multiGetResponseRecords = new ArrayList<>(); + + + // Make sure a dummy query returns no result : + documentIds.add(docId); + try { + multiGetQueryRecords.add(new MultiGetQueryRecord(index, null, new String[]{"location", "id"}, new String[]{}, documentIds)); + } catch (InvalidMultiGetQueryRecordException e) { + e.printStackTrace(); + } + multiGetResponseRecords = elasticsearchClientService.multiGet(multiGetQueryRecords); + Assert.assertEquals(1, multiGetResponseRecords.size()); // number of documents retrieved + + } + + + @Test + public void testMultiGet() throws InitializationException, InterruptedException, InvalidMultiGetQueryRecordException { + final String index1 = "index1"; + final String index2 = "index2"; + + Map document1 = new HashMap<>(); + final String docId1 = "id1"; + document1.put("field_beg_1", "field_beg_1_document1_value"); + document1.put("field_beg_2", "field_beg_2_document1_value"); + document1.put("field_beg_3", "field_beg_3_document1_value"); + document1.put("field_fin_1", "field_fin_1_document1_value"); + document1.put("field_fin_2", "field_fin_2_document1_value"); + + Map document2 = new HashMap<>(); + final String docId2 = "id2"; + document2.put("field_beg_1", "field_beg_1_document2_value"); + document2.put("field_beg_2", "field_beg_2_document2_value"); + document2.put("field_beg_3", "field_beg_3_document2_value"); + document2.put("field_fin_1", "field_fin_1_document2_value"); + document2.put("field_fin_2", "field_fin_2_document2_value"); + + Map document3 = new HashMap<>(); + final String docId3 = "id3"; + document3.put("field_beg_1", "field_beg_1_document3_value"); + document3.put("field_beg_2", "field_beg_2_document3_value"); + // this 3rd field is intentionally removed : + // document3.put("field_beg_3", "field_beg_3_document3_value"); + document3.put("field_fin_1", "field_fin_1_document3_value"); + document3.put("field_fin_2", "field_fin_2_document3_value"); + + final TestRunner runner = TestRunners.newTestRunner(new TestProcessor()); + + // create the controller service and link it to the test processor : + final ElasticsearchClientService 
elasticsearchClientService = configureElasticsearchOpenDistroClientService(runner); + + // Verify the indexes do not exist + Assert.assertEquals(false, elasticsearchClientService.existsCollection(index1)); + Assert.assertEquals(false, elasticsearchClientService.existsCollection(index2)); + + // Create the indexes + elasticsearchClientService.createCollection(index1, 2, 1); + elasticsearchClientService.createCollection(index2, 2, 1); + Assert.assertEquals(true, elasticsearchClientService.existsCollection(index1)); + Assert.assertEquals(true, elasticsearchClientService.existsCollection(index2)); + + // Put documents in the bulk processor : + elasticsearchClientService.bulkPut(index1, null, document1, Optional.of(docId1)); + elasticsearchClientService.bulkPut(index1, null, document2, Optional.of(docId2)); + elasticsearchClientService.bulkPut(index1, null, document3, Optional.of(docId3)); + elasticsearchClientService.bulkPut(index2, null, document1, Optional.of(docId1)); + elasticsearchClientService.bulkPut(index2, null, document2, Optional.of(docId2)); + elasticsearchClientService.bulkPut(index2, null, document3, Optional.of(docId3)); + // Flush the bulk processor : + elasticsearchClientService.bulkFlush(); + Thread.sleep(2000); + try { + // Refresh the indexes : + elasticsearchClientService.refreshCollection(index1); + elasticsearchClientService.refreshCollection(index2); + } catch (Exception e) { + logger.error("Error while refreshing the indexes : " + e.toString()); + } + + long countIndex1 = 0; + long countIndex2 = 0; + try { + countIndex1 = elasticsearchClientService.countCollection(index1); + countIndex2 = elasticsearchClientService.countCollection(index2); + } catch (Exception e) { + logger.error("Error while counting the number of documents in the index : " + e.toString()); + } + Assert.assertEquals(3, countIndex1); + Assert.assertEquals(3, countIndex2); + + List multiGetQueryRecords = new ArrayList<>(); + ArrayList documentIds = new ArrayList<>(); + ArrayList documentIds_2 = new ArrayList<>(); + List multiGetResponseRecords; + String[] fieldsToInclude = {"field_b*", "field*1"}; + String[] fieldsToExclude = {"field_*2"}; + + // Make sure a dummy query returns no result : + documentIds.add(docId1); + multiGetQueryRecords.add(new MultiGetQueryRecord("dummy", "", new String[]{"dummy"}, new String[]{}, documentIds)); + multiGetResponseRecords = elasticsearchClientService.multiGet(multiGetQueryRecords); + Assert.assertEquals(0, multiGetResponseRecords.size()); // number of documents retrieved + + multiGetQueryRecords.clear(); + documentIds.clear(); + multiGetResponseRecords.clear(); + + // Test : 1 MultiGetQueryRecord record, with 1 index, 1 type, 1 id, WITHOUT includes, WITHOUT excludes : + documentIds.add(docId1); + multiGetQueryRecords.add(new MultiGetQueryRecord(index1, null, documentIds)); + multiGetResponseRecords = elasticsearchClientService.multiGet(multiGetQueryRecords); + + Assert.assertEquals(1, multiGetResponseRecords.size()); // number of documents retrieved + Assert.assertEquals(index1, multiGetResponseRecords.get(0).getCollectionName()); + Assert.assertEquals("_doc", multiGetResponseRecords.get(0).getTypeName()); + Assert.assertEquals(docId1, multiGetResponseRecords.get(0).getDocumentId()); + Assert.assertEquals(5, multiGetResponseRecords.get(0).getRetrievedFields().size()); // number of fields retrieved for the document + multiGetResponseRecords.get(0).getRetrievedFields().forEach((k, v) -> document1.get(k).equals(v.toString())); + + multiGetQueryRecords.clear(); + 
documentIds.clear(); + multiGetResponseRecords.clear(); + + // Test : 1 MultiGetQueryRecord record, with 1 index, 0 type, 3 ids, WITH include, WITH exclude : + documentIds.add(docId1); + documentIds.add(docId2); + documentIds.add(docId3); + multiGetQueryRecords.add(new MultiGetQueryRecord(index1, null, fieldsToInclude, fieldsToExclude, documentIds)); + multiGetResponseRecords = elasticsearchClientService.multiGet(multiGetQueryRecords); + + Assert.assertEquals(3, multiGetResponseRecords.size()); // verify that 3 documents has been retrieved + multiGetResponseRecords.forEach(responseRecord -> Assert.assertEquals(index1, responseRecord.getCollectionName())); // verify that all retrieved are in index1 + multiGetResponseRecords.forEach(responseRecord -> Assert.assertEquals("_doc", responseRecord.getTypeName())); // verify that the type of all retrieved documents is type1 + multiGetResponseRecords.forEach(responseRecord -> { + if (responseRecord.getDocumentId() == docId1) { + Assert.assertEquals(3, responseRecord.getRetrievedFields().size()); // for document1, verify that 3 fields has been retrieved + // verify that the 3 retrieved fields are the correct ones : + Assert.assertEquals(true, responseRecord.getRetrievedFields().containsKey("field_beg_1")); + Assert.assertEquals(true, responseRecord.getRetrievedFields().containsKey("field_beg_3")); + Assert.assertEquals(true, responseRecord.getRetrievedFields().containsKey("field_fin_1")); + // verify that the values of the 3 retrieved fields are the correct ones : + Assert.assertEquals("field_beg_1_document1_value", responseRecord.getRetrievedFields().get("field_beg_1").toString()); + Assert.assertEquals("field_beg_3_document1_value", responseRecord.getRetrievedFields().get("field_beg_3").toString()); + Assert.assertEquals("field_fin_1_document1_value", responseRecord.getRetrievedFields().get("field_fin_1").toString()); + } + if (responseRecord.getDocumentId() == docId2) + Assert.assertEquals(3, responseRecord.getRetrievedFields().size()); // for document2, verify that 3 fields has been retrieved + if (responseRecord.getDocumentId() == docId3) + Assert.assertEquals(2, responseRecord.getRetrievedFields().size()); // for document3, verify that 2 fields has been retrieved + }); + + multiGetQueryRecords.clear(); + documentIds.clear(); + multiGetResponseRecords.clear(); + + // Test : 2 MultiGetQueryRecord records : + // - 1st : 1 index (index1), 1 type, 2 ids, WITH include, WITH exclude --> expecting : 2 docs retrieved (from index1), 3 fields each (except doc3 : 2 fields) + // - 2nd : 1 index (index2), 0 type, 3 ids, WITH include, WITHOUT exclude --> expecting : 3 docs retrieved (from index2), 4 fields each (except doc3 : 3 fields) + documentIds.add(docId1); + documentIds.add(docId2); + multiGetQueryRecords.add(new MultiGetQueryRecord(index1, null, fieldsToInclude, fieldsToExclude, documentIds)); + documentIds_2.add(docId1); + documentIds_2.add(docId1); + documentIds_2.add(docId1); + multiGetQueryRecords.add(new MultiGetQueryRecord(index2, null, fieldsToInclude, null, documentIds_2)); + multiGetResponseRecords = elasticsearchClientService.multiGet(multiGetQueryRecords); + + Assert.assertEquals(5, multiGetResponseRecords.size()); // verify that 5 documents has been retrieved + multiGetResponseRecords.forEach(responseRecord -> { + if (responseRecord.getCollectionName() == index1 && !responseRecord.getDocumentId().equals(docId3)) + Assert.assertEquals(3, responseRecord.getRetrievedFields().size()); // for documents from index1 (except doc3), verify that 3 
fields has been retrieved + if (responseRecord.getCollectionName() == index1 && responseRecord.getDocumentId().equals(docId3)) + Assert.assertEquals(2, responseRecord.getRetrievedFields().size()); // for document3 from index1, verify that 2 fields has been retrieved + if (responseRecord.getDocumentId() == index2 && !responseRecord.getDocumentId().equals(docId3)) + Assert.assertEquals(4, responseRecord.getRetrievedFields().size()); // for documents from index2 (except doc3), verify that 4 fields has been retrieved + if (responseRecord.getDocumentId() == index2 && responseRecord.getDocumentId().equals(docId3)) + Assert.assertEquals(3, responseRecord.getRetrievedFields().size()); // for document3 from index2, verify that 3 fields has been retrieved + }); + + } + + @Test + public void testMultiGetInvalidRecords() { + + List multiGetQueryRecords = new ArrayList<>(); + + String errorMessage = ""; + + // Validate null index behaviour : + try { + multiGetQueryRecords.add(new MultiGetQueryRecord(null, null, null, null, null)); + } catch (InvalidMultiGetQueryRecordException e) { + errorMessage = e.getMessage(); + } + Assert.assertEquals(errorMessage, "The index name cannot be null"); + + // Validate empty index behaviour : + try { + multiGetQueryRecords.add(new MultiGetQueryRecord("", null, null, null, null)); + } catch (InvalidMultiGetQueryRecordException e) { + errorMessage = e.getMessage(); + } + Assert.assertEquals(errorMessage, "The index name cannot be empty"); + + // Validate null documentIds behaviour : + try { + multiGetQueryRecords.add(new MultiGetQueryRecord("dummy", null, null, null, null)); + } catch (InvalidMultiGetQueryRecordException e) { + errorMessage = e.getMessage(); + } + Assert.assertEquals(errorMessage, "The list of document ids cannot be null"); + + // Make sure no invalid MultiGetQueryRecord has been added to multiGetQueryRecords list : + Assert.assertEquals(0, multiGetQueryRecords.size()); + } +} diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientServiceIT.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientServiceIT.java index 5e5b86d05..3c30206bd 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientServiceIT.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/integration-test/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientServiceIT.java @@ -29,19 +29,10 @@ import com.hurence.logisland.util.runner.TestRunner; import com.hurence.logisland.util.runner.TestRunners; import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest; -import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; -import org.elasticsearch.action.bulk.BulkItemResponse; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import 
org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.client.indices.GetIndexResponse; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.junit.After; import org.junit.Assert; @@ -52,7 +43,8 @@ import java.io.IOException; import java.util.*; -import java.util.function.BiConsumer; + +import static com.hurence.logisland.service.elasticsearch.ElasticsearchClientService.HOSTS; public class Elasticsearch_7_x_ClientServiceIT { @@ -82,85 +74,13 @@ public void clean() throws IOException { Assert.assertTrue(esRule.getClient().indices().delete(deleteRequest, RequestOptions.DEFAULT).isAcknowledged()); } } - - private class MockElasticsearchClientService extends Elasticsearch_7_x_ClientService { - - @Override - protected void createElasticsearchClient(ControllerServiceInitializationContext context) throws ProcessException { - if (esClient != null) { - return; - } - esClient = esRule.getClient(); - } - - @Override - protected void createBulkProcessor(ControllerServiceInitializationContext context) { - - if (bulkProcessor != null) { - return; - } - - // create the bulk processor - - BulkProcessor.Listener listener = - new BulkProcessor.Listener() { - @Override - public void beforeBulk(long l, BulkRequest bulkRequest) { - getLogger().debug("Going to execute bulk [id:{}] composed of {} actions", new Object[]{l, bulkRequest.numberOfActions()}); - } - - @Override - public void afterBulk(long l, BulkRequest bulkRequest, BulkResponse bulkResponse) { - getLogger().debug("Executed bulk [id:{}] composed of {} actions", new Object[]{l, bulkRequest.numberOfActions()}); - if (bulkResponse.hasFailures()) { - getLogger().warn("There was failures while executing bulk [id:{}]," + - " done bulk request in {} ms with failure = {}", - new Object[]{l, bulkResponse.getTook().getMillis(), bulkResponse.buildFailureMessage()}); - for (BulkItemResponse item : bulkResponse.getItems()) { - if (item.isFailed()) { - errors.put(item.getId(), item.getFailureMessage()); - } - } - } - } - - @Override - public void afterBulk(long l, BulkRequest bulkRequest, Throwable throwable) { - getLogger().error("something went wrong while bulk loading events to es : {}", new Object[]{throwable.getMessage()}); - } - - }; - - BiConsumer> bulkConsumer = - (request, bulkListener) -> esClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener); - bulkProcessor = BulkProcessor.builder(bulkConsumer, listener) - .setBulkActions(1000) - .setBulkSize(new ByteSizeValue(10, ByteSizeUnit.MB)) - .setFlushInterval(TimeValue.timeValueSeconds(1)) - .setConcurrentRequests(2) - //.setBackoffPolicy(getBackOffPolicy(context)) - .build(); - - } - - @Override - public List getSupportedPropertyDescriptors() { - - List props = new ArrayList<>(); - - return Collections.unmodifiableList(props); - } - - } - private ElasticsearchClientService configureElasticsearchClientService(final TestRunner runner) throws InitializationException { - final MockElasticsearchClientService elasticsearchClientService = new MockElasticsearchClientService(); + final Elasticsearch_7_x_ClientService elasticsearchClientService = new Elasticsearch_7_x_ClientService(); runner.addControllerService("elasticsearchClient", elasticsearchClientService); - - runner.enableControllerService(elasticsearchClientService); 
runner.setProperty(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE, "elasticsearchClient"); - runner.assertValid(elasticsearchClientService); + runner.setProperty(elasticsearchClientService, HOSTS, esRule.getHostPortString()); + runner.enableControllerService(elasticsearchClientService); // TODO : is this necessary ? final ElasticsearchClientService service = PluginProxy.unwrap(runner.getProcessContext().getPropertyValue(TestProcessor.ELASTICSEARCH_CLIENT_SERVICE).asControllerService()); diff --git a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientService.java b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientService.java index 1a93e0220..133dd8dd7 100644 --- a/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientService.java +++ b/logisland-components/logisland-services/logisland-service-elasticsearch/logisland-service-elasticsearch_7_x-client/src/main/java/com/hurence/logisland/service/elasticsearch/Elasticsearch_7_x_ClientService.java @@ -59,14 +59,17 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.reindex.ReindexRequest; -import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +//import javax.security.cert.X509Certificate; import java.io.IOException; +import java.security.cert.X509Certificate; import java.util.*; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.BiConsumer; @Tags({ "elasticsearch", "client"}) @@ -98,6 +101,7 @@ public List getSupportedPropertyDescriptors() { props.add(SAMPLER_INTERVAL); props.add(USERNAME); props.add(PASSWORD); + props.add(ENABLE_SSL); props.add(PROP_SHIELD_LOCATION); props.add(HOSTS); props.add(PROP_SSL_CONTEXT_SERVICE); @@ -137,20 +141,66 @@ protected void createElasticsearchClient(ControllerServiceInitializationContext final String username = context.getPropertyValue(USERNAME).asString(); final String password = context.getPropertyValue(PASSWORD).asString(); final String hosts = context.getPropertyValue(HOSTS).asString(); + final boolean enableSsl = context.getPropertyValue(ENABLE_SSL).asBoolean(); - esHosts = getEsHosts(hosts); + esHosts = getEsHosts(hosts, enableSsl); if (esHosts != null) { RestClientBuilder builder = RestClient.builder(esHosts); - if (!StringUtils.isEmpty(username) && !StringUtils.isEmpty(password)) { - final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password)); + /** + * Inspired from: + * https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/_encrypted_communication.html + * https://github.com/opendistro-for-elasticsearch/community/issues/64 + */ + + if ((!StringUtils.isEmpty(username) && !StringUtils.isEmpty(password)) || enableSsl) { 
builder.setHttpClientConfigCallback(httpClientBuilder -> { - return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); - }); + + if (!StringUtils.isEmpty(username) && !StringUtils.isEmpty(password)) { + // Support user/password basic auth + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password)); + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); + } + if (enableSsl) { + // Support SSL (ES Shield or OpenDistro) + + /** + * TODO: This current implementation does not verify the server certificate. One could + * improve this and provide support for a local truststore to check the server certificate. + * The same way, authentication to the server through local certificate is not supported + * yet. + */ + + // Create and use a trust manager accepting all server certificates + TrustManager[] acceptAllTrustManager = new TrustManager[] { new X509TrustManager() { + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + public void checkClientTrusted(X509Certificate[] certs, String authType) { + } + + public void checkServerTrusted(X509Certificate[] certs, String authType) { + } + } }; + + SSLContext sslContext = null; + try { + sslContext = SSLContext.getInstance("SSL"); + sslContext.init(null, acceptAllTrustManager, new java.security.SecureRandom()); + } catch (Exception e) { + getLogger().error("Failed to create Elasticsearch client SSLContext due to {}", + new Object[]{e}, e); + throw new RuntimeException(e); + } + + httpClientBuilder.setSSLContext(sslContext); + } + return httpClientBuilder; + }); } esClient = new RestHighLevelClient(builder); @@ -166,9 +216,10 @@ protected void createElasticsearchClient(ControllerServiceInitializationContext * Get the ElasticSearch hosts. * * @param hosts A comma-separated list of ElasticSearch hosts (host:port,host2:port2, etc.) + * @param enableSsl Enable ssl or not * @return List of HttpHost for the ES hosts */ - private HttpHost[] getEsHosts(String hosts) { + private HttpHost[] getEsHosts(String hosts, boolean enableSsl) { if (hosts == null) { return null; @@ -182,7 +233,7 @@ private HttpHost[] getEsHosts(String hosts) { final String hostName = addresses[0].trim(); final int port = Integer.parseInt(addresses[1].trim()); - esHosts[indHost] = new HttpHost(hostName, port); + esHosts[indHost] = new HttpHost(hostName, port, enableSsl ? 
"https" : "http"); indHost++; } return esHosts; diff --git a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/pom.xml b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/pom.xml index 7bd481334..cbba578ca 100644 --- a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/pom.xml +++ b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/pom.xml @@ -41,28 +41,28 @@ 3.7 true - - - com.fasterxml.jackson.core jackson-databind - 2.9.3 + ${jackson.version} + true + + + com.fasterxml.jackson.core + jackson-core + 2.10.3 true com.maxmind.geoip2 geoip2 - 2.11.0 + 2.13.1 true org.apache.hadoop hadoop-client - 2.2.0 + 3.2.1 provided true diff --git a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java index 86722f917..2f7a57385 100644 --- a/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java +++ b/logisland-components/logisland-services/logisland-service-ip-to-geo/logisland-service-ip-to-geo-maxmind/src/main/java/com/hurence/logisland/service/iptogeo/maxmind/MaxmindIpToGeoService.java @@ -30,7 +30,6 @@ import com.maxmind.geoip2.exception.GeoIp2Exception; import com.maxmind.geoip2.model.CityResponse; import com.maxmind.geoip2.record.*; -import com.hurence.logisland.component.PropertyValue; import java.io.File; import java.io.IOException; @@ -146,7 +145,7 @@ private void initFromUri(String dbUri) throws Exception Configuration conf = new Configuration(); String hdfsUri = conf.get("fs.defaultFS"); - getLogger().info("Default HDFS URI: " + hdfsUri); + getLogger().info("Base default FS: " + hdfsUri); // Set HADOOP user to same as current suer String hadoopUser = System.getProperty("user.name"); @@ -158,7 +157,7 @@ private void initFromUri(String dbUri) throws Exception // Create a path to config file and init input stream Path hdfsReadpath = new Path(dbUri); - getLogger().info("Reading Maxmind DB file from HDFS at: " + dbUri); + getLogger().info("Reading Maxmind DB file from URI at: " + dbUri); FSDataInputStream inputStream = fs.open(hdfsReadpath); long start = System.currentTimeMillis(); @@ -166,6 +165,8 @@ private void initFromUri(String dbUri) throws Exception long stop = System.currentTimeMillis(); getLogger().info("Completed loading of Maxmind Geo Database in {} milliseconds.", new Object[]{stop - start}); databaseReaderRef.set(databaseReader); + + inputStream.close(); } /** diff --git a/logisland-core/logisland-engines/logisland-engine-spark_1_6/pom.xml b/logisland-core/logisland-engines/logisland-engine-spark_1_6/pom.xml index 72c5afeee..0354112fb 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_1_6/pom.xml +++ b/logisland-core/logisland-engines/logisland-engine-spark_1_6/pom.xml @@ -110,6 +110,7 @@ http://www.w3.org/2001/XMLSchema-instance "> com.fasterxml.jackson.core jackson-databind + ${jackson.version} provided diff --git 
a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/pom.xml b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/pom.xml new file mode 100644 index 000000000..fc69a1883 --- /dev/null +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/pom.xml @@ -0,0 +1,216 @@ + + + 4.0.0 + + com.hurence.logisland + logisland-engine-spark_2_X + 1.2.0 + + logisland-engine-spark_2_4 + jar + + + + + 2.11 + 2.4.0 + 0.10.2.1 + 2.11.8 + + + + + + + + + org.apache.kafka + kafka_${scala.binary.version} + ${kafka.version} + true + runtime + + + + org.apache.kafka + kafka-clients + ${kafka.version} + true + runtime + + + org.apache.bahir + spark-sql-streaming-mqtt_2.11 + 2.3.2 + runtime + true + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + com.google.guava + guava + + + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-mllib_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming-kafka-0-10_${scala.binary.version} + ${spark.version} + runtime + true + + + org.apache.spark + spark-sql-kafka-0-10_${scala.binary.version} + ${spark.version} + runtime + true + + + org.apache.spark + spark-streaming-kafka-assembly_${scala.binary.version} + ${spark.version} + runtime + true + + + + + + + com.hurence.logisland + logisland-engine-spark_2_common + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + org.scala-lang + scala-library + ${scala.version} + provided + true + + + com.banzaicloud + spark-metrics_2.11 + 2.4-1.0.5 + + + io.prometheus + simpleclient + 0.0.23 + + + io.prometheus + simpleclient_dropwizard + 0.0.23 + + + io.prometheus + simpleclient_pushgateway + 0.0.23 + + + + + + + + + org.immutables.tools + maven-shade-plugin + 4 + + + package + + shade + + + + + com.fasterxml.jackson.datatype:jackson-datatype-jsr310 + com.fasterxml.jackson.datatype:jackson-datatype-jdk8 + com.hurence.logisland:logisland-engine-spark_2_common + *:* + + + com.fasterxml.jackson.core:* + com.fasterxml.jackson.databind:* + com.fasterxml.jackson.jaxrs*:* + com.fasterxml.jackson.module:jackson-module-jaxb-annotations + org.scala-lang:* + org.scalatest:* + org.apache.zookeeper:* + com.google.guava:* + org.apache.commons:* + org.slf4j:* + log4j:* + org.yaml:* + org.eclipse.jetty:* + org.glassfish.hk2*:* + org.glassfish.jersey*:* + + + + + *:* + + META-INF/license/** + META-INF/* + META-INF/maven/** + LICENSE + NOTICE + /*.txt + build.properties + + + + + + + + + + + + + + + banzaicloud-github + https://raw.github.com/banzaicloud/spark-metrics/master/maven-repo/releases + + + diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/java/com/hurence/logisland/util/spark/Spark24Platform.java b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/java/com/hurence/logisland/util/spark/Spark24Platform.java new file mode 100644 index 000000000..9b1e30c09 --- /dev/null +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/java/com/hurence/logisland/util/spark/Spark24Platform.java @@ -0,0 +1,30 @@ +/** + * Copyright (C) 2020 Hurence (support@hurence.com) + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.hurence.logisland.util.spark; + +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.types.StructType; + +public class Spark24Platform implements SparkPlatform { + @Override + public Dataset createStreamingDataFrame(SQLContext sqlContext, RDD catalystRows, StructType schema) { + return sqlContext.internalCreateDataFrame(catalystRows, schema, true); + } +} diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/resources/META-INF/services/com.hurence.logisland.util.spark.SparkPlatform b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/resources/META-INF/services/com.hurence.logisland.util.spark.SparkPlatform new file mode 100644 index 000000000..405b9bf4e --- /dev/null +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_4/src/main/resources/META-INF/services/com.hurence.logisland.util.spark.SparkPlatform @@ -0,0 +1 @@ +com.hurence.logisland.util.spark.Spark24Platform \ No newline at end of file diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/pom.xml b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/pom.xml index 7f7c45021..5ac3d0822 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/pom.xml +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/pom.xml @@ -22,8 +22,8 @@ http://www.w3.org/2001/XMLSchema-instance "> 2.3.3 0.10.2.1 2.11.8 - 2.11 2.6.6 + 2.3.14.1 @@ -51,7 +51,7 @@ http://www.w3.org/2001/XMLSchema-instance "> org.apache.kafka - kafka_2.11 + kafka_${scala.binary.version} ${kafka.version} compile @@ -195,14 +195,12 @@ http://www.w3.org/2001/XMLSchema-instance "> - org.apache.bahir - spark-sql-streaming-mqtt_2.11 + spark-sql-streaming-mqtt_${scala.binary.version} 2.3.2 - org.apache.kafka connect-api @@ -369,14 +367,36 @@ http://www.w3.org/2001/XMLSchema-instance "> 5.1.3.RELEASE + + com.microsoft.azure + azure-eventhubs-spark_${scala.binary.version} + ${eventhubs.version} + + + + com.hurence.logisland + logisland-bootstrap + ${project.version} + provided + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + false + + net.alchim31.maven scala-maven-plugin - 3.2.2 + 4.3.1 scala-compile-first diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/java/com/hurence/logisland/util/spark/SparkConfigReader.java b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/java/com/hurence/logisland/util/spark/SparkConfigReader.java new file mode 100644 index 000000000..1f059e7e6 --- /dev/null +++ 
b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/java/com/hurence/logisland/util/spark/SparkConfigReader.java @@ -0,0 +1,59 @@ +package com.hurence.logisland.util.spark; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.hurence.logisland.config.LogislandConfiguration; +import com.hurence.logisland.util.string.StringUtils; +import org.apache.spark.SparkContext; +import org.apache.spark.rdd.RDD; + +import java.util.Arrays; + +import static com.hurence.logisland.config.ConfigReader.checkLogislandConf; + +/** + * This configuration reader depends on spark. We do not want to place these methods in the + * com.hurence.logisland.config.ConfigReader class (where the loadConfig method for the local filesystem + * resides), as that would introduce a spark dependency in the logisland-framework module. Only the spark + * engine should have a spark dependency. This class is therefore loaded from the StreamProcessingRunner + * and loading will succeed only in environments where a spark 2 engine is available and used; otherwise it + * will fail to load. It works for instance in the databricks environment, which is the first use case + * this class is being introduced for. + */ +public class SparkConfigReader { + + /** + * Loads a YAML config file located on the shared filesystem + * + * @param configFilePath the path of the config file + * @return a LogislandSessionConfiguration + * @throws Exception + */ + public static LogislandConfiguration loadConfigFromSharedFS(String configFilePath) throws Exception { + ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); + + /** + * In Databricks, developers should utilize the shared SparkContext instead of creating one using the constructor. + * When running a job, you can access the shared context by calling SparkContext.getOrCreate(). 
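 * As an illustration (the DBFS path below is hypothetical), a driver running on such a cluster can then simply do
 *   LogislandConfiguration conf = SparkConfigReader.loadConfigFromSharedFS("/mnt/jobs/my-logisland-job.yaml");
 * to obtain the parsed job definition.
 *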
+ * + * Also in databricks, a path like /path/to/a/file will be loaded from DBFS so will be interpreted like + * dbfs:/path/to/a/file + */ + + SparkContext sparkContext = SparkContext.getOrCreate(); + + RDD configRdd = sparkContext.textFile(configFilePath, 1); + String[] configStringArray = (String[])configRdd.collect(); + String configString = String.join("\n", Arrays.asList(configStringArray)); + + // replace all host from environment variables + String fileContent = StringUtils.resolveEnvVars(configString, "localhost"); + + System.out.println("Configuration:\n" + fileContent); + + LogislandConfiguration logislandConf = mapper.readValue(fileContent, LogislandConfiguration.class); + checkLogislandConf(logislandConf); + + return logislandConf; + } +} diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/engine/spark/KafkaStreamProcessingEngine.scala b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/engine/spark/KafkaStreamProcessingEngine.scala index c7ee30d8c..8d94921e4 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/engine/spark/KafkaStreamProcessingEngine.scala +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/engine/spark/KafkaStreamProcessingEngine.scala @@ -104,7 +104,14 @@ object KafkaStreamProcessingEngine { .name("spark.yarn.deploy-mode") .description("The yarn deploy mode") .required(false) - // .allowableValues("client", "cluster") + .allowableValues("client", "cluster") + .build + + val SPARK_DEPLOYMODE = new PropertyDescriptor.Builder() + .name("spark.deploy-mode") + .description("The spark standalone cluster deploy mode") + .required(false) + .allowableValues("client", "cluster") .build val SPARK_YARN_QUEUE = new PropertyDescriptor.Builder() @@ -487,6 +494,7 @@ class KafkaStreamProcessingEngine extends AbstractProcessingEngine { descriptors.add(KafkaStreamProcessingEngine.SPARK_MASTER) descriptors.add(KafkaStreamProcessingEngine.SPARK_MONITORING_DRIVER_PORT) descriptors.add(KafkaStreamProcessingEngine.SPARK_YARN_DEPLOYMODE) + descriptors.add(KafkaStreamProcessingEngine.SPARK_DEPLOYMODE) descriptors.add(KafkaStreamProcessingEngine.SPARK_YARN_QUEUE) descriptors.add(KafkaStreamProcessingEngine.SPARK_DRIVER_MEMORY) descriptors.add(KafkaStreamProcessingEngine.SPARK_EXECUTOR_MEMORY) diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/package.scala b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/package.scala index 3109b849c..6412f724c 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/package.scala +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/package.scala @@ -33,7 +33,7 @@ package com.hurence.logisland.stream import com.hurence.logisland.component.{AllowableValue, PropertyDescriptor} import com.hurence.logisland.serializer._ import com.hurence.logisland.stream.spark.structured.provider.StructuredStreamProviderService -import 
com.hurence.logisland.validator.StandardValidators +import com.hurence.logisland.validator.{StandardValidators, ValidationResult, Validator} /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -557,5 +557,153 @@ object StreamProperties { .defaultValue("aggregation") .build + ////////////////////////////////////// + // Azure event hubs options + ////////////////////////////////////// + + val EVENTHUBS_NAMESPACE: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.nameSpace") + .description("EventHubs namespace.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(true) + .build + + val EVENTHUBS_MAX_EVENTS_PER_TRIGGER: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.maxEventsPerTrigger") + .description("Rate limit on maximum number of events processed per trigger interval. The specified total number" + + " of events will be proportionally split across partitions of different volume.") + .addValidator(StandardValidators.LONG_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_OPERATION_TIMEOUT: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.operationTimeout") + .description("The amount of time (in milliseconds) Event Hub API calls will be retried before throwing an exception.") + .addValidator(StandardValidators.LONG_VALIDATOR) + .required(false) + .build + val EVENTHUBS_THREAD_POOL_SIZE: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.threadPoolSize") + .description("Sets the size of thread pool.") + .addValidator(StandardValidators.INTEGER_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_EVENT_HUB: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.eventHub") + .description("EventHub to read from.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_SAS_KEY_NAME: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.sasKeyName") + .description("SAS key name for read eventhub.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_SAS_KEY: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.sasKey") + .description("SAS key for read eventhub.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_CONSUMER_GROUP: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.consumerGroup") + .description("Consumer group name.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_POSITION_START_OF_STREAM = "start-of-stream" + val EVENTHUBS_READ_POSITION_END_OF_STREAM = "end-of-stream" + val EVENTHUBS_READ_POSITION_INSTANT_NOW = "instant-now" + + // Validator for EVENTHUBS_READ_POSITION + val EVENTHUBS_READ_POSITION_VALIDATOR: Validator = new Validator() { + override def validate(subject: String, value: String): ValidationResult = { + + // Accepted values are long, or start-of-stream or end-of-stream + var ok : Boolean = false + // Recognized string? + if ( (value == EVENTHUBS_READ_POSITION_START_OF_STREAM) || (value == EVENTHUBS_READ_POSITION_END_OF_STREAM) + || (value == EVENTHUBS_READ_POSITION_INSTANT_NOW) ) { + ok = true + } + // Long value? 
+ try { + value.toLong + ok = true + } catch { + case e: Exception => // Not a long; + } + new ValidationResult.Builder().subject(subject).input(value).valid(ok) + .explanation(subject + " should be a long or " + EVENTHUBS_READ_POSITION_START_OF_STREAM + " or " + + EVENTHUBS_READ_POSITION_END_OF_STREAM + " or " + EVENTHUBS_READ_POSITION_INSTANT_NOW).build} + } + + val EVENTHUBS_READ_POSITION_TYPE_NAME = "eventhubs.read.positionType" + val EVENTHUBS_READ_POSITION: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.position") + .description("Start event position. This may be either " + EVENTHUBS_READ_POSITION_START_OF_STREAM + ", " + + EVENTHUBS_READ_POSITION_END_OF_STREAM + " or a long value. If this is a long value, " + + EVENTHUBS_READ_POSITION_TYPE_NAME + " should be filled to define the meaning of the value. Default value is " + + EVENTHUBS_READ_POSITION_END_OF_STREAM) + .addValidator(EVENTHUBS_READ_POSITION_VALIDATOR) + .required(false) + .defaultValue(EVENTHUBS_READ_POSITION_END_OF_STREAM) + .build + + val EVENTHUBS_READ_POSITION_TYPE_OFFSET = "offset" + val EVENTHUBS_READ_POSITION_TYPE_SEQUENCE_NUMBER = "sequenceNumber" + val EVENTHUBS_READ_POSITION_TYPE_EPOCH_MILLIS = "epochMillis" + + val EVENTHUBS_READ_POSITION_TYPE: PropertyDescriptor = new PropertyDescriptor.Builder() + .name(EVENTHUBS_READ_POSITION_TYPE_NAME) + .description("Specifies the type of the " + EVENTHUBS_READ_POSITION.getName + " value when it is a long value. " + + "This can be " + EVENTHUBS_READ_POSITION_TYPE_OFFSET + ", " + EVENTHUBS_READ_POSITION_TYPE_SEQUENCE_NUMBER + + " or " + EVENTHUBS_READ_POSITION_TYPE_EPOCH_MILLIS + ". Default value is " + EVENTHUBS_READ_POSITION_TYPE_OFFSET) + .allowableValues(EVENTHUBS_READ_POSITION_TYPE_OFFSET, EVENTHUBS_READ_POSITION_TYPE_SEQUENCE_NUMBER, + EVENTHUBS_READ_POSITION_TYPE_EPOCH_MILLIS) + .required(false) + .defaultValue(EVENTHUBS_READ_POSITION_TYPE_OFFSET) + .build + + val EVENTHUBS_READ_RECEIVER_TIMEOUT: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.receiverTimeout") + .description("The amount of time (in milliseconds) Event Hub receive calls will be retried before throwing an exception.") + .addValidator(StandardValidators.LONG_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_READ_PREFETCH_COUNT: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.read.prefetchCount") + .description("Sets the prefetch count for the underlying receiver and controls how many events are received in advance.") + .addValidator(StandardValidators.INTEGER_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_WRITE_EVENT_HUB: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.write.eventHub") + .description("EventHub to write to.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_WRITE_SAS_KEY_NAME: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.write.sasKeyName") + .description("SAS key name for write eventhub.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build + + val EVENTHUBS_WRITE_SAS_KEY: PropertyDescriptor = new PropertyDescriptor.Builder() + .name("eventhubs.write.sasKey") + .description("SAS key for write eventhub.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(false) + .build } diff --git 
a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/StructuredStream.scala b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/StructuredStream.scala index d5ebc9427..f90209ec4 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/StructuredStream.scala +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/StructuredStream.scala @@ -1,33 +1,3 @@ -/** - * Copyright (C) 2016 Hurence (support@hurence.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Copyright (C) 2016 Hurence (support@hurence.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ /** * Copyright (C) 2016 Hurence (support@hurence.com) * @@ -62,7 +32,6 @@ import org.apache.spark.sql.{Dataset, SQLContext, SparkSession} import org.apache.spark.streaming.StreamingContext import org.slf4j.LoggerFactory - class StructuredStream extends AbstractRecordStream with SparkRecordStream { @@ -91,6 +60,8 @@ class StructuredStream extends AbstractRecordStream with SparkRecordStream { descriptors.add(GROUPBY) descriptors.add(STATE_TIMEOUT_MS) descriptors.add(CHUNK_SIZE) + descriptors.add(AVRO_INPUT_SCHEMA) + descriptors.add(AVRO_OUTPUT_SCHEMA) Collections.unmodifiableList(descriptors) } diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderService.scala b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderService.scala new file mode 100644 index 000000000..4ce4c8cd1 --- /dev/null +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderService.scala @@ -0,0 +1,365 @@ +/** + * Copyright (C) 2020 Hurence (support@hurence.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.hurence.logisland.stream.spark.structured.provider + +import java.time.{Duration, Instant} +import java.util +import java.util.Collections + +import com.hurence.logisland.annotation.documentation.CapabilityDescription +import com.hurence.logisland.annotation.lifecycle.OnEnabled +import com.hurence.logisland.component.{InitializationException, PropertyDescriptor} +import com.hurence.logisland.controller.{AbstractControllerService, ControllerServiceInitializationContext} +import com.hurence.logisland.record.{FieldDictionary, FieldType, Record, StandardRecord} +import com.hurence.logisland.runner.GlobalOptions +import com.hurence.logisland.stream.StreamContext +import com.hurence.logisland.stream.StreamProperties._ +import com.hurence.logisland.util.spark.ControllerServiceLookupSink +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.eventhubs.{ConnectionStringBuilder, EventHubsConf, EventPosition} +import org.apache.spark.sql.{Dataset, SparkSession} + +/** + * Service to allow reading/writing from/to azure event hub with structured streams + * Developed using documentation at: + * https://github.com/Azure/azure-event-hubs-spark/blob/master/docs/structured-streaming-eventhubs-integration.md + */ +@CapabilityDescription("Provides a ways to use azure event hubs as input or output in StructuredStream streams") +class AzureEventHubsStructuredStreamProviderService extends AbstractControllerService with StructuredStreamProviderService { + + var namespace : String = null + var readPositionString: String = null + var readPositionLong: Long = 0L + var readPositionIsString: Boolean = true + var readPositionType : String = null + var readEventHub : String = null + var readSasKeyName : String = null + var readSasKey : String = null + var readConsumerGroup : String = null + var writeEventHub : String = null + var writeSasKeyName : String = null + var writeSasKey : String = null + + var properties : Map[String, Any] = Map[String, Any]() + + @OnEnabled + @throws[InitializationException] + override def init(context: ControllerServiceInitializationContext): Unit = { + super.init(context) + this.synchronized { + try { + + // namespace + if (!context.getPropertyValue(EVENTHUBS_NAMESPACE).isSet) { + throw new InitializationException("EventHubs service " + EVENTHUBS_NAMESPACE.getName + " not specified.") + } + namespace = context.getPropertyValue(EVENTHUBS_NAMESPACE).asString() + + // readEventHub and writeEventHub + if (!context.getPropertyValue(EVENTHUBS_READ_EVENT_HUB).isSet && + !context.getPropertyValue(EVENTHUBS_WRITE_EVENT_HUB).isSet) { + throw new InitializationException("EventHubs service must at least have a read or write event hub set.") + } + + if (context.getPropertyValue(EVENTHUBS_READ_EVENT_HUB).isSet) { + readEventHub = context.getPropertyValue(EVENTHUBS_READ_EVENT_HUB).asString() + } + + if (context.getPropertyValue(EVENTHUBS_WRITE_EVENT_HUB).isSet) { + writeEventHub = context.getPropertyValue(EVENTHUBS_WRITE_EVENT_HUB).asString() + } + + // maxEventPerTrigger + if (context.getPropertyValue(EVENTHUBS_MAX_EVENTS_PER_TRIGGER).isSet) { + properties += (EVENTHUBS_MAX_EVENTS_PER_TRIGGER.getName + -> context.getPropertyValue(EVENTHUBS_MAX_EVENTS_PER_TRIGGER).asLong().toLong) + } + + // operationTimeout + if (context.getPropertyValue(EVENTHUBS_OPERATION_TIMEOUT).isSet) { + properties += (EVENTHUBS_OPERATION_TIMEOUT.getName + -> context.getPropertyValue(EVENTHUBS_OPERATION_TIMEOUT).asLong().toLong) + } + + // threadPoolSize + if 
(context.getPropertyValue(EVENTHUBS_THREAD_POOL_SIZE).isSet) { + properties += (EVENTHUBS_THREAD_POOL_SIZE.getName + -> context.getPropertyValue(EVENTHUBS_THREAD_POOL_SIZE).asInteger().toInt) + } + + if ((readEventHub == null) && (writeEventHub == null)) { + throw new InitializationException("EventHubs service must at least have a read or write event hub set.") + } + + // Get read config properties + if (readEventHub != null) { + + // readPosition + val readPosition : Any = context.getPropertyValue(EVENTHUBS_READ_POSITION).asString() + + if ( (readPosition == EVENTHUBS_READ_POSITION_START_OF_STREAM) + || (readPosition == EVENTHUBS_READ_POSITION_END_OF_STREAM) + || (readPosition == EVENTHUBS_READ_POSITION_INSTANT_NOW)) { + readPositionIsString = true + readPositionString = readPosition.asInstanceOf[String] + } else { + readPositionIsString = false + readPositionLong = readPosition.asInstanceOf[String].toLong + } + + // readPositionType + readPositionType = context.getPropertyValue(EVENTHUBS_READ_POSITION_TYPE).asString() + + // readSasKeyName + if (!context.getPropertyValue(EVENTHUBS_READ_SAS_KEY_NAME).isSet) { + throw new InitializationException("EventHubs service read event hub requires " + + EVENTHUBS_READ_SAS_KEY_NAME.getName) + } + readSasKeyName = context.getPropertyValue(EVENTHUBS_READ_SAS_KEY_NAME).asString() + + // readSasKey + if (!context.getPropertyValue(EVENTHUBS_READ_SAS_KEY).isSet) { + throw new InitializationException("EventHubs service read event hub requires " + + EVENTHUBS_READ_SAS_KEY.getName) + } + readSasKey = context.getPropertyValue(EVENTHUBS_READ_SAS_KEY).asString() + + // readConsumerGroup + if (context.getPropertyValue(EVENTHUBS_READ_CONSUMER_GROUP).isSet) { + readConsumerGroup = context.getPropertyValue(EVENTHUBS_READ_CONSUMER_GROUP).asString() + } + + // readReceiverTimeout + if (context.getPropertyValue(EVENTHUBS_READ_RECEIVER_TIMEOUT).isSet) { + properties += (EVENTHUBS_READ_RECEIVER_TIMEOUT.getName + -> context.getPropertyValue(EVENTHUBS_READ_RECEIVER_TIMEOUT).asInteger().toInt) + } + + // readPrefetchCount + if (context.getPropertyValue(EVENTHUBS_READ_PREFETCH_COUNT).isSet) { + properties += (EVENTHUBS_READ_PREFETCH_COUNT.getName + -> context.getPropertyValue(EVENTHUBS_READ_PREFETCH_COUNT).asInteger().toInt) + } + } + + // Get write config properties + if (writeEventHub != null) { + + // writeSasKeyName + if (!context.getPropertyValue(EVENTHUBS_WRITE_SAS_KEY_NAME).isSet) { + throw new InitializationException("EventHubs service write event hub requires " + + EVENTHUBS_WRITE_SAS_KEY_NAME.getName) + } + writeSasKeyName = context.getPropertyValue(EVENTHUBS_WRITE_SAS_KEY_NAME).asString() + + // writeSasKey + if (!context.getPropertyValue(EVENTHUBS_WRITE_SAS_KEY).isSet) { + throw new InitializationException("EventHubs service write event hub requires " + + EVENTHUBS_WRITE_SAS_KEY.getName) + } + writeSasKey = context.getPropertyValue(EVENTHUBS_WRITE_SAS_KEY).asString() + } + + } catch { + case e: Exception => + throw new InitializationException(e) + } + } + } + + /** + * Allows subclasses to register which property descriptor objects are + * supported. 
+ * + * @return PropertyDescriptor objects this processor currently supports + */ + override def getSupportedPropertyDescriptors() = { + val descriptors = new util.ArrayList[PropertyDescriptor] + descriptors.add(EVENTHUBS_NAMESPACE) + descriptors.add(EVENTHUBS_MAX_EVENTS_PER_TRIGGER) + descriptors.add(EVENTHUBS_OPERATION_TIMEOUT) + descriptors.add(EVENTHUBS_THREAD_POOL_SIZE) + descriptors.add(EVENTHUBS_READ_EVENT_HUB) + descriptors.add(EVENTHUBS_READ_SAS_KEY_NAME) + descriptors.add(EVENTHUBS_READ_SAS_KEY) + descriptors.add(EVENTHUBS_READ_CONSUMER_GROUP) + descriptors.add(EVENTHUBS_READ_POSITION) + descriptors.add(EVENTHUBS_READ_POSITION_TYPE) + descriptors.add(EVENTHUBS_READ_RECEIVER_TIMEOUT) + descriptors.add(EVENTHUBS_READ_PREFETCH_COUNT) + descriptors.add(EVENTHUBS_WRITE_EVENT_HUB) + descriptors.add(EVENTHUBS_WRITE_SAS_KEY_NAME) + descriptors.add(EVENTHUBS_WRITE_SAS_KEY) + Collections.unmodifiableList(descriptors) + } + + /** + * Applies the defined service configuration to the passed event hub configuration object + * @param eventHubsConf + */ + def applyConfig(eventHubsConf: EventHubsConf, forRead : Boolean): Unit = { + + if (forRead) { + + /** + * Properties only for read + */ + + if (readConsumerGroup != null) { + eventHubsConf.setConsumerGroup(readConsumerGroup) + } + + if (readPositionIsString) { + // Read position is a string + readPositionString match { + case EVENTHUBS_READ_POSITION_START_OF_STREAM => + eventHubsConf.setStartingPosition(EventPosition.fromStartOfStream) + case EVENTHUBS_READ_POSITION_END_OF_STREAM => + eventHubsConf.setStartingPosition(EventPosition.fromEndOfStream) + case EVENTHUBS_READ_POSITION_INSTANT_NOW => + eventHubsConf.setStartingPosition(EventPosition.fromEnqueuedTime(Instant.now())) + case _ => throw new IllegalStateException("Unsupported read position string value: " + readPositionString) + } + } else { + // Read position is a long, let's use it according to its meaning defined in readPositionType + readPositionType match { + case EVENTHUBS_READ_POSITION_TYPE_OFFSET => + eventHubsConf.setStartingPosition(EventPosition.fromOffset(readPositionLong.toString)) + case EVENTHUBS_READ_POSITION_TYPE_SEQUENCE_NUMBER => + eventHubsConf.setStartingPosition(EventPosition.fromSequenceNumber(readPositionLong)) + case EVENTHUBS_READ_POSITION_TYPE_EPOCH_MILLIS => + eventHubsConf.setStartingPosition(EventPosition.fromEnqueuedTime(Instant.ofEpochMilli(readPositionLong))) + case _ => throw new IllegalStateException("Unsupported read position type value: " + readPositionType) + } + } + + // readReceiverTimeout + val optionLong = properties.get(EVENTHUBS_READ_RECEIVER_TIMEOUT.getName).asInstanceOf[Option[Long]] + if (optionLong.isDefined) { + eventHubsConf.setReceiverTimeout(Duration.ofMillis(optionLong.get)) + } + + // readPrefetchCount + val optionInt : Option[Int] = properties.get(EVENTHUBS_READ_PREFETCH_COUNT.getName).asInstanceOf[Option[Int]] + if (optionInt.isDefined) { + eventHubsConf.setPrefetchCount(optionInt.get) + } + } + + /** + * Properties for both read or write + */ + + // maxEventPerTrigger + var optionLong : Option[Long] = properties.get(EVENTHUBS_MAX_EVENTS_PER_TRIGGER.getName).asInstanceOf[Option[Long]] + if (optionLong.isDefined) { + eventHubsConf.setMaxEventsPerTrigger(optionLong.get) + } + + // operationTimeout + optionLong = properties.get(EVENTHUBS_OPERATION_TIMEOUT.getName).asInstanceOf[Option[Long]] + if (optionLong.isDefined) { + eventHubsConf.setOperationTimeout(Duration.ofMillis(optionLong.get)) + } + + // maxEventPerTrigger + val optionInt : 
Option[Int] = properties.get(EVENTHUBS_THREAD_POOL_SIZE.getName).asInstanceOf[Option[Int]] + if (optionInt.isDefined) { + eventHubsConf.setThreadPoolSize(optionInt.get) + } + } + + /** + * create a streaming DataFrame that represents data received + * + * @param spark + * @param streamContext + * @return DataFrame currently loaded + */ + override def read(spark: SparkSession, streamContext: StreamContext) = { + import spark.implicits._ + + implicit val recordEncoder = org.apache.spark.sql.Encoders.kryo[Record] + + val connectionString = ConnectionStringBuilder() + .setNamespaceName(namespace) + .setEventHubName(readEventHub) + .setSasKeyName(readSasKeyName) + .setSasKey(readSasKey) + .build + + val eventHubsConf = EventHubsConf(connectionString) + applyConfig(eventHubsConf, true) + + val options = eventHubsConf.toMap + val optionsString = options.toString() + + logger.info(s"Starting azure event hubs structured stream on event hub $readEventHub in $namespace namespace with configuration:\n$optionsString") + val df = spark.readStream + .format("eventhubs") + .options(options) + .load() + .selectExpr("CAST(offset AS STRING)", "CAST(body AS BINARY)") + .as[(String, Array[Byte])] + .map(r => { + new StandardRecord(readEventHub) + .setField(FieldDictionary.RECORD_KEY, FieldType.STRING, r._1) + .setField(FieldDictionary.RECORD_VALUE, FieldType.BYTES, r._2) + }) + + df + } + + /** + * create a streaming DataFrame that represents data received + * + * @param streamContext + * @return DataFrame currently loaded + */ + override def write(df: Dataset[Record], controllerServiceLookupSink: Broadcast[ControllerServiceLookupSink], streamContext: StreamContext) = { + + import df.sparkSession.implicits._ + + val connectionString = ConnectionStringBuilder() + .setNamespaceName(namespace) + .setEventHubName(writeEventHub) + .setSasKeyName(writeSasKeyName) + .setSasKey(writeSasKey) + .build + + val eventHubsConf = EventHubsConf(connectionString) + applyConfig(eventHubsConf, false) + + var checkpointLocation : String = "checkpoints" + if (GlobalOptions.checkpointLocation != null) { + checkpointLocation = GlobalOptions.checkpointLocation + } + + logger.info(s"Starting azure event hubs structured stream to event hub $writeEventHub in " + + s"$namespace namespace with checkpointLocation $checkpointLocation") + + // Write key-value data from a DataFrame to a specific event hub specified in an option + df.map(r => { + (r.getField(FieldDictionary.RECORD_KEY).asString(), r.getField(FieldDictionary.RECORD_VALUE).asBytes()) + }) + .as[(String, Array[Byte])] + .toDF("partitionKey", "body") + .writeStream + .format("eventhubs") + .options(eventHubsConf.toMap) + .option("checkpointLocation", checkpointLocation) + } +} diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/StructuredStreamProviderService.scala b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/StructuredStreamProviderService.scala index befcb2f0b..fc4af3809 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/StructuredStreamProviderService.scala +++ 
b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/main/scala/com/hurence/logisland/stream/spark/structured/provider/StructuredStreamProviderService.scala @@ -36,6 +36,7 @@ import java.util.Date import com.hurence.logisland.controller.ControllerService import com.hurence.logisland.record._ +import com.hurence.logisland.runner.GlobalOptions import com.hurence.logisland.serializer.{JsonSerializer, NoopSerializer, RecordSerializer, SerializerProvider} import com.hurence.logisland.stream.StreamContext import com.hurence.logisland.stream.StreamProperties._ @@ -278,10 +279,16 @@ trait StructuredStreamProviderService extends ControllerService { // do the parallel processing val df2 = df.mapPartitions(record => record.map(record => serializeRecords(serializer, keySerializer, record))) + var checkpointLocation : String = "checkpoints/" + streamContext.getIdentifier + if (GlobalOptions.checkpointLocation != null) { + checkpointLocation = GlobalOptions.checkpointLocation + logger.info(s"Saving structured stream using checkpointLocation: $checkpointLocation") + } + write(df2, controllerServiceLookupSink, streamContext) .queryName(streamContext.getIdentifier) // .outputMode("update") - .option("checkpointLocation", "checkpoints/" + streamContext.getIdentifier) + .option("checkpointLocation", checkpointLocation) .start() // .processAllAvailable() diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/test/java/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderServiceTest.java b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/test/java/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderServiceTest.java new file mode 100644 index 000000000..e622b28f8 --- /dev/null +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/logisland-engine-spark_2_common/src/test/java/com/hurence/logisland/stream/spark/structured/provider/AzureEventHubsStructuredStreamProviderServiceTest.java @@ -0,0 +1,311 @@ +package com.hurence.logisland.stream.spark.structured.provider; + +import com.hurence.logisland.component.InitializationException; +import com.hurence.logisland.util.runner.TestRunner; +import com.hurence.logisland.util.runner.TestRunners; +import com.hurence.logisland.stream.StreamProperties; +import org.junit.jupiter.api.Test; + +import static org.junit.Assert.fail; + +public class AzureEventHubsStructuredStreamProviderServiceTest { + + @Test + public void testConfig() throws InitializationException { + + boolean error = false; + // Missing namespace + try { + error = false; + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); + // Any processor will do it, we won't use it but we need a real processor to be instantiated + final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut"); + runner.addControllerService("eventhubs_service", service); + runner.enableControllerService(service); + error = true; + fail("Namespace not defined: this should have failed"); + } catch (AssertionError e) { + if (error) { + fail(e.getMessage()); + } else { + System.out.println(e.getMessage()); + } + } + + // Namespace but missing read or write hub + try { + error = false; + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); 
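            // Same pattern as the first case above: enableControllerService() is expected to make the runner's
            // validation fail and throw an AssertionError before 'error' is set back to true. In the catch block,
            // 'error == true' therefore means the service was enabled when it should not have been (a real test
            // failure), while 'error == false' means validation rejected the configuration as intended.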
+ final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut"); + runner.addControllerService("eventhubs_service", service); + runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace"); + runner.enableControllerService(service); + error = true; + fail("Namespace defined but missing read or write hub: this should have failed"); + } catch (AssertionError e) { + if (error) { + fail(e.getMessage()); + } else { + System.out.println(e.getMessage()); + } + } + + /** + * READ EVENT HUB ONLY + */ + + // Namespace, read hub but missing sasKeyName + try { + error = false; + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); + final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut"); + runner.addControllerService("eventhubs_service", service); + runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub"); + runner.enableControllerService(service); + error = true; + fail("Read hub defined but missing sasKeyName: this should have failed"); + } catch (AssertionError e) { + if (error) { + fail(e.getMessage()); + } else { + System.out.println(e.getMessage()); + } + } + + // Namespace, read hub, sasKeyName but missing sasKey + try { + error = false; + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); + final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut"); + runner.addControllerService("eventhubs_service", service); + runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name"); + runner.enableControllerService(service); + error = true; + fail("Read hub defined, sasKeyName defined but missing sasKey: this should have failed"); + } catch (AssertionError e) { + if (error) { + fail(e.getMessage()); + } else { + System.out.println(e.getMessage()); + } + } + + // Namespace, read hub, sasKeyName and sasKey -> should be ok + try { + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); + final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut"); + runner.addControllerService("eventhubs_service", service); + runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name"); + runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key"); + runner.enableControllerService(service); + error = true; + System.out.println("Read hub defined, sasKeyName, sasKey defined: ok"); + } catch (AssertionError e) { + fail("Read hub defined, sasKeyName, sasKey defined: this should have passed"); + } + + // Bad read position value + try { + error = false; + final AzureEventHubsStructuredStreamProviderService service = + new AzureEventHubsStructuredStreamProviderService(); + final TestRunner runner = 
TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION().getName(), "bad 0123456789 value");
+            runner.enableControllerService(service);
+            error = true;
+            fail("Bad read position value: this should have failed");
+        } catch (AssertionError e) {
+            if (error) {
+                fail(e.getMessage());
+            } else {
+                System.out.println(e.getMessage());
+            }
+        }
+
+        // Bad read position type
+        try {
+            error = false;
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION_TYPE().getName(), "bad value");
+            runner.enableControllerService(service);
+            error = true;
+            fail("Bad read position type value: this should have failed");
+        } catch (AssertionError e) {
+            if (error) {
+                fail(e.getMessage());
+            } else {
+                System.out.println(e.getMessage());
+            }
+        }
+
+        // Set all read properties
+        try {
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION_TYPE().getName(), "sequenceNumber");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_CONSUMER_GROUP().getName(), "consumerGroup");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_RECEIVER_TIMEOUT().getName(), "123");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_PREFETCH_COUNT().getName(), "456");
+            runner.enableControllerService(service);
+            System.out.println("All read properties set: ok");
+        } catch (AssertionError e) {
+            fail("All read properties set: this should have passed");
+        }
+
+        /**
+         * WRITE EVENT HUB ONLY
+         */
+
+        // Namespace, write hub but missing sasKeyName
+        try {
+            error = false;
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_EVENT_HUB().getName(), "write_hub");
+            runner.enableControllerService(service);
+            error = true;
+            fail("Write hub defined but missing sasKeyName: this should have failed");
+        } catch (AssertionError e) {
+            if (error) {
+                fail(e.getMessage());
+            } else {
+                System.out.println(e.getMessage());
+            }
+        }
+
+        // Namespace, write hub, sasKeyName but missing sasKey
+        try {
+            error = false;
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_EVENT_HUB().getName(), "write_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY_NAME().getName(), "write_sas_key_name");
+            runner.enableControllerService(service);
+            error = true;
+            fail("Write hub defined, sasKeyName defined but missing sasKey: this should have failed");
+        } catch (AssertionError e) {
+            if (error) {
+                fail(e.getMessage());
+            } else {
+                System.out.println(e.getMessage());
+            }
+        }
+
+        // Namespace, write hub, sasKeyName and sasKey -> should be ok
+        try {
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_EVENT_HUB().getName(), "write_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY_NAME().getName(), "write_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY().getName(), "write_sas_key");
+            runner.enableControllerService(service);
+            System.out.println("Write hub defined, sasKeyName, sasKey defined: ok");
+        } catch (AssertionError e) {
+            fail("Write hub defined, sasKeyName, sasKey defined: this should have passed");
+        }
+
+        /**
+         * BOTH READ AND WRITE EVENT HUBS
+         */
+
+        // Both read and write hubs, minimum set of properties needed
+        try {
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_EVENT_HUB().getName(), "write_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY_NAME().getName(), "write_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY().getName(), "write_sas_key");
+            runner.enableControllerService(service);
+            System.out.println("Read and Write hub defined with their key properties defined: ok");
+        } catch (AssertionError e) {
+            fail("Read and Write hub defined with their key properties defined: this should have passed");
+        }
+
+        // Bad read position value as long -> ok
+        try {
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION().getName(), "1234");
+            runner.enableControllerService(service);
+            System.out.println("Read position is a long: ok");
+        } catch (AssertionError e) {
+            fail("Read position as long should haven been ok");
+        }
+
+        // Set all possible read and write properties
+        try {
+            final AzureEventHubsStructuredStreamProviderService service =
+                    new AzureEventHubsStructuredStreamProviderService();
+            final TestRunner runner = TestRunners.newTestRunner("com.hurence.logisland.processor.datastore.BulkPut");
+            runner.addControllerService("eventhubs_service", service);
+            runner.setProperty(service, StreamProperties.EVENTHUBS_NAMESPACE().getName(), "namespace");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_MAX_EVENTS_PER_TRIGGER().getName(), "987");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_OPERATION_TIMEOUT().getName(), "654");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_THREAD_POOL_SIZE().getName(), "321");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_EVENT_HUB().getName(), "read_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY_NAME().getName(), "read_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_SAS_KEY().getName(), "read_sas_key");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION().getName(), "8963");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_POSITION_TYPE().getName(), "offset");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_CONSUMER_GROUP().getName(), "consumerGroup");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_RECEIVER_TIMEOUT().getName(), "8436");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_READ_PREFETCH_COUNT().getName(), "4723");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_EVENT_HUB().getName(), "write_hub");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY_NAME().getName(), "write_sas_key_name");
+            runner.setProperty(service, StreamProperties.EVENTHUBS_WRITE_SAS_KEY().getName(), "write_sas_key");
+            runner.enableControllerService(service);
+            System.out.println("All read and write properties set: ok");
+        } catch (AssertionError e) {
+            fail("All read and write properties set: this should have
passed"); + } + } +} diff --git a/logisland-core/logisland-engines/logisland-engine-spark_2_X/pom.xml b/logisland-core/logisland-engines/logisland-engine-spark_2_X/pom.xml index d25cadccc..5a5ca030b 100644 --- a/logisland-core/logisland-engines/logisland-engine-spark_2_X/pom.xml +++ b/logisland-core/logisland-engines/logisland-engine-spark_2_X/pom.xml @@ -22,6 +22,7 @@ logisland-engine-spark_2_common logisland-engine-spark_2_1 logisland-engine-spark_2_3 + logisland-engine-spark_2_4 diff --git a/logisland-core/logisland-engines/logisland-engine-vanilla/pom.xml b/logisland-core/logisland-engines/logisland-engine-vanilla/pom.xml index 7e9bd1081..c08f02c73 100644 --- a/logisland-core/logisland-engines/logisland-engine-vanilla/pom.xml +++ b/logisland-core/logisland-engines/logisland-engine-vanilla/pom.xml @@ -86,19 +86,19 @@ com.fasterxml.jackson.core jackson-databind - 2.9.8 + ${jackson.version} com.fasterxml.jackson.core jackson-core - 2.9.8 + ${jackson.version} com.fasterxml.jackson.core jackson-annotations - 2.9.8 + ${jackson.version} @@ -49,6 +50,7 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml + ${jackson.version} commons-cli @@ -65,10 +67,12 @@ com.fasterxml.jackson.core jackson-databind + ${jackson.version} com.fasterxml.jackson.module jackson-module-jsonSchema + ${jackson.version} com.googlecode.json-simple diff --git a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/component/ComponentFactory.java b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/component/ComponentFactory.java index 1bfa1b3e7..bef48f52f 100644 --- a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/component/ComponentFactory.java +++ b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/component/ComponentFactory.java @@ -139,6 +139,7 @@ public static T loadComponent(String className) throws ClassNotFoundExceptio try { return (T) PluginLoader.loadPlugin(className); } catch (ClassNotFoundException cnfe) { + logger.warn("Class " + className + " not found in plugins: trying to load from current class loader"); return (T) Class.forName(className).newInstance(); } } catch (Exception e) { diff --git a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/config/ConfigReader.java b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/config/ConfigReader.java index eaff3a426..d83e06b27 100644 --- a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/config/ConfigReader.java +++ b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/config/ConfigReader.java @@ -15,7 +15,6 @@ */ package com.hurence.logisland.config; - import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -28,7 +27,6 @@ import java.nio.file.Files; import java.nio.file.Paths; - public class ConfigReader { @@ -38,9 +36,8 @@ static String readFile(String path, Charset encoding) return new String(encoded, encoding); } - /** - * Loads a YAML config file + * Loads a YAML config file (file located in the local file system) * * @param configFilePath the path of the config file * @return a LogislandSessionConfiguration @@ -66,7 +63,7 @@ public static LogislandConfiguration loadConfig(String configFilePath) throws Ex return logislandConf; } - private static void 
checkLogislandConf(LogislandConfiguration conf) throws IllegalArgumentException { + public static void checkLogislandConf(LogislandConfiguration conf) throws IllegalArgumentException { if (conf.getEngine().getComponent() == null || conf.getEngine().getComponent().isEmpty()) { throw new IllegalArgumentException("key 'component' is missing or empty for engine in configuration file"); } diff --git a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/serializer/JsonSerializer.java b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/serializer/JsonSerializer.java index 8eac67d14..f925a08d0 100644 --- a/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/serializer/JsonSerializer.java +++ b/logisland-core/logisland-framework/logisland-utils/src/main/java/com/hurence/logisland/serializer/JsonSerializer.java @@ -164,7 +164,6 @@ protected EventDeserializer(Class t) { @Override public Record deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { - JsonToken t = jp.getCurrentToken(); String id = null; String type = null; @@ -188,7 +187,10 @@ public Record deserialize(JsonParser jp, DeserializationContext ctxt) throws IOE case VALUE_NUMBER_INT: try { fields.put(jp.getCurrentName(), new Field(jp.getCurrentName(), FieldType.INT, jp.getIntValue())); - } catch (JsonParseException ex) { + } catch (Exception ex) { + // May have JsonParseException or InputCoercionException (for instance for long instead of int) + // This also depends on jackson version. + // The simplest is to catch any exception, as not sure to be exhaustive and precise handling all // special case for creationDate (not a field) if (jp.getCurrentName() != null && jp.getCurrentName().equals("creationDate")) { @@ -208,7 +210,7 @@ public Record deserialize(JsonParser jp, DeserializationContext ctxt) throws IOE try { fields.put(jp.getCurrentName(), new Field(jp.getCurrentName(), FieldType.DOUBLE, jp.getDoubleValue())); - } catch (JsonParseException ex) { + } catch (Exception ex) { fields.put(jp.getCurrentName(), new Field(jp.getCurrentName(), FieldType.FLOAT, jp.getFloatValue())); } diff --git a/logisland-core/pom.xml b/logisland-core/pom.xml index 2df9c55f7..b1c51e6e6 100644 --- a/logisland-core/pom.xml +++ b/logisland-core/pom.xml @@ -27,7 +27,7 @@ org.apache.avro avro - 1.8.2 + 1.9.2 com.fasterxml.jackson.module diff --git a/logisland-documentation/pom.xml b/logisland-documentation/pom.xml index 58adb0103..fe7376019 100644 --- a/logisland-documentation/pom.xml +++ b/logisland-documentation/pom.xml @@ -42,13 +42,13 @@ THIS MODULE DOCUMENTATION DEPENDENCIES com.fasterxml.jackson.core jackson-core - 2.9.4 + ${jackson.version} true com.fasterxml.jackson.core jackson-databind - 2.9.4 + ${jackson.version} true @@ -520,6 +520,11 @@ THIS MODULE DOCUMENTATION DEPENDENCIES logisland-engine-spark_2_3 ${project.version} + + com.hurence.logisland + logisland-engine-spark_2_4 + ${project.version} + org.apache.spark spark-network-common_${scala.binary.version} diff --git a/logisland-documentation/user/components/common-processors.rst b/logisland-documentation/user/components/common-processors.rst index 9751dd0f0..1786f153f 100644 --- a/logisland-documentation/user/components/common-processors.rst +++ b/logisland-documentation/user/components/common-processors.rst @@ -9,56 +9,7 @@ Find below the list. ---------- - -.. 
_com.hurence.logisland.processor.AddFields: - -AddFields ---------- -Add one or more field to records - -Module -______ -com.hurence.logisland:logisland-processor-common:1.2.0 - -Class -_____ -com.hurence.logisland.processor.AddFields - -Tags -____ -record, fields, Add - -Properties -__________ -In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values. - -.. csv-table:: allowable-values - :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL" - :widths: 20,60,30,20,10,10 - :escape: \ - - "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false" - -Dynamic Properties -__________________ -Dynamic Properties allow the user to specify both the name and value of a property. - -.. csv-table:: dynamic-properties - :header: "Name","Value","Description","Allowable Values","Default Value","EL" - :widths: 20,20,40,40,20,10 - :escape: \ - - "Name of the field to add", "Value of the field to add", "Add a field to the record with the specified value. Expression language can be used.You can not add a field that end with '.type' as this suffix is used to specify the type of fields to add", "", "null", **true** - "Name of the field to add with the suffix '.field.type'", "Type of the field to add", "Add a field to the record with the specified type. These properties are only used if a correspondant property without the suffix '.field.type' is already defined. If this property is not defined, default type for adding fields is String.You can only use Logisland predefined type fields.", "NULL, STRING, INT, LONG, ARRAY, FLOAT, DOUBLE, BYTES, RECORD, MAP, ENUM, BOOLEAN, UNION, DATETIME, OBJECT", "STRING", false - "Name of the field to add with the suffix '.field.name'", "Name of the field to add using expression language", "Add a field to the record with the specified name (which is evaluated using expression language). These properties are only used if a correspondant property without the suffix '.field.name' is already defined. If this property is not defined, the name of the field to add is the key of the first dynamic property (which is the main and only required dynamic property).", "", "null", **true** - -Extra informations -__________________ -.. include:: ./details/common-processors/AddFields-Detail.rst ----------- - -.. _com.hurence.logisland.processor.ApplyRegexp: - +.. _com.hurence.logisland.processor.alerting.ComputeTags: ComputeTags ----------- @@ -430,7 +381,7 @@ Dynamic Properties allow the user to specify both the name and value of a proper :escape: \ "Name of the field to add", "Value of the field to add", "Add a field to the record with the specified value. Expression language can be used.You can not add a field that end with '.type' as this suffix is used to specify the type of fields to add", "", "null", **true** - "Name of the field to add with the suffix '.field.type'", "Type of the field to add", "Add a field to the record with the specified type. These properties are only used if a correspondant property without the suffix '.field.type' is already defined. 
If this property is not defined, default type for adding fields is String.You can only use Logisland predefined type fields.", "NULL, STRING, INT, LONG, ARRAY, FLOAT, DOUBLE, BYTES, RECORD, MAP, ENUM, BOOLEAN, UNION, DATETIME", "STRING", false + "Name of the field to add with the suffix '.field.type'", "Type of the field to add", "Add a field to the record with the specified type. These properties are only used if a correspondant property without the suffix '.field.type' is already defined. If this property is not defined, default type for adding fields is String.You can only use Logisland predefined type fields.", "NULL, STRING, INT, LONG, ARRAY, FLOAT, DOUBLE, BYTES, RECORD, MAP, ENUM, BOOLEAN, UNION, DATETIME, OBJECT", "STRING", false "Name of the field to add with the suffix '.field.name'", "Name of the field to add using expression language", "Add a field to the record with the specified name (which is evaluated using expression language). These properties are only used if a correspondant property without the suffix '.field.name' is already defined. If this property is not defined, the name of the field to add is the key of the first dynamic property (which is the main and only required dynamic property).", "", "null", **true** Extra informations @@ -715,78 +666,7 @@ __________________ .. include:: ./details/common-processors/SelectDistinctRecords-Detail.rst ---------- -.. _com.hurence.logisland.processor.DecodeBase64: - -DecodeBase64 ------------- -Decodes fields to base64. The fields should be of type string - -Module -______ -com.hurence.logisland:logisland-processor-common:1.2.0 - -Class -_____ -com.hurence.logisland.processor.DecodeBase64 - -Tags -____ -decode, base64 - -Properties -__________ -In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values. - -.. csv-table:: allowable-values - :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL" - :widths: 20,60,30,20,10,10 - :escape: \ - - "**source.fields**", "a comma separated list of fields corresponding to the fields to decode. Please note than the fields should be of type string", "", "null", "false", "false" - "**destination.fields**", "a comma separated list of fields corresponding to the decoded content according to the fields provided as input. Those fields will be of type bytes", "", "null", "false", "false" - -Extra informations -__________________ -.. include:: ./details/common-processors/DecodeBase64-Detail.rst ----------- - -.. _com.hurence.logisland.processor.EncodeBase64: - -EncodeBase64 ------------- -Encodes fields to base64. The fields should be of type array of bytes - -Module -______ -com.hurence.logisland:logisland-processor-common:1.2.0 - -Class -_____ -com.hurence.logisland.processor.EncodeBase64 - -Tags -____ -encode, base64 - -Properties -__________ -In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values. - -.. csv-table:: allowable-values - :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL" - :widths: 20,60,30,20,10,10 - :escape: \ - - "**source.fields**", "a comma separated list of fields corresponding to the fields to encode. 
Please note than the fields should be of type bytes", "", "null", "false", "false" - "**destination.fields**", "a comma separated list of fields corresponding to the encoded content according to the fields provided as input. Those fields will be of type string", "", "null", "false", "false" - -Extra informations -__________________ -.. include:: ./details/common-processors/EncodeBase64-Detail.rst ----------- - -.. _com.hurence.logisland.processor.EncryptField: - +.. _com.hurence.logisland.processor.EvaluateJsonPath: EvaluateJsonPath ---------------- @@ -1179,6 +1059,76 @@ __________________ .. include:: ./details/common-processors/ConvertSimpleDateFormatFields-Detail.rst ---------- +.. _com.hurence.logisland.processor.DecodeBase64: + +DecodeBase64 +------------ +Decodes fields to base64. The fields should be of type string + +Module +______ +com.hurence.logisland:logisland-processor-common:1.2.0 + +Class +_____ +com.hurence.logisland.processor.DecodeBase64 + +Tags +____ +decode, base64 + +Properties +__________ +In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values. + +.. csv-table:: allowable-values + :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL" + :widths: 20,60,30,20,10,10 + :escape: \ + + "**source.fields**", "a comma separated list of fields corresponding to the fields to decode. Please note than the fields should be of type string", "", "null", "false", "false" + "**destination.fields**", "a comma separated list of fields corresponding to the decoded content according to the fields provided as input. Those fields will be of type bytes", "", "null", "false", "false" + +Extra informations +__________________ +.. include:: ./details/common-processors/DecodeBase64-Detail.rst +---------- + +.. _com.hurence.logisland.processor.EncodeBase64: + +EncodeBase64 +------------ +Encodes fields to base64. The fields should be of type array of bytes + +Module +______ +com.hurence.logisland:logisland-processor-common:1.2.0 + +Class +_____ +com.hurence.logisland.processor.EncodeBase64 + +Tags +____ +encode, base64 + +Properties +__________ +In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values. + +.. csv-table:: allowable-values + :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL" + :widths: 20,60,30,20,10,10 + :escape: \ + + "**source.fields**", "a comma separated list of fields corresponding to the fields to encode. Please note than the fields should be of type bytes", "", "null", "false", "false" + "**destination.fields**", "a comma separated list of fields corresponding to the encoded content according to the fields provided as input. Those fields will be of type string", "", "null", "false", "false" + +Extra informations +__________________ +.. include:: ./details/common-processors/EncodeBase64-Detail.rst +---------- + .. _com.hurence.logisland.processor.EncryptField: EncryptField @@ -1479,11 +1429,11 @@ __________________ .. include:: ./details/common-processors/SetJsonAsFields-Detail.rst ---------- -.. _com.hurence.logisland.processor.alerting.CheckAlerts: +.. _com.hurence.logisland.processor.SplitRecord: -CheckAlerts +SplitRecord ----------- -Add one or more records representing alerts. Using a datastore. +This processor is used to create a new set of records from one record. 
Module ______ @@ -1491,11 +1441,11 @@ com.hurence.logisland:logisland-processor-common:1.2.0 Class _____ -com.hurence.logisland.processor.alerting.CheckAlerts +com.hurence.logisland.processor.SplitRecord Tags ____ -record, alerting, thresholds, opc, tag +None. Properties __________ @@ -1506,26 +1456,32 @@ In the list below, the names of required properties appear in **bold**. Any othe :widths: 20,60,30,20,10,10 :escape: \ - "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false" - "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false" - "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\". - - .. raw:: html + "keep.parent.record", "Specify if the parent record should exist", "", "false", "false", "false" + "keep.parent.record_time", "Specify whether to use the processing_time as record_time or not", "", "true", "false", "false" + "keep.parent.record_type", "Specify whether to use the dynamic property name as record_type or not", "", "false", "false", "false" -

+Dynamic Properties +__________________ +Dynamic Properties allow the user to specify both the name and value of a property. - Explanation: all loops (for, do-while, while, and if-else, and functions +.. csv-table:: dynamic-properties + :header: "Name","Value","Description","Allowable Values","Default Value","EL" + :widths: 20,20,40,40,20,10 + :escape: \ - should use braces, because poison_pill() function will be inserted after + "new record name", "fields to have", "the new record", "", "null", **true** - each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple +Extra informations +__________________ +No additional information is provided +---------- -.. _com.hurence.logisland.processor.SplitRecord: +.. _com.hurence.logisland.processor.alerting.CheckAlerts: -SplitRecord +CheckAlerts ----------- -This processor is used to create a new set of records from one record. +Add one or more records representing alerts. Using a datastore. Module ______ @@ -1533,11 +1489,11 @@ com.hurence.logisland:logisland-processor-common:1.2.0 Class _____ -com.hurence.logisland.processor.SplitRecord +com.hurence.logisland.processor.alerting.CheckAlerts Tags ____ -None. +record, alerting, thresholds, opc, tag Properties __________ @@ -1548,29 +1504,21 @@ In the list below, the names of required properties appear in **bold**. Any othe :widths: 20,60,30,20,10,10 :escape: \ - "keep.parent.record", "Specify if the parent record should exist", "", "false", "false", "false" - "keep.parent.record_time", "Specify whether to use the processing_time as record_time or not", "", "true", "false", "false" - "keep.parent.record_type", "Specify whether to use the dynamic property name as record_type or not", "", "false", "false", "false" - -Dynamic Properties -__________________ -Dynamic Properties allow the user to specify both the name and value of a property. + "max.cpu.time", "maximum CPU time in milliseconds allowed for script execution.", "", "100", "false", "false" + "max.memory", "maximum memory in Bytes which JS executor thread can allocate", "", "51200", "false", "false" + "allow.no.brace", "Force, to check if all blocks are enclosed with curly braces \"\"{}\"\". -.. csv-table:: dynamic-properties - :header: "Name","Value","Description","Allowable Values","Default Value","EL" - :widths: 20,20,40,40,20,10 - :escape: \ + .. raw:: html - "new record name", "fields to have", "the new record", "", "null", **true** +

-Extra informations -__________________ -No additional information is provided + Explanation: all loops (for, do-while, while, and if-else, and functions ----------- + should use braces, because poison_pill() function will be inserted after -.. _com.hurence.logisland.processor.SplitText: + each open brace \"\"{\"\", to ensure interruption checking. Otherwise simple + code like:

 
diff --git a/logisland-documentation/user/components/components.yaml b/logisland-documentation/user/components/components.yaml
index 172f6471a..4eba7020e 100644
--- a/logisland-documentation/user/components/components.yaml
+++ b/logisland-documentation/user/components/components.yaml
@@ -179,6 +179,18 @@ extensions:
     module: com.hurence.logisland:logisland-processor-common:1.2.0
     class: com.hurence.logisland.processor.ConvertSimpleDateFormatFields
     tags: [record, fields, add, date, conversion, convert]
+  - name: DecodeBase64
+    description: Decodes fields to base64. The fields should be of type string
+    category: misc
+    module: com.hurence.logisland:logisland-processor-common:1.2.0
+    class: com.hurence.logisland.processor.DecodeBase64
+    tags: [decode, base64]
+  - name: EncodeBase64
+    description: Encodes fields to base64. The fields should be of type array of bytes
+    category: misc
+    module: com.hurence.logisland:logisland-processor-common:1.2.0
+    class: com.hurence.logisland.processor.EncodeBase64
+    tags: [encode, base64]
   - name: EncryptField
     description: This is a processor that is used to encrypt or decrypt one or many fields of any type of a given Record mapping
     category: misc
@@ -231,6 +243,11 @@ extensions:
     module: com.hurence.logisland:logisland-processor-common:1.2.0
     class: com.hurence.logisland.processor.SetJsonAsFields
     tags: [json]
+  - name: SplitRecord
+    description: This processor is used to create a new set of records from one record.
+    category: misc
+    module: com.hurence.logisland:logisland-processor-common:1.2.0
+    class: com.hurence.logisland.processor.SplitRecord
   - name: CheckAlerts
     description: Add one or more records representing alerts. Using a datastore.
     category: alerting
diff --git a/logisland-documentation/user/components/other-processors.rst b/logisland-documentation/user/components/other-processors.rst
index 0db518d83..8b9d0329a 100644
--- a/logisland-documentation/user/components/other-processors.rst
+++ b/logisland-documentation/user/components/other-processors.rst
@@ -2243,6 +2243,3375 @@ Dynamic Properties
 __________________
 Dynamic Properties allow the user to specify both the name and value of a property.
 
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent: 
+
+ParseUserAgent
+--------------
+The user-agent processor decomposes the User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, so it is not easy to handle them with regular expressions. This processor relies on the `YAUAA library `_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+   "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+   "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+   "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+   "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+   "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+   "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
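+
+As an illustration, the snippet below sketches how this processor might be declared in the ``processorConfigurations`` section of a Logisland YAML job file; the processor name and the ``http_user_agent`` source field are illustrative placeholders, only the property keys come from the table above:
+
+.. code-block:: yaml
+
+    - processor: parse_user_agent
+      component: com.hurence.logisland.processor.useragent.ParseUserAgent
+      configuration:
+        useragent.field: http_user_agent
+        useragent.keep: true
+        fields: DeviceClass,OperatingSystemName,AgentName,AgentVersion
+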
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession: 
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated with each group is retrieved from elasticsearch.
+ If none exists yet, a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created or retrieved, the web session is updated by the remaining web-events.
+ Updates affect fields of the web session such as the event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+	the duration between the web session and the web event must not exceed the specified time-out,
+	the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+	source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier, whereas the remaining web-events keep the original session identifier. The new session identifier is the original session identifier suffixed with the character '#' followed by an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally, once all web events have been applied, all web events (potentially modified with a new session identifier) are saved in elasticsearch, and web sessions are passed to the next processor.
+
+Web-session information includes:
+- first and last visited page
+- first and last timestamps of processed events
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+   "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+   "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+   "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+   "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+   "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+   "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+   "source_of_traffic.suffix", "Prefix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
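+
+As a rough sketch (not a complete job file), a configuration entry for this processor could look like the following; the index names and the ``esService`` controller service identifier are illustrative values, only the property keys are taken from the table above:
+
+.. code-block:: yaml
+
+    - processor: websession_builder
+      component: com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+      configuration:
+        elasticsearch.client.service: esService
+        es.session.index.field: sessionIndexName
+        es.session.type.name: sessions
+        es.event.index.prefix: web-events
+        es.event.type.name: event
+        es.mapping.event.to.session.index.name: event-to-session-mapping
+        session.timeout: 1800
+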
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic: 
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paid campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**),
+the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information is placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network, the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+   "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+   "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+   "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+   "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+   "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+   "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+   "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+   "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+   "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+   "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+   "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+   "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+   "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
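+
+For illustration only, a configuration entry for this processor might look like the sketch below; the service identifiers and the index name are placeholders, the property keys come from the table above:
+
+.. code-block:: yaml
+
+    - processor: set_source_of_traffic
+      component: com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+      configuration:
+        elasticsearch.client.service: esService
+        cache.service: lruCache
+        es.index: referers
+        referer.field: referer
+        first.visited.page.field: firstVisitedPage
+        source_of_traffic.suffix: source_of_traffic
+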
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn: 
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that, whether or not a resolution succeeds, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours, to give the underlying DNS system time to be updated.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+   "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated.  A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+   "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+   "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named  X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
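+
+A minimal configuration sketch for this processor is shown below; the field names and the ``lruCache`` service identifier are illustrative, the property keys come from the table above:
+
+.. code-block:: yaml
+
+    - processor: ip_to_fqdn
+      component: com.hurence.logisland.processor.enrichment.IpToFqdn
+      configuration:
+        ip.address.field: src_ip
+        fqdn.field: src_ip_fqdn
+        cache.service: lruCache
+        cache.max.time: 84600
+        resolution.timeout: 1000
+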
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo: 
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information is put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and the geo attributes are then created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available field. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+   "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+   "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+   "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+   "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
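+
+The sketch below illustrates a possible configuration; the ``src_ip`` field and the service identifiers are placeholders, the property keys and the listed geo fields come from the table above:
+
+.. code-block:: yaml
+
+    - processor: ip_to_geo
+      component: com.hurence.logisland.processor.enrichment.IpToGeo
+      configuration:
+        ip.address.field: src_ip
+        iptogeo.service: maxmindGeoService
+        geo.fields: city,country,latitude,longitude
+        geo.hierarchical: true
+        cache.service: lruCache
+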
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket: 
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode).  In batch mode, the processor decodes the bytes of the incoming pcap record, where a Global header followed by a sequence of [packet header, packet data] pairs are stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
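+As an illustration, a configuration entry for this processor could look like the following sketch (the processor name is arbitrary, the property keys come from the table above):
+
+.. code-block:: yaml
+
+    - processor: parse_pcap
+      component: com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+      configuration:
+        flow.mode: batch
+        debug: false
+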
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch: 
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+   "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+   "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+   "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+   "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
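+
+For illustration, such a processor might be configured as sketched below; the index name and the ``esService`` controller service identifier are placeholders, the property keys come from the table above:
+
+.. code-block:: yaml
+
+    - processor: es_bulk_indexer
+      component: com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+      configuration:
+        elasticsearch.client.service: esService
+        default.index: logisland
+        default.type: event
+        timebased.index: today
+        es.index.field: search_index
+        es.type.field: record_type
+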
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow: 
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on an interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+   "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+   "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch: 
+
+MultiGetElasticsearch
+---------------------
+Retrieves content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below):
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outgoing record holds the data of one retrieved elasticsearch document. This data is stored in the following fields :
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+   * field name : the retrieved field name
+   * field value : the retrieved field value
+
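+For reference only, a multiget request equivalent to the one described above can be issued with the elasticsearch Python client; in the processor the index, type, ids and includes/excludes come from the record fields named in the properties below, whereas all concrete values here are made up.
+
+.. code:: python
+
+	from elasticsearch import Elasticsearch
+
+	es = Elasticsearch("http://localhost:9200")    # assumed cluster address
+	response = es.mget(body={
+	    "docs": [{
+	        "_index": "customers",                 # value read from the es.index.field record field
+	        "_type": "_doc",                       # value read from the es.type.field record field
+	        "_id": doc_id,                         # one id per document to fetch
+	        "_source": ["name", "address"],        # fields to keep (es.includes.field)
+	    } for doc_id in ["1", "2", "3"]]           # ids read from the es.ids.field record field
+	})
+	for doc in response["docs"]:
+	    if doc.get("found"):
+	        print(doc["_id"], doc["_source"])
+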
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+   "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+   "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+   "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+   "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell: 
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+   "row.identifier.field", "Specifies  field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+   "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+   "**column.family.field**", "The field containing the  Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**column.qualifier.field**", "The field containing the  Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "table.name.default", "The table table to use if table name field is not set", "", "null", "false", "false"
+   "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+   "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath: 
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on the configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added. 
+
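+Outside of Logisland, the same kind of evaluation can be reproduced with lxml; this only illustrates what an XPath expression configured as a dynamic property evaluates to (the XML document and the expression are examples).
+
+.. code:: python
+
+	from lxml import etree
+
+	xml = b"<order><item sku='A1'>book</item><item sku='B2'>pen</item></order>"
+	tree = etree.fromstring(xml)
+
+	# an XPath expression such as one given as a dynamic property value
+	skus = tree.xpath("//item/@sku")
+	print(skus)   # ['A1', 'B2'] -- if the expression matched nothing, no field would be added
+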
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+   "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+   "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
+Extra information
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession: 
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics. As an example, here is an incoming event from the Web Analytics:
+
+.. code::
+
+	"fields": [
+	  { "name": "timestamp", "type": "long" },
+	  { "name": "remoteHost", "type": "string"},
+	  { "name": "record_type", "type": ["null", "string"], "default": null },
+	  { "name": "record_id", "type": ["null", "string"], "default": null },
+	  { "name": "location", "type": ["null", "string"], "default": null },
+	  { "name": "hitType", "type": ["null", "string"], "default": null },
+	  { "name": "eventCategory", "type": ["null", "string"], "default": null },
+	  { "name": "eventAction", "type": ["null", "string"], "default": null },
+	  { "name": "eventLabel", "type": ["null", "string"], "default": null },
+	  { "name": "localPath", "type": ["null", "string"], "default": null },
+	  { "name": "q", "type": ["null", "string"], "default": null },
+	  { "name": "n", "type": ["null", "int"], "default": null },
+	  { "name": "referer", "type": ["null", "string"], "default": null },
+	  { "name": "viewportPixelWidth", "type": ["null", "int"], "default": null },
+	  { "name": "viewportPixelHeight", "type": ["null", "int"], "default": null },
+	  { "name": "screenPixelWidth", "type": ["null", "int"], "default": null },
+	  { "name": "screenPixelHeight", "type": ["null", "int"], "default": null },
+	  { "name": "partyId", "type": ["null", "string"], "default": null },
+	  { "name": "sessionId", "type": ["null", "string"], "default": null },
+	  { "name": "pageViewId", "type": ["null", "string"], "default": null },
+	  { "name": "is_newSession", "type": ["null", "boolean"], "default": null },
+	  { "name": "userAgentString", "type": ["null", "string"], "default": null },
+	  { "name": "pageType", "type": ["null", "string"], "default": null },
+	  { "name": "UserId", "type": ["null", "string"], "default": null },
+	  { "name": "B2Bunit", "type": ["null", "string"], "default": null },
+	  { "name": "pointOfService", "type": ["null", "string"], "default": null },
+	  { "name": "companyID", "type": ["null", "string"], "default": null },
+	  { "name": "GroupCode", "type": ["null", "string"], "default": null },
+	  { "name": "userRoles", "type": ["null", "string"], "default": null },
+	  { "name": "is_PunchOut", "type": ["null", "string"], "default": null }
+	]
+
+The ConsolidateSession processor groups the records by session and computes the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed. The ConsolidateSession processor builds an aggregated session object for each active session. This aggregated object includes:
+
+- the actual session duration,
+- a boolean representing whether the session is considered active or closed (note: it is possible to resurrect a session if, for instance, an event arrives after a session has been marked closed),
+- user related info: userId, B2Bunit code, groupCode, userRoles, companyId,
+- the first visited page: URL,
+- the last visited page: URL.
+
+The properties to configure the processor are:
+
+- sessionid.field: property name containing the session identifier (default: sessionId),
+- timestamp.field: property name containing the timestamp of the event (default: timestamp),
+- session.timeout: timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn),
+- visitedpage.field: property name containing the page visited by the customer (default: location),
+- fields.to.return: list of fields to return in the aggregated object (default: N/A).
+
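+The core rule above (a session is considered closed once the inactivity period exceeds session.timeout) boils down to something like this small sketch, where timestamps are epoch seconds and the 1800 s default mirrors the property below.
+
+.. code:: python
+
+	import time
+
+	def is_session_active(last_event_ts, session_timeout=1800, now=None):
+	    """Return True while the time since the last event stays under the timeout."""
+	    now = time.time() if now is None else now
+	    return (now - last_event_ts) < session_timeout
+
+	# a session whose last event is one hour old is reported as closed
+	print(is_session_active(time.time() - 3600))   # False
+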
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers: 
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (e.g. Storm, Spark, Flink, NiFi) to detect outliers.
+
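+As an illustration of the first phase only (candidate detection with a robust estimator such as the median absolute deviation), and not of the processor's actual sketching implementation, a minimal version could look like this.
+
+.. code:: python
+
+	from statistics import median
+
+	def mad_outlier_candidates(values, cutoff=3.5):
+	    """Flag points whose modified z-score (based on MAD) exceeds the cutoff."""
+	    med = median(values)
+	    mad = median(abs(v - med) for v in values) or 1e-9   # avoid division by zero
+	    return [v for v in values if 0.6745 * abs(v - med) / mad > cutoff]
+
+	print(mad_outlier_candidates([10, 11, 9, 10, 12, 10, 95]))   # [95]
+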
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+   "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**rotation.policy.amount**", "...", "", "100", "false", "false"
+   "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**chunking.policy.amount**", "...", "", "100", "false", "false"
+   "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+   "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+   "global.statistics.min", "minimum value", "", "null", "false", "false"
+   "global.statistics.max", "maximum value", "", "null", "false", "false"
+   "global.statistics.mean", "mean value", "", "null", "false", "false"
+   "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+   "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+   "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+   "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+   "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+   "smooth", "do smoothing ?", "", "false", "false", "false"
+   "decay", "the decay", "", "0.1", "false", "false"
+   "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+   "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+   "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+   "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+   "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+   "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch: 
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may be enriched with information stored in elasticsearch. 
+Each outgoing record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+   "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+   "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+   "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+   "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract: 
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet line to a structured record. The processor expects to receive the raw Excel file content as the input record.
+
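+Purely as a point of comparison, extracting rows from a worksheet with openpyxl looks like the sketch below; the sheet name, header row and input file are made up, and they correspond conceptually to the sheets, field.row.header and skip.rows properties.
+
+.. code:: python
+
+	from openpyxl import load_workbook
+
+	workbook = load_workbook("events.xlsx", read_only=True)   # assumed input file
+	sheet = workbook["Sheet1"]                                # one of the configured sheet names
+
+	rows = sheet.iter_rows(values_only=True)
+	header = next(rows)            # field names taken from the header row
+	for row in rows:
+	    print(dict(zip(header, row)))
+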
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+   "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+   "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+   "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+   "record.type", "Default type of record", "", "excel_record", "false", "false"
+   "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP: 
+
+MatchIP
+-------
+IP address query matching (using `Luwak `_).
+
+You can use this processor to handle custom events matching IP addresses (CIDR). A record matching one of the registered IP address queries is tagged appropriately.
+
+A query is expressed as a lucene query against a field, for example:
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations.
+
+.. warning::
+
+	don't forget to set the numeric.fields property so that numeric range queries are handled correctly
+
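+The CIDR matching mentioned above is the usual notion of an address belonging to a network block; independently of Luwak and of this processor, it can be checked in Python as follows.
+
+.. code:: python
+
+	import ipaddress
+
+	def matches_cidr(ip, cidr):
+	    """True when the IP address belongs to the given CIDR block."""
+	    return ipaddress.ip_address(ip) in ipaddress.ip_network(cidr)
+
+	print(matches_cidr("10.1.2.3", "10.0.0.0/8"))      # True
+	print(matches_cidr("192.168.1.5", "10.0.0.0/8"))   # False
+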
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra information
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery: 
+
+MatchQuery
+----------
+Query matching based on `Luwak `_.
+
+You can use this processor to handle custom events defined by lucene queries. A new record is added to the output each time a registered query is matched.
+
+A query is expressed as a lucene query against a field, for example:
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations.
+
+.. warning::
+
+	don't forget to set the numeric.fields property so that numeric range queries are handled correctly
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra information
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent: 
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
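+A rough sketch of that renaming and flattening step (illustration only, not the processor's code) could look like this:
+
+.. code:: python
+
+	def flatten_bro_event(event):
+	    """Turn {"conn": {"id.orig_h": ...}} into a flat record with a record_type field."""
+	    record_type, fields = next(iter(event.items()))
+	    record = {"record_type": record_type}
+	    for name, value in fields.items():
+	        record[name.replace(".", "_")] = value   # id.orig_h -> id_orig_h
+	    return record
+
+	print(flatten_bro_event({"conn": {"id.orig_h": "172.17.0.2", "proto": "tcp"}}))
+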
+As an example here is an incoming event from Bro:
+
+.. code::
+
+	{
+	   "conn": {
+	     "id.resp_p": 9092,
+	     "resp_pkts": 0,
+	     "resp_ip_bytes": 0,
+	     "local_orig": true,
+	     "orig_ip_bytes": 0,
+	     "orig_pkts": 0,
+	     "missed_bytes": 0,
+	     "history": "Cc",
+	     "tunnel_parents": [],
+	     "id.orig_p": 56762,
+	     "local_resp": true,
+	     "uid": "Ct3Ms01I3Yc6pmMZx7",
+	     "conn_state": "OTH",
+	     "id.orig_h": "172.17.0.2",
+	     "proto": "tcp",
+	     "id.resp_h": "172.17.0.3",
+	     "ts": 1487596886.953917
+	   }
+	}
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent: 
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+	- Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+	- Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purposes (intrusion detection, traffic analysis...).
+Netflow events are sent to kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. We then rely on nifi to listen on that port for incoming netflow (V5) traffic and send it to a kafka topic. The Netflow processor can thus treat these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
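+For readers unfamiliar with the binary format mentioned above, a NetFlow v5 export packet starts with a 24-byte header whose standard layout can be decoded as shown below; this is generic NetFlow knowledge, not the processor's own parser.
+
+.. code:: python
+
+	import struct
+
+	def parse_netflow_v5_header(packet):
+	    """Decode the 24-byte NetFlow v5 header (standard field layout)."""
+	    (version, count, sys_uptime, unix_secs, unix_nsecs,
+	     flow_sequence, engine_type, engine_id, sampling) = struct.unpack("!HHIIIIBBH", packet[:24])
+	    return {"version": version, "count": count, "unix_secs": unix_secs,
+	            "flow_sequence": flow_sequence}
+
+	# build a fake header just to exercise the function
+	header = struct.pack("!HHIIIIBBH", 5, 2, 120000, 1487596886, 0, 42, 0, 0, 0)
+	print(parse_netflow_v5_header(header))   # {'version': 5, 'count': 2, ...}
+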
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+   "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+   "enrich.record", "Enrich data. If enabledthe netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython: 
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature: it is delivered as is, with the current set of features, and is subject to modifications in API or anything else in further logisland releases without warning. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs used in the index apache logs tutorial. The debug stream processor at the end of the stream should output events in the stderr file of the executors from the spark console.
+
+This processor allows implementing and running a processor written in python. This can be done in 2 ways: either directly defining the process method code in the **script.code.process** configuration property, or pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
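+In file mode, the expected shape of the user script is a module that defines a class with the same name as the file and that extends the AbstractProcessor class shipped with Logisland. The sketch below follows that description; the import fallback and the method bodies are assumptions made for illustration.
+
+.. code:: python
+
+	# MyProcessor.py -- sketch only
+	try:
+	    from AbstractProcessor import AbstractProcessor   # provided by the Logisland runtime (assumed import path)
+	except ImportError:
+	    AbstractProcessor = object                        # lets the sketch be read outside Logisland
+
+	class MyProcessor(AbstractProcessor):
+
+	    def init(self, context):
+	        # called once, the equivalent of a java processor's init method
+	        self.counter = 0
+
+	    def process(self, context, records):
+	        # called for every batch; must return the (possibly modified) records
+	        self.counter += len(records)
+	        return records
+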
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+   "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+   "script.code.process", "The python code to be called to process the records. This is the pyhton equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+   "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your pyhton script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherits from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+   "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+   "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder: 
+
+URLDecoder
+----------
+Decode one or more fields containing a URL with possibly encoded special characters.
+...
+
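+The decoding itself is plain percent-decoding; in Python terms it is equivalent to the following, with UTF-8 matching the default of the charset property.
+
+.. code:: python
+
+	from urllib.parse import unquote
+
+	encoded = "https://example.com/search?q=caf%C3%A9%20logisland"
+	print(unquote(encoded, encoding="utf-8"))   # https://example.com/search?q=café logisland
+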
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+   "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
+Extra information
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent: 
+
+ParseUserAgent
+--------------
+The user-agent processor allows decomposing the User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexps to handle them. This processor relies on the `YAUAA library `_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+   "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+   "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+   "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+   "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+   "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+   "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession: 
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated to each group is retrieved from elasticsearch.
+ In case none exists yet then a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created, or retrieved, the web session is updated by the remaining web-events.
+ Updates have impacts on fields of the web session such as event counter, last visited page,  session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+	the duration between the web session and the web event must not exceed the specified time-out,
+	the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+	source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier, whereas the remaining web-events still have the original session identifier. The new session identifier is the original session identifier suffixed with the character '#' followed by an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally, when all web events have been applied, all web events (potentially modified with a new session identifier) are saved in elasticsearch, and web sessions are passed to the next processor.
+
+Web session information includes:
+- first and last visited page
+- first and last timestamp of processed event 
+- total number of processed events
+- the userId
+- a boolean denoting if the web-session is still active or not
+- an integer denoting the duration of the web-sessions
+- optional fields that may be retrieved from the processed events
+
+
+
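+Condensed into a small sketch (not the processor's code), the decision of whether an incoming event stays in the current session or forces a new one, and how the new session identifier is derived, looks roughly like this; timestamps are epoch seconds and the field names are placeholders.
+
+.. code:: python
+
+	from datetime import datetime
+
+	def needs_new_session(session, event, timeout=1800):
+	    """Apply the three breaking rules described above."""
+	    same_day = (datetime.utcfromtimestamp(session["last_ts"]).date()
+	                == datetime.utcfromtimestamp(event["ts"]).date())
+	    return (event["ts"] - session["last_ts"] > timeout
+	            or not same_day
+	            or event.get("source_of_traffic") != session.get("source_of_traffic"))
+
+	def next_session_id(original_id, counter):
+	    # e.g. "abc" -> "abc#2": original identifier, '#', then an incremented counter
+	    return "%s#%d" % (original_id, counter)
+
+	print(next_session_id("abc", 2))   # abc#2
+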
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+   "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+   "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+   "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+   "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+   "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+   "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+   "source_of_traffic.suffix", "Prefix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic: 
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources, 
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access. 
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data. 
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**),
+the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information is placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network, the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
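+Ignoring the Elasticsearch lookup and the caching, the classification amounts to something like the sketch below, where a plain dictionary stands in for the ES index keyed by domain name; the precedence given to campaign tagging and all concrete names are assumptions.
+
+.. code:: python
+
+	from urllib.parse import urlparse
+
+	# stand-in for the ES index whose _id is the domain name
+	DOMAINS = {
+	    "www.google.com": {"search_engine": True},
+	    "www.facebook.com": {"social_network": True},
+	}
+
+	def source_of_traffic(referer, utm_source=None):
+	    if utm_source:                 # explicit campaign tagging (assumed to win here)
+	        return "campaign"
+	    if not referer:
+	        return "direct"
+	    info = DOMAINS.get(urlparse(referer).netloc, {})
+	    if info.get("search_engine"):
+	        return "search_engine"
+	    if info.get("social_network"):
+	        return "social_network"
+	    return "referral"
+
+	print(source_of_traffic("https://www.google.com/search?q=logisland"))   # search_engine
+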
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+   "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+   "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+   "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+   "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+   "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+   "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+   "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+   "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+   "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+   "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+   "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+   "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+   "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
+Extra information
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn: 
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the logisland cache service so that, whether a resolution succeeds or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable but by default a request for the same IP is not triggered before 24 hours, to give the underlying DNS system time to be potentially updated.
+
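+Stripped of the controller-service plumbing, the reverse lookup plus expiring cache described above can be pictured like this; the cache layout and helper name are made up.
+
+.. code:: python
+
+	import socket
+	import time
+
+	_cache = {}   # ip -> (fqdn or None, timestamp of the lookup)
+
+	def ip_to_fqdn(ip, cache_max_time=84600):
+	    """Reverse-resolve an IP, remembering hits and misses for cache_max_time seconds."""
+	    cached = _cache.get(ip)
+	    if cached and time.time() - cached[1] < cache_max_time:
+	        return cached[0]
+	    try:
+	        fqdn = socket.gethostbyaddr(ip)[0]
+	    except (socket.herror, socket.gaierror):
+	        fqdn = None                # the miss is cached too, as described above
+	    _cache[ip] = (fqdn, time.time())
+	    return fqdn
+
+	print(ip_to_fqdn("127.0.0.1"))
+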
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+   "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated.  A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+   "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+   "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named  X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled.", "", "false", "false", "false"
+
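+Below is a minimal configuration sketch for this processor as it could appear in a Logisland job file. The surrounding YAML layout follows the usual Logisland job structure and the field and service names (``src_ip``, ``src_fqdn``, ``lru_cache_service``) are placeholders; only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: field and service names are placeholders
+   - processor: ip_to_fqdn
+     component: com.hurence.logisland.processor.enrichment.IpToFqdn
+     documentation: resolve the FQDN matching the IP found in the src_ip field
+     configuration:
+       ip.address.field: src_ip
+       fqdn.field: src_fqdn
+       cache.service: lru_cache_service
+       resolution.timeout: "1000"
+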
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo: 
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information is put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a parent field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and the geo attributes are created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available field. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+   "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+   "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+   "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+   "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
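+As an illustration, here is a hypothetical configuration sketch for this processor in a Logisland job file; ``src_ip`` and the service names are placeholders and only the property keys are taken from the table above.
+
+.. code::
+
+   # hypothetical sketch: field and service names are placeholders
+   - processor: ip_to_geo
+     component: com.hurence.logisland.processor.enrichment.IpToGeo
+     documentation: enrich records with geolocation information for the IP found in src_ip
+     configuration:
+       ip.address.field: src_ip
+       iptogeo.service: maxmind_service
+       cache.service: lru_cache_service
+       geo.fields: "city,country,latitude,longitude"
+       geo.hierarchical: "true"
+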
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket: 
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, where a global header followed by a sequence of [packet header, packet data] pairs is stored. Then, each incoming pcap event is parsed into n packet records. The fields of packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch: 
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+   "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+   "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+   "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+   "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
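+A minimal, hypothetical configuration sketch for this processor is shown below; the controller service name and the index/type values are placeholders, only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: service, index and field names are placeholders
+   - processor: es_bulk_add
+     component: com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+     documentation: index records into elasticsearch through the bulk processor
+     configuration:
+       elasticsearch.client.service: elasticsearch_service
+       default.index: logisland
+       default.type: event
+       timebased.index: today
+       es.index.field: search_index
+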
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow: 
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on an interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+   "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+   "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
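+The sketch below shows a hypothetical configuration for this processor; the controller service and the field names are placeholders and only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: service and field names are placeholders
+   - processor: fetch_hbase_row
+     component: com.hurence.logisland.processor.hbase.FetchHBaseRow
+     documentation: enrich records with the content of an HBase row
+     configuration:
+       hbase.client.service: hbase_service
+       table.name.field: table_name
+       row.identifier.field: row_id
+       columns.field: columns
+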
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch: 
+
+MultiGetElasticsearch
+---------------------
+Retrieves content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below):
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outgoing record holds the data of one retrieved elasticsearch document. This data is stored in these fields:
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+   * field name : the retrieved field name
+   * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+   "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+   "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+   "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+   "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
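+A hypothetical configuration sketch for this processor follows; the controller service and the incoming field names are placeholders, only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: service and field names are placeholders
+   - processor: es_multiget
+     component: com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+     documentation: fetch documents from elasticsearch with a multiget query
+     configuration:
+       elasticsearch.client.service: elasticsearch_service
+       es.index.field: es_index
+       es.type.field: es_type
+       es.ids.field: es_ids
+       es.includes.field: es_includes
+       es.excludes.field: es_excludes
+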
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell: 
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+   "row.identifier.field", "Specifies  field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+   "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+   "**column.family.field**", "The field containing the  Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**column.qualifier.field**", "The field containing the  Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "table.name.default", "The table table to use if table name field is not set", "", "null", "false", "false"
+   "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+   "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
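+Below is a hypothetical configuration sketch for this processor; the controller service and the field names are placeholders, only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: service and field names are placeholders
+   - processor: put_hbase_cell
+     component: com.hurence.logisland.processor.hbase.PutHBaseCell
+     documentation: write each record as the value of a single HBase cell
+     configuration:
+       hbase.client.service: hbase_service
+       table.name.field: table_name
+       column.family.field: cf
+       column.qualifier.field: cq
+       batch.size: "25"
+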
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath: 
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on configuration of the Processor. XPaths are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+   "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+   "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
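+The sketch below is a hypothetical configuration for this processor; the field name and the XPath expression are placeholders. The dynamic property ``title`` illustrates how a user-defined property maps an attribute name to an XPath expression.
+
+.. code::
+
+   # hypothetical sketch: field name and XPath expression are placeholders
+   - processor: evaluate_xpath
+     component: com.hurence.logisland.processor.xml.EvaluateXPath
+     documentation: extract the title element from the xml payload
+     configuration:
+       source: xml_content
+       validate_dtd: "false"
+       title: /catalog/book/title
+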
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession: 
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics. As an example here is an incoming event from the Web Analytics:
+
+"fields": [{ "name": "timestamp",              "type": "long" },{ "name": "remoteHost",             "type": "string"},{ "name": "record_type",            "type": ["null", "string"], "default": null },{ "name": "record_id",              "type": ["null", "string"], "default": null },{ "name": "location",               "type": ["null", "string"], "default": null },{ "name": "hitType",                "type": ["null", "string"], "default": null },{ "name": "eventCategory",          "type": ["null", "string"], "default": null },{ "name": "eventAction",            "type": ["null", "string"], "default": null },{ "name": "eventLabel",             "type": ["null", "string"], "default": null },{ "name": "localPath",              "type": ["null", "string"], "default": null },{ "name": "q",                      "type": ["null", "string"], "default": null },{ "name": "n",                      "type": ["null", "int"],    "default": null },{ "name": "referer",                "type": ["null", "string"], "default": null },{ "name": "viewportPixelWidth",     "type": ["null", "int"],    "default": null },{ "name": "viewportPixelHeight",    "type": ["null", "int"],    "default": null },{ "name": "screenPixelWidth",       "type": ["null", "int"],    "default": null },{ "name": "screenPixelHeight",      "type": ["null", "int"],    "default": null },{ "name": "partyId",                "type": ["null", "string"], "default": null },{ "name": "sessionId",              "type": ["null", "string"], "default": null },{ "name": "pageViewId",             "type": ["null", "string"], "default": null },{ "name": "is_newSession",          "type": ["null", "boolean"],"default": null },{ "name": "userAgentString",        "type": ["null", "string"], "default": null },{ "name": "pageType",               "type": ["null", "string"], "default": null },{ "name": "UserId",                 "type": ["null", "string"], "default": null },{ "name": "B2Bunit",                "type": ["null", "string"], "default": null },{ "name": "pointOfService",         "type": ["null", "string"], "default": null },{ "name": "companyID",              "type": ["null", "string"], "default": null },{ "name": "GroupCode",              "type": ["null", "string"], "default": null },{ "name": "userRoles",              "type": ["null", "string"], "default": null },{ "name": "is_PunchOut",            "type": ["null", "string"], "default": null }]The ConsolidateSession processor groups the records by sessions and compute the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed.The ConsolidateSession is building an aggregated session object for each active session.This aggregated object includes: - The actual session duration. - A boolean representing wether the session is considered active or closed.   Note: it is possible to ressurect a session if for instance an event arrives after a session has been marked closed. - User related infos: userId, B2Bunit code, groupCode, userRoles, companyId - First visited page: URL - Last visited page: URL The properties to configure the processor are: - sessionid.field:          Property name containing the session identifier (default: sessionId). - timestamp.field:          Property name containing the timestamp of the event (default: timestamp). - session.timeout:          Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn). 
- visitedpage.field:        Property name containing the page visited by the customer (default: location). - fields.to.return:         List of fields to return in the aggregated object. (default: N/A)
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
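+A hypothetical configuration sketch for this processor is shown below; the values simply restate the defaults documented above and would be adjusted to the actual event schema.
+
+.. code::
+
+   # hypothetical sketch: values restate the documented defaults
+   - processor: consolidate_session
+     component: com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+     documentation: compute aggregated session objects from web events
+     configuration:
+       session.timeout: "1800"
+       sessionid.field: sessionId
+       timestamp.field: h2kTimestamp
+       visitedpage.field: location
+       userid.field: userId
+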
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers: 
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (e.g. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+   "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**rotation.policy.amount**", "...", "", "100", "false", "false"
+   "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**chunking.policy.amount**", "...", "", "100", "false", "false"
+   "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+   "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+   "global.statistics.min", "minimum value", "", "null", "false", "false"
+   "global.statistics.max", "maximum value", "", "null", "false", "false"
+   "global.statistics.mean", "mean value", "", "null", "false", "false"
+   "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+   "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+   "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+   "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+   "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+   "smooth", "do smoothing ?", "", "false", "false", "false"
+   "decay", "the decay", "", "0.1", "false", "false"
+   "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+   "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+   "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+   "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+   "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+   "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
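+Below is a hypothetical configuration sketch for this processor that mostly restates the documented defaults; the value and time fields would be adapted to the actual records.
+
+.. code::
+
+   # hypothetical sketch: values restate the documented defaults
+   - processor: detect_outliers
+     component: com.hurence.logisland.processor.DetectOutliers
+     documentation: flag outlier points in the incoming timeseries
+     configuration:
+       value.field: record_value
+       time.field: record_time
+       rotation.policy.type: by_amount
+       rotation.policy.amount: "100"
+       rotation.policy.unit: points
+       chunking.policy.type: by_amount
+       chunking.policy.amount: "100"
+       chunking.policy.unit: points
+       zscore.cutoffs.normal: "0.000000000000001"
+       zscore.cutoffs.moderate: "1.5"
+       zscore.cutoffs.severe: "10.0"
+       min.amount.to.predict: "100"
+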
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch: 
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may possibly be enriched with information stored in elasticsearch.
+Each outgoing record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+   "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+   "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+   "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+   "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
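+A hypothetical configuration sketch for this processor follows; the controller service, the record key field and the index name are placeholders, only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: service, field and index names are placeholders
+   - processor: enrich_from_es
+     component: com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+     documentation: enrich incoming records with fields of matching elasticsearch documents
+     configuration:
+       elasticsearch.client.service: elasticsearch_service
+       record.key: codeProduct
+       es.index: product_index
+       es.type: default
+       es.includes.field: "*"
+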
+Extra informations
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract: 
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor expects to receive a raw Excel file as the input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+   "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+   "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+   "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+   "record.type", "Default type of record", "", "excel_record", "false", "false"
+   "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+
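+The following is a hypothetical configuration sketch for this processor; the sheet and column names are placeholders, only the property keys come from the table above.
+
+.. code::
+
+   # hypothetical sketch: sheet and column names are placeholders
+   - processor: excel_extract
+     component: com.hurence.logisland.processor.excel.ExcelExtract
+     documentation: convert each worksheet line into a structured record
+     configuration:
+       sheets: Sheet1
+       skip.rows: "1"
+       field.names: "date,product,amount"
+       record.type: excel_record
+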
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP: 
+
+MatchIP
+-------
+IP address Query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching an IP address (CIDR).
+A record matching one of the registered IP address queries is tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example: 
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+	don't forget to set the numeric.fields property to correctly handle numeric range queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery: 
+
+MatchQuery
+----------
+Query matching based on `Luwak `_
+
+You can use this processor to handle custom events defined by lucene queries.
+A new record is added to the output each time a registered query is matched.
+
+A query is expressed as a lucene query against a field like for example: 
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+	don't forget to set the numeric.fields property to correctly handle numeric range queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
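+As an illustration, a hypothetical configuration sketch for this processor is given below; the dynamic property ``too_many_errors`` registers one of the example queries above under that name.
+
+.. code::
+
+   # hypothetical sketch: the dynamic property name and query are illustrative
+   - processor: match_query
+     component: com.hurence.logisland.processor.MatchQuery
+     documentation: tag records matching registered lucene queries
+     configuration:
+       numeric.fields: "error_count,bytes_out"
+       policy.onmatch: first
+       policy.onmiss: discard
+       too_many_errors: "error_count:[10 TO *]"
+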
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent: 
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+{
+
+   "conn": {
+
+     "id.resp_p": 9092,
+
+     "resp_pkts": 0,
+
+     "resp_ip_bytes": 0,
+
+     "local_orig": true,
+
+     "orig_ip_bytes": 0,
+
+     "orig_pkts": 0,
+
+     "missed_bytes": 0,
+
+     "history": "Cc",
+
+     "tunnel_parents": [],
+
+     "id.orig_p": 56762,
+
+     "local_resp": true,
+
+     "uid": "Ct3Ms01I3Yc6pmMZx7",
+
+     "conn_state": "OTH",
+
+     "id.orig_h": "172.17.0.2",
+
+     "proto": "tcp",
+
+     "id.resp_h": "172.17.0.3",
+
+     "ts": 1487596886.953917
+
+   }
+
+ }
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent: 
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+	- Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+	- Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purposes (intrusion detection, traffic analysis...).
+Netflow records are sent to Kafka in order to be processed by logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. We then rely on NiFi to listen on that port for incoming netflow (V5) traffic and send it to a Kafka topic. The Netflow processor can thus process these events and generate corresponding logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+   "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+   "enrich.record", "Enrich data. If enabledthe netflow record is enriched with inferred data", "", "false", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython: 
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature : it is delivered as is, with the current set of features and is subject to modifications in API or anything else in further logisland releases without warnings. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events in stderr file of the executors from the spark console.
+
+This processor allows implementing and running a processor written in python. This can be done in 2 ways: either by directly defining the process method code in the **script.code.process** configuration property, or by pointing to an external python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or the file mode, your python code may depend on some python dependencies. If the set of python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+   "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+   "script.code.process", "The python code to be called to process the records. This is the pyhton equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+   "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your pyhton script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherits from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+   "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+   "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
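+Below is a hypothetical file-mode configuration sketch for this processor, reusing the MyProcessor.py naming from the description above; the paths are placeholders.
+
+.. code::
+
+   # hypothetical sketch: paths are placeholders
+   - processor: run_python
+     component: com.hurence.logisland.processor.scripting.python.RunPython
+     documentation: run a user defined python processor in file mode
+     configuration:
+       script.path: ./MyProcessor.py
+       dependencies.path: ./dependencies
+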
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder: 
+
+URLDecoder
+----------
+Decodes one or more fields containing a URL with possibly encoded special characters.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+   "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "fields to decode", "a default value", "Decode one or more fields from the record ", "", "null", false
+
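+A hypothetical configuration sketch for this processor is shown below; ``location`` is a placeholder for the field holding the encoded URL.
+
+.. code::
+
+   # hypothetical sketch: the decoded field name is a placeholder
+   - processor: url_decoder
+     component: com.hurence.logisland.processor.webAnalytics.URLDecoder
+     documentation: decode the URL stored in the location field
+     configuration:
+       decode.fields: location
+       charset: UTF-8
+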
+Extra informations
+__________________
+.. include:: ./details/URLDecoder-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.useragent.ParseUserAgent: 
+
+ParseUserAgent
+--------------
+The user-agent processor allows decomposing the User-Agent value from an HTTP header into several attributes of interest. There is no standard format for User-Agent strings, hence it is not easily possible to use regexps to handle them. This processor relies on the `YAUAA library `_ to do the heavy work.
+
+Module
+______
+com.hurence.logisland:logisland-processor-useragent:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.useragent.ParseUserAgent
+
+Tags
+____
+User-Agent, clickstream, DMP
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "cache.enabled", "Enable caching. Caching to avoid to redo the same computation for many identical User-Agent strings.", "", "true", "false", "false"
+   "cache.size", "Set the size of the cache.", "", "1000", "false", "false"
+   "**useragent.field**", "Must contain the name of the field that contains the User-Agent value in the incoming record.", "", "null", "false", "false"
+   "useragent.keep", "Defines if the field that contained the User-Agent must be kept or not in the resulting records.", "", "true", "false", "false"
+   "confidence.enabled", "Enable confidence reporting. Each field will report a confidence attribute with a value comprised between 0 and 10000.", "", "false", "false", "false"
+   "ambiguity.enabled", "Enable ambiguity reporting. Reports a count of ambiguities.", "", "false", "false", "false"
+   "fields", "Defines the fields to be returned.", "", "DeviceClass, DeviceName, DeviceBrand, DeviceCpu, DeviceFirmwareVersion, DeviceVersion, OperatingSystemClass, OperatingSystemName, OperatingSystemVersion, OperatingSystemNameVersion, OperatingSystemVersionBuild, LayoutEngineClass, LayoutEngineName, LayoutEngineVersion, LayoutEngineVersionMajor, LayoutEngineNameVersion, LayoutEngineNameVersionMajor, LayoutEngineBuild, AgentClass, AgentName, AgentVersion, AgentVersionMajor, AgentNameVersion, AgentNameVersionMajor, AgentBuild, AgentLanguage, AgentLanguageCode, AgentInformationEmail, AgentInformationUrl, AgentSecurity, AgentUuid, FacebookCarrier, FacebookDeviceClass, FacebookDeviceName, FacebookDeviceVersion, FacebookFBOP, FacebookFBSS, FacebookOperatingSystemName, FacebookOperatingSystemVersion, Anonymized, HackerAttackVector, HackerToolkit, KoboAffiliate, KoboPlatformId, IECompatibilityVersion, IECompatibilityVersionMajor, IECompatibilityNameVersion, IECompatibilityNameVersionMajor, __SyntaxError__, Carrier, GSAInstallationID, WebviewAppName, WebviewAppNameVersionMajor, WebviewAppVersion, WebviewAppVersionMajor", "false", "false"
+
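+The sketch below is a hypothetical configuration for this processor; ``userAgentString`` is a placeholder for the field carrying the raw User-Agent value and the selected fields are a subset of the list documented above.
+
+.. code::
+
+   # hypothetical sketch: the input field name is a placeholder
+   - processor: parse_user_agent
+     component: com.hurence.logisland.processor.useragent.ParseUserAgent
+     documentation: decompose the User-Agent header into attributes of interest
+     configuration:
+       useragent.field: userAgentString
+       cache.enabled: "true"
+       cache.size: "1000"
+       fields: "DeviceClass,OperatingSystemName,AgentName,AgentVersion"
+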
+Extra informations
+__________________
+.. include:: ./details/ParseUserAgent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.IncrementalWebSession: 
+
+IncrementalWebSession
+---------------------
+This processor creates and updates web-sessions based on incoming web-events. Note that both web-sessions and web-events are stored in elasticsearch.
+ Firstly, web-events are grouped by their session identifier and processed in chronological order.
+ Then each web-session associated with each group is retrieved from elasticsearch.
+ If none exists yet, a new web session is created based on the first web event.
+ The following fields of the newly created web session are set based on the associated web event: session identifier, first timestamp, first visited page. Secondly, once created or retrieved, the web session is updated by the remaining web-events.
+ Updates impact fields of the web session such as the event counter, last visited page, session duration, ...
+ Before updates are actually applied, checks are performed to detect rules that would trigger the creation of a new session:
+
+	the duration between the web session and the web event must not exceed the specified time-out,
+	the web session and the web event must have timestamps within the same day (at midnight a new web session is created),
+	source of traffic (campaign, ...) must be the same on the web session and the web event.
+
+ When a breaking rule is detected, a new web session is created with a new session identifier where as remaining web-events still have the original session identifier. The new session identifier is the original session suffixed with the character '#' followed with an incremented counter. This new session identifier is also set on the remaining web-events.
+ Finally when all web events were applied, all web events -potentially modified with a new session identifier- are save in elasticsearch. And web sessions are passed to the next processor.
+
+The web session information includes:
+- first and last visited page
+- first and last timestamp of processed events
+- total number of processed events
+- the userId
+- a boolean denoting whether the web session is still active or not
+- an integer denoting the duration of the web session
+- optional fields that may be retrieved from the processed events
+
+
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, debug information are logged.", "", "false", "false", "false"
+   "**es.session.index.field**", "Name of the field in the record defining the ES index containing the web session documents.", "", "null", "false", "false"
+   "**es.session.type.name**", "Name of the ES type of web session documents.", "", "null", "false", "false"
+   "**es.event.index.prefix**", "Prefix of the index containing the web event documents.", "", "null", "false", "false"
+   "**es.event.type.name**", "Name of the ES type of web event documents.", "", "null", "false", "false"
+   "**es.mapping.event.to.session.index.name**", "Name of the ES index containing the mapping of web session documents.", "", "null", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "newSessionReason.out.field", "the name of the field containing the reason why a new session was created => will override default value if set", "", "reasonForNewSession", "false", "false"
+   "transactionIds.out.field", "the name of the field containing all transactionIds => will override default value if set", "", "transactionIds", "false", "false"
+   "source_of_traffic.suffix", "Prefix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+
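+Below is a minimal configuration sketch, assuming the standard Logisland YAML job syntax; the controller service identifier and the index, type and field names are illustrative only:
+
+.. code::
+
+	- processor: incremental_web_session
+	  component: com.hurence.logisland.processor.webAnalytics.IncrementalWebSession
+	  configuration:
+	    elasticsearch.client.service: elasticsearch_service    # illustrative id of an Elasticsearch controller service defined elsewhere
+	    es.session.index.field: es_index                       # illustrative record field holding the target session index name
+	    es.session.type.name: sessions
+	    es.event.index.prefix: web-events
+	    es.event.type.name: events
+	    es.mapping.event.to.session.index.name: event-session-mapping
+	    session.timeout: 1800
+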
+Extra informations
+__________________
+.. include:: ./details/IncrementalWebSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic: 
+
+SetSourceOfTraffic
+------------------
+Compute the source of traffic of a web session. Users arrive at a website or application through a variety of sources,
+including advertising/paying campaigns, search engines, social networks, referring sites or direct access.
+When analysing user experience on a webshop, it is crucial to collect, process, and report the campaign and traffic-source data.
+To compute the source of traffic of a web session, the user has to provide the utm_* related properties if available
+(i.e. **utm_source.field**, **utm_medium.field**, **utm_campaign.field**, **utm_content.field**, **utm_term.field**),
+the referer (**referer.field** property) and the first visited page of the session (**first.visited.page.field** property).
+By default the source of traffic information is placed in a flat structure (specified by the **source_of_traffic.suffix** property
+with a default value of source_of_traffic). To work properly the SetSourceOfTraffic processor needs to have access to an
+Elasticsearch index containing a list of the most popular search engines and social networks. The ES index (specified by the **es.index** property) should be structured such that the _id of an ES document MUST be the name of the domain. If the domain is a search engine, the related ES doc MUST have a boolean field (default being search_engine) specified by the property **es.search_engine.field** with a value set to true. If the domain is a social network, the related ES doc MUST have a boolean field (default being social_network) specified by the property **es.social_network.field** with a value set to true.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+
+Tags
+____
+session, traffic, source, web, analytics
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "referer.field", "Name of the field containing the referer value in the session", "", "referer", "false", "false"
+   "first.visited.page.field", "Name of the field containing the first visited page in the session", "", "firstVisitedPage", "false", "false"
+   "utm_source.field", "Name of the field containing the utm_source value in the session", "", "utm_source", "false", "false"
+   "utm_medium.field", "Name of the field containing the utm_medium value in the session", "", "utm_medium", "false", "false"
+   "utm_campaign.field", "Name of the field containing the utm_campaign value in the session", "", "utm_campaign", "false", "false"
+   "utm_content.field", "Name of the field containing the utm_content value in the session", "", "utm_content", "false", "false"
+   "utm_term.field", "Name of the field containing the utm_term value in the session", "", "utm_term", "false", "false"
+   "source_of_traffic.suffix", "Suffix for the source of the traffic related fields", "", "source_of_traffic", "false", "false"
+   "source_of_traffic.hierarchical", "Should the additional source of trafic information fields be added under a hierarchical father field or not.", "", "false", "false", "false"
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**cache.service**", "Name of the cache service to use.", "", "null", "false", "false"
+   "cache.validity.timeout", "Timeout validity (in seconds) of an entry in the cache.", "", "0", "false", "false"
+   "debug", "If true, an additional debug field is added. If the source info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the source fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+   "**es.index**", "Name of the ES index containing the list of search engines and social network. ", "", "null", "false", "false"
+   "es.type", "Name of the ES type to use.", "", "default", "false", "false"
+   "es.search_engine.field", "Name of the ES field used to specify that the domain is a search engine.", "", "search_engine", "false", "false"
+   "es.social_network.field", "Name of the ES field used to specify that the domain is a social network.", "", "social_network", "false", "false"
+
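+A possible configuration sketch, assuming the usual Logisland YAML job syntax; the service identifiers and the index name are illustrative only:
+
+.. code::
+
+	- processor: set_source_of_traffic
+	  component: com.hurence.logisland.processor.webAnalytics.SetSourceOfTraffic
+	  configuration:
+	    elasticsearch.client.service: elasticsearch_service     # illustrative id of an Elasticsearch controller service
+	    cache.service: lru_cache                                # illustrative id of a cache controller service
+	    es.index: search_engines_and_social_networks            # illustrative index holding the reference domains
+	    referer.field: referer
+	    first.visited.page.field: firstVisitedPage
+	    source_of_traffic.suffix: source_of_traffic
+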
+Extra informations
+__________________
+.. include:: ./details/SetSourceOfTraffic-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToFqdn: 
+
+IpToFqdn
+--------
+Translates an IP address into a FQDN (Fully Qualified Domain Name). An input field from the record has the IP as value. A new field is created and its value is the FQDN matching the IP address. The resolution mechanism is based on the underlying operating system. The resolution request may take some time, especially if the IP address cannot be translated into a FQDN. For these reasons this processor relies on the Logisland cache service: once a resolution has been attempted, whether it succeeded or not, the result is put into the cache. That way, the real request for the same IP is not re-triggered during a certain period of time, until the cache entry expires. This timeout is configurable, but by default a request for the same IP is not triggered before 24 hours, to give the underlying DNS system time to be potentially updated.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToFqdn
+
+Tags
+____
+dns, ip, fqdn, domain, address, fqhn, reverse, resolution, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**fqdn.field**", "The field that will contain the full qualified domain name corresponding to the ip address.", "", "null", "false", "false"
+   "overwrite.fqdn.field", "If the field should be overwritten when it already exists.", "", "false", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "cache.max.time", "The amount of time, in seconds, for which a cached FQDN value is valid in the cache service. After this delay, the next new request to translate the same IP into FQDN will trigger a new reverse DNS request and the result will overwrite the entry in the cache. This allows two things: if the IP was not resolved into a FQDN, this will get a chance to obtain a FQDN if the DNS system has been updated, if the IP is resolved into a FQDN, this will allow to be more accurate if the DNS system has been updated.  A value of 0 seconds disables this expiration mechanism. The default value is 84600 seconds, which corresponds to new requests triggered every day if a record with the same IP passes every day in the processor.", "", "84600", "false", "false"
+   "resolution.timeout", "The amount of time, in milliseconds, to wait at most for the resolution to occur. This avoids to block the stream for too much time. Default value is 1000ms. If the delay expires and no resolution could occur before, the FQDN field is not created. A special value of 0 disables the logisland timeout and the resolution request may last for many seconds if the IP cannot be translated into a FQDN by the underlying operating system. In any case, whether the timeout occurs in logisland of in the operating system, the fact that a timeout occurs is kept in the cache system so that a resolution request for the same IP will not occur before the cache entry expires.", "", "1000", "false", "false"
+   "debug", "If true, some additional debug fields are added. If the FQDN field is named X, a debug field named X_os_resolution_time_ms contains the resolution time in ms (using the operating system, not the cache). This field is added whether the resolution occurs or time is out. A debug field named  X_os_resolution_timeout contains a boolean value to indicate if the timeout occurred. Finally, a debug field named X_from_cache contains a boolean value to indicate the origin of the FQDN field. The default value for this property is false (debug is disabled.", "", "false", "false", "false"
+
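+For instance, a minimal configuration sketch (Logisland YAML job syntax assumed; src_ip, src_fqdn and lru_cache are illustrative names):
+
+.. code::
+
+	- processor: ip_to_fqdn
+	  component: com.hurence.logisland.processor.enrichment.IpToFqdn
+	  configuration:
+	    ip.address.field: src_ip       # illustrative field holding the IP address to resolve
+	    fqdn.field: src_fqdn           # illustrative field that will receive the FQDN
+	    cache.service: lru_cache       # illustrative id of a cache controller service defined elsewhere
+	    cache.max.time: 84600
+	    resolution.timeout: 1000
+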
+Extra informations
+__________________
+.. include:: ./details/IpToFqdn-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.enrichment.IpToGeo: 
+
+IpToGeo
+-------
+Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **ip.address.field** property. By default, the geo information is put in a hierarchical structure. That is, if the name of the IP field is 'X', then the geo attributes added by enrichment are added under a father field named X_geo. "_geo" is the default hierarchical suffix that may be changed with the **geo.hierarchical.suffix** property. If one wants to put the geo fields at the same level as the IP field, then the **geo.hierarchical** property should be set to false and the geo attributes are created at the same level as the IP field with the naming pattern X_geo_. "_geo_" is the default flat suffix but this may be changed with the **geo.flat.suffix** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **iptogeo.service** property. The added geo fields are dependent on the underlying Ip to Geo service. The **geo.fields** property must contain the list of geo fields that should be created if data is available for the IP to resolve. This property defaults to "*" which means to add every available field. If one only wants a subset of the fields, one must define a comma separated list of fields as a value for the **geo.fields** property. The list of the available geo fields is in the description of the **geo.fields** property.
+
+Module
+______
+com.hurence.logisland:logisland-processor-enrichment:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.enrichment.IpToGeo
+
+Tags
+____
+geo, enrich, ip
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**ip.address.field**", "The name of the field containing the ip address to use.", "", "null", "false", "false"
+   "**iptogeo.service**", "The reference to the IP to Geo service to use.", "", "null", "false", "false"
+   "geo.fields", "Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently supported list of geo fields is the following:**continent**: the identified continent for this IP address. **continent_code**: the identified continent code for this IP address. **city**: the identified city for this IP address. **latitude**: the identified latitude for this IP address. **longitude**: the identified longitude for this IP address. **location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. **accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. **time_zone**: the identified time zone for this IP address. **subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. **subdivision_isocode_N**: the iso code matching the identified subdivision_N. **country**: the identified country for this IP address. **country_isocode**: the iso code for the identified country for this IP address. **postalcode**: the identified postal code for this IP address. **lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the lookup_micros property enabled in order to have this field available.", "", "*", "false", "false"
+   "geo.hierarchical", "Should the additional geo information fields be added under a hierarchical father field or not.", "", "true", "false", "false"
+   "geo.hierarchical.suffix", "Suffix to use for the field holding geo information. If geo.hierarchical is true, then use this suffix appended to the IP field name to define the father field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo", "false", "false"
+   "geo.flat.suffix", "Suffix to use for geo information fields when they are flat. If geo.hierarchical is false, then use this suffix appended to the IP field name but before the geo field name. This may be used for instance to distinguish between geo fields with various locales using many Ip to Geo service instances.", "", "_geo_", "false", "false"
+   "**cache.service**", "The name of the cache service to use.", "", "null", "false", "false"
+   "debug", "If true, an additional debug field is added. If the geo info fields prefix is X, a debug field named X_from_cache contains a boolean value to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).", "", "false", "false", "false"
+
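+A minimal configuration sketch (Logisland YAML job syntax assumed; the field name src_ip and the controller service identifiers are illustrative):
+
+.. code::
+
+	- processor: ip_to_geo
+	  component: com.hurence.logisland.processor.enrichment.IpToGeo
+	  configuration:
+	    ip.address.field: src_ip            # illustrative field holding the IP address to look up
+	    iptogeo.service: maxmind_service    # illustrative id of an Ip to Geo controller service defined elsewhere
+	    cache.service: lru_cache            # illustrative id of a cache controller service
+	    geo.fields: city,country,latitude,longitude
+	    geo.hierarchical: true
+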
+Extra informations
+__________________
+.. include:: ./details/IpToGeo-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.networkpacket.ParseNetworkPacket: 
+
+ParseNetworkPacket
+------------------
+The ParseNetworkPacket processor is the LogIsland entry point to parse network packets captured either off-the-wire (stream mode) or in pcap format (batch mode). In batch mode, the processor decodes the bytes of the incoming pcap record, in which a global header followed by a sequence of [packet header, packet data] pairs is stored. Then, each incoming pcap event is parsed into n packet records. The fields of the packet headers are then extracted and made available in dedicated record fields. See the `Capturing Network packets tutorial `_ for an example of usage of this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.networkpacket.ParseNetworkPacket
+
+Tags
+____
+PCap, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug.", "", "false", "false", "false"
+   "**flow.mode**", "Flow Mode. Indicate whether packets are provided in batch mode (via pcap files) or in stream mode (without headers). Allowed values are batch and stream.", "batch, stream", "null", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch: 
+
+BulkAddElasticsearch
+--------------------
+Indexes the content of a Record in Elasticsearch using elasticsearch's bulk processor
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**default.index**", "The name of the index to insert into", "", "null", "false", "**true**"
+   "**default.type**", "The type of this document (used by Elasticsearch for indexing and searching)", "", "null", "false", "**true**"
+   "**timebased.index**", "do we add a date suffix", "no (no date added to default index), today (today's date added to default index), yesterday (yesterday's date added to default index)", "no", "false", "false"
+   "es.index.field", "the name of the event field containing es index name => will override index value if set", "", "null", "false", "false"
+   "es.type.field", "the name of the event field containing es doc type => will override type value if set", "", "null", "false", "false"
+
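+An illustrative configuration sketch (Logisland YAML job syntax assumed; the index, type and service names are examples only):
+
+.. code::
+
+	- processor: bulk_add_es
+	  component: com.hurence.logisland.processor.elasticsearch.BulkAddElasticsearch
+	  configuration:
+	    elasticsearch.client.service: elasticsearch_service   # illustrative id of an Elasticsearch controller service
+	    default.index: logisland                               # illustrative default index name
+	    default.type: event
+	    timebased.index: today
+	    es.index.field: search_index                           # optional record field overriding the index name (illustrative)
+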
+Extra informations
+__________________
+.. include:: ./details/BulkAddElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.FetchHBaseRow: 
+
+FetchHBaseRow
+-------------
+Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row at a given interval by specifying the table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from incoming flow files.
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.FetchHBaseRow
+
+Tags
+____
+hbase, scan, fetch, get, enrich
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to fetch from.", "", "null", "false", "**true**"
+   "**row.identifier.field**", "The field containing the identifier of the row to fetch.", "", "null", "false", "**true**"
+   "columns.field", "The field containing an optional comma-separated list of \"\":\"\" pairs to fetch. To return all columns for a given family, leave off the qualifier such as \"\",\"\".", "", "null", "false", "**true**"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "table.name.default", "The table to use if table name field is not set", "", "null", "false", "false"
+
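+A possible configuration sketch (Logisland YAML job syntax assumed; the record field names and the service identifier are illustrative):
+
+.. code::
+
+	- processor: fetch_hbase_row
+	  component: com.hurence.logisland.processor.hbase.FetchHBaseRow
+	  configuration:
+	    hbase.client.service: hbase_service    # illustrative id of an HBase controller service defined elsewhere
+	    table.name.field: hbase_table          # illustrative record field holding the table name
+	    row.identifier.field: row_id           # illustrative record field holding the row id
+	    record.serializer: com.hurence.logisland.serializer.JsonSerializer
+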
+Extra informations
+__________________
+.. include:: ./details/FetchHBaseRow-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch: 
+
+MultiGetElasticsearch
+---------------------
+Retrieves a content indexed in elasticsearch using elasticsearch multiget queries.
+Each incoming record contains information regarding the elasticsearch multiget query that will be performed. This information is stored in record fields whose names are configured in the plugin properties (see below):
+
+ - index (String) : name of the elasticsearch index on which the multiget query will be performed. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - type (String) : name of the elasticsearch type on which the multiget query will be performed. This field is not mandatory.
+ - ids (String) : comma separated list of document ids to fetch. This field is mandatory and should not be empty, otherwise an error output record is sent for this specific incoming record.
+ - includes (String) : comma separated list of patterns to filter in (include) fields to retrieve. Supports wildcards. This field is not mandatory.
+ - excludes (String) : comma separated list of patterns to filter out (exclude) fields to retrieve. Supports wildcards. This field is not mandatory.
+
+Each outgoing record holds the data of one retrieved elasticsearch document. This data is stored in these fields:
+
+ - index (same field name as the incoming record) : name of the elasticsearch index.
+ - type (same field name as the incoming record) : name of the elasticsearch type.
+ - id (same field name as the incoming record) : retrieved document id.
+ - a list of String fields containing :
+
+   * field name : the retrieved field name
+   * field value : the retrieved field value
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**es.index.field**", "the name of the incoming records field containing es index name to use in multiget query. ", "", "null", "false", "false"
+   "**es.type.field**", "the name of the incoming records field containing es type name to use in multiget query", "", "null", "false", "false"
+   "**es.ids.field**", "the name of the incoming records field containing es document Ids to use in multiget query", "", "null", "false", "false"
+   "**es.includes.field**", "the name of the incoming records field containing es includes to use in multiget query", "", "null", "false", "false"
+   "**es.excludes.field**", "the name of the incoming records field containing es excludes to use in multiget query", "", "null", "false", "false"
+
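+An illustrative configuration sketch (Logisland YAML job syntax assumed; the record field names carrying the multiget parameters are examples only):
+
+.. code::
+
+	- processor: multiget_es
+	  component: com.hurence.logisland.processor.elasticsearch.MultiGetElasticsearch
+	  configuration:
+	    elasticsearch.client.service: elasticsearch_service   # illustrative id of an Elasticsearch controller service
+	    es.index.field: es_index          # illustrative record fields holding index, type, ids, includes and excludes
+	    es.type.field: es_type
+	    es.ids.field: es_ids
+	    es.includes.field: es_includes
+	    es.excludes.field: es_excludes
+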
+Extra informations
+__________________
+.. include:: ./details/MultiGetElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.hbase.PutHBaseCell: 
+
+PutHBaseCell
+------------
+Adds the Contents of a Record to HBase as the value of a single cell
+
+Module
+______
+com.hurence.logisland:logisland-processor-hbase:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.hbase.PutHBaseCell
+
+Tags
+____
+hadoop, hbase
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**hbase.client.service**", "The instance of the Controller Service to use for accessing HBase.", "", "null", "false", "false"
+   "**table.name.field**", "The field containing the name of the HBase Table to put data into", "", "null", "false", "**true**"
+   "row.identifier.field", "Specifies  field containing the Row ID to use when inserting data into HBase", "", "null", "false", "**true**"
+   "row.identifier.encoding.strategy", "Specifies the data type of Row ID used when inserting data into HBase. The default behavior is to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string to the correct byte[] representation. The Binary option should be used if you are using Binary row keys in HBase", "String (Stores the value of row id as a UTF-8 String.), Binary (Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.)", "String", "false", "false"
+   "**column.family.field**", "The field containing the  Column Family to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**column.qualifier.field**", "The field containing the  Column Qualifier to use when inserting data into HBase", "", "null", "false", "**true**"
+   "**batch.size**", "The maximum number of Records to process in a single execution. The Records will be grouped by table, and a single Put per table will be performed.", "", "25", "false", "false"
+   "record.schema", "the avro schema definition for the Avro serialization", "", "null", "false", "false"
+   "record.serializer", "the serializer needed to i/o the record in the HBase row", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), none (send events as bytes)", "com.hurence.logisland.serializer.KryoSerializer", "false", "false"
+   "table.name.default", "The table table to use if table name field is not set", "", "null", "false", "false"
+   "column.family.default", "The column family to use if column family field is not set", "", "null", "false", "false"
+   "column.qualifier.default", "The column qualifier to use if column qualifier field is not set", "", "null", "false", "false"
+
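+A possible configuration sketch (Logisland YAML job syntax assumed; the record field names and the service identifier are illustrative):
+
+.. code::
+
+	- processor: put_hbase_cell
+	  component: com.hurence.logisland.processor.hbase.PutHBaseCell
+	  configuration:
+	    hbase.client.service: hbase_service      # illustrative id of an HBase controller service
+	    table.name.field: hbase_table            # illustrative record fields holding table, family and qualifier
+	    column.family.field: hbase_family
+	    column.qualifier.field: hbase_qualifier
+	    batch.size: 25
+	    record.serializer: com.hurence.logisland.serializer.JsonSerializer
+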
+Extra informations
+__________________
+.. include:: ./details/PutHBaseCell-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.xml.EvaluateXPath: 
+
+EvaluateXPath
+-------------
+Evaluates one or more XPaths against the content of a record. The results of those XPaths are assigned to new attributes in the records, depending on the configuration of the processor. XPaths are entered by adding user-defined properties; the name of the property maps to the attribute name into which the result will be placed. The value of the property must be a valid XPath expression. If the expression matches nothing, no attribute is added.
+
+Module
+______
+com.hurence.logisland:logisland-processor-xml:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.xml.EvaluateXPath
+
+Tags
+____
+XML, evaluate, XPath
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**source**", "Indicates the attribute containing the xml data to evaluate xpath against.", "", "null", "false", "false"
+   "**validate_dtd**", "Specifies whether or not the XML content should be validated against the DTD.", "true, false", "true", "false", "false"
+   "conflict.resolution.policy", "What to do when a field with the same name already exists ?", "overwrite_existing (if field already exist), keep_only_old_field (keep only old field)", "keep_only_old_field", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "An attribute", "An XPath expression", " the attribute is set to the result of the XPath Expression.", "", "null", false
+
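+An illustrative configuration sketch (Logisland YAML job syntax assumed; the source field name and the XPath dynamic property are examples only):
+
+.. code::
+
+	- processor: evaluate_xpath
+	  component: com.hurence.logisland.processor.xml.EvaluateXPath
+	  configuration:
+	    source: xml_content                    # illustrative field holding the XML document
+	    validate_dtd: false
+	    conflict.resolution.policy: keep_only_old_field
+	    title: /catalog/book/title             # illustrative dynamic property: target field name -> XPath expression
+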
+Extra informations
+__________________
+.. include:: ./details/EvaluateXPath-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.ConsolidateSession: 
+
+ConsolidateSession
+------------------
+The ConsolidateSession processor is the Logisland entry point to get and process events from the Web Analytics. As an example, here is an incoming event from the Web Analytics:
+
+.. code::
+
+	"fields": [{ "name": "timestamp",              "type": "long" },
+	{ "name": "remoteHost",             "type": "string"},
+	{ "name": "record_type",            "type": ["null", "string"], "default": null },
+	{ "name": "record_id",              "type": ["null", "string"], "default": null },
+	{ "name": "location",               "type": ["null", "string"], "default": null },
+	{ "name": "hitType",                "type": ["null", "string"], "default": null },
+	{ "name": "eventCategory",          "type": ["null", "string"], "default": null },
+	{ "name": "eventAction",            "type": ["null", "string"], "default": null },
+	{ "name": "eventLabel",             "type": ["null", "string"], "default": null },
+	{ "name": "localPath",              "type": ["null", "string"], "default": null },
+	{ "name": "q",                      "type": ["null", "string"], "default": null },
+	{ "name": "n",                      "type": ["null", "int"],    "default": null },
+	{ "name": "referer",                "type": ["null", "string"], "default": null },
+	{ "name": "viewportPixelWidth",     "type": ["null", "int"],    "default": null },
+	{ "name": "viewportPixelHeight",    "type": ["null", "int"],    "default": null },
+	{ "name": "screenPixelWidth",       "type": ["null", "int"],    "default": null },
+	{ "name": "screenPixelHeight",      "type": ["null", "int"],    "default": null },
+	{ "name": "partyId",                "type": ["null", "string"], "default": null },
+	{ "name": "sessionId",              "type": ["null", "string"], "default": null },
+	{ "name": "pageViewId",             "type": ["null", "string"], "default": null },
+	{ "name": "is_newSession",          "type": ["null", "boolean"],"default": null },
+	{ "name": "userAgentString",        "type": ["null", "string"], "default": null },
+	{ "name": "pageType",               "type": ["null", "string"], "default": null },
+	{ "name": "UserId",                 "type": ["null", "string"], "default": null },
+	{ "name": "B2Bunit",                "type": ["null", "string"], "default": null },
+	{ "name": "pointOfService",         "type": ["null", "string"], "default": null },
+	{ "name": "companyID",              "type": ["null", "string"], "default": null },
+	{ "name": "GroupCode",              "type": ["null", "string"], "default": null },
+	{ "name": "userRoles",              "type": ["null", "string"], "default": null },
+	{ "name": "is_PunchOut",            "type": ["null", "string"], "default": null }]
+
+The ConsolidateSession processor groups the records by sessions and computes the duration between now and the last received event. If the distance from the last event is beyond a given threshold (by default 30mn), then the session is considered closed. The ConsolidateSession processor builds an aggregated session object for each active session. This aggregated object includes:
+
+- the actual session duration,
+- a boolean representing whether the session is considered active or closed (note: it is possible to resurrect a session if for instance an event arrives after a session has been marked closed),
+- user related info: userId, B2Bunit code, groupCode, userRoles, companyId,
+- first visited page: URL,
+- last visited page: URL.
+
+The properties to configure the processor are:
+
+- sessionid.field: Property name containing the session identifier (default: sessionId).
+- timestamp.field: Property name containing the timestamp of the event (default: timestamp).
+- session.timeout: Timeframe of inactivity (in seconds) after which a session is considered closed (default: 30mn).
+- visitedpage.field: Property name containing the page visited by the customer (default: location).
+- fields.to.return: List of fields to return in the aggregated object (default: N/A).
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.ConsolidateSession
+
+Tags
+____
+analytics, web, session
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "null", "false", "false"
+   "session.timeout", "session timeout in sec", "", "1800", "false", "false"
+   "sessionid.field", "the name of the field containing the session id => will override default value if set", "", "sessionId", "false", "false"
+   "timestamp.field", "the name of the field containing the timestamp => will override default value if set", "", "h2kTimestamp", "false", "false"
+   "visitedpage.field", "the name of the field containing the visited page => will override default value if set", "", "location", "false", "false"
+   "userid.field", "the name of the field containing the userId => will override default value if set", "", "userId", "false", "false"
+   "fields.to.return", "the list of fields to return", "", "null", "false", "false"
+   "firstVisitedPage.out.field", "the name of the field containing the first visited page => will override default value if set", "", "firstVisitedPage", "false", "false"
+   "lastVisitedPage.out.field", "the name of the field containing the last visited page => will override default value if set", "", "lastVisitedPage", "false", "false"
+   "isSessionActive.out.field", "the name of the field stating whether the session is active or not => will override default value if set", "", "is_sessionActive", "false", "false"
+   "sessionDuration.out.field", "the name of the field containing the session duration => will override default value if set", "", "sessionDuration", "false", "false"
+   "eventsCounter.out.field", "the name of the field containing the session duration => will override default value if set", "", "eventsCounter", "false", "false"
+   "firstEventDateTime.out.field", "the name of the field containing the date of the first event => will override default value if set", "", "firstEventDateTime", "false", "false"
+   "lastEventDateTime.out.field", "the name of the field containing the date of the last event => will override default value if set", "", "lastEventDateTime", "false", "false"
+   "sessionInactivityDuration.out.field", "the name of the field containing the session inactivity duration => will override default value if set", "", "sessionInactivityDuration", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/ConsolidateSession-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.DetectOutliers: 
+
+DetectOutliers
+--------------
+Outlier Analysis: A Hybrid Approach
+
+In order to function at scale, a two-phase approach is taken
+
+For every data point
+
+- Detect outlier candidates using a robust estimator of variability (e.g. median absolute deviation) that uses distributional sketching (e.g. Q-trees)
+- Gather a biased sample (biased by recency)
+- Extremely deterministic in space and cheap in computation
+
+For every outlier candidate
+
+- Use traditional, more computationally complex approaches to outlier analysis (e.g. Robust PCA) on the biased sample
+- Expensive computationally, but run infrequently
+
+This becomes a data filter which can be attached to a timeseries data stream within a distributed computational framework (e.g. Storm, Spark, Flink, NiFi) to detect outliers.
+
+Module
+______
+com.hurence.logisland:logisland-processor-outlier-detection:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.DetectOutliers
+
+Tags
+____
+analytic, outlier, record, iot, timeseries
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**value.field**", "the numeric field to get the value", "", "record_value", "false", "false"
+   "**time.field**", "the numeric field to get the value", "", "record_time", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "**rotation.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**rotation.policy.amount**", "...", "", "100", "false", "false"
+   "**rotation.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "**chunking.policy.type**", "...", "by_amount, by_time, never", "by_amount", "false", "false"
+   "**chunking.policy.amount**", "...", "", "100", "false", "false"
+   "**chunking.policy.unit**", "...", "milliseconds, seconds, hours, days, months, years, points", "points", "false", "false"
+   "sketchy.outlier.algorithm", "...", "SKETCHY_MOVING_MAD", "SKETCHY_MOVING_MAD", "false", "false"
+   "batch.outlier.algorithm", "...", "RAD", "RAD", "false", "false"
+   "global.statistics.min", "minimum value", "", "null", "false", "false"
+   "global.statistics.max", "maximum value", "", "null", "false", "false"
+   "global.statistics.mean", "mean value", "", "null", "false", "false"
+   "global.statistics.stddev", "standard deviation value", "", "null", "false", "false"
+   "**zscore.cutoffs.normal**", "zscoreCutoffs level for normal outlier", "", "0.000000000000001", "false", "false"
+   "**zscore.cutoffs.moderate**", "zscoreCutoffs level for moderate outlier", "", "1.5", "false", "false"
+   "**zscore.cutoffs.severe**", "zscoreCutoffs level for severe outlier", "", "10.0", "false", "false"
+   "zscore.cutoffs.notEnoughData", "zscoreCutoffs level for notEnoughData outlier", "", "100", "false", "false"
+   "smooth", "do smoothing ?", "", "false", "false", "false"
+   "decay", "the decay", "", "0.1", "false", "false"
+   "**min.amount.to.predict**", "minAmountToPredict", "", "100", "false", "false"
+   "min_zscore_percentile", "minZscorePercentile", "", "50.0", "false", "false"
+   "reservoir_size", "the size of points reservoir", "", "100", "false", "false"
+   "rpca.force.diff", "No Description Provided.", "", "null", "false", "false"
+   "rpca.lpenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.min.records", "No Description Provided.", "", "null", "false", "false"
+   "rpca.spenalty", "No Description Provided.", "", "null", "false", "false"
+   "rpca.threshold", "No Description Provided.", "", "null", "false", "false"
+
+Extra informations
+__________________
+.. include:: ./details/DetectOutliers-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch: 
+
+EnrichRecordsElasticsearch
+--------------------------
+Enrich input records with content indexed in elasticsearch using multiget queries.
+Each incoming record may be enriched with information stored in elasticsearch.
+Each outgoing record holds at least the input record plus potentially one or more fields coming from one elasticsearch document.
+
+Module
+______
+com.hurence.logisland:logisland-processor-elasticsearch:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+
+Tags
+____
+elasticsearch
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**elasticsearch.client.service**", "The instance of the Controller Service to use for accessing Elasticsearch.", "", "null", "false", "false"
+   "**record.key**", "The name of field in the input record containing the document id to use in ES multiget query", "", "null", "false", "**true**"
+   "**es.index**", "The name of the ES index to use in multiget query. ", "", "null", "false", "**true**"
+   "es.type", "The name of the ES type to use in multiget query.", "", "default", "false", "**true**"
+   "es.includes.field", "The name of the ES fields to include in the record.", "", "*", "false", "**true**"
+   "es.excludes.field", "The name of the ES fields to exclude.", "", "N/A", "false", "false"
+
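+An illustrative configuration sketch (Logisland YAML job syntax assumed; the index name and the record.key expression are examples only):
+
+.. code::
+
+	- processor: enrich_records_es
+	  component: com.hurence.logisland.processor.elasticsearch.EnrichRecordsElasticsearch
+	  configuration:
+	    elasticsearch.client.service: elasticsearch_service   # illustrative id of an Elasticsearch controller service
+	    record.key: ${customer_id}       # illustrative expression reading the document id from the record (EL is supported)
+	    es.index: customers              # illustrative index to query
+	    es.type: default
+	    es.includes.field: "*"
+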
+Extra informations
+__________________
+.. include:: ./details/EnrichRecordsElasticsearch-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.excel.ExcelExtract: 
+
+ExcelExtract
+------------
+Consumes a Microsoft Excel document and converts each worksheet's line to a structured record. The processor expects to receive a raw Excel file as the input record.
+
+Module
+______
+com.hurence.logisland:logisland-processor-excel:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.excel.ExcelExtract
+
+Tags
+____
+excel, processor, poi
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "sheets", "Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. You can specify regular expressions. Any sheets not specified in this value will be ignored.", "", "", "false", "false"
+   "skip.columns", "Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.", "", "", "false", "false"
+   "field.names", "The comma separated list representing the names of columns of extracted cells. Order matters! You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+   "skip.rows", "The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.", "", "0", "false", "false"
+   "record.type", "Default type of record", "", "excel_record", "false", "false"
+   "field.row.header", "If set, field names mapping will be extracted from the specified row number. You should use either field.names either field.row.header but not both together.", "", "null", "false", "false"
+
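+A possible configuration sketch (Logisland YAML job syntax assumed; the sheet and column names are examples only):
+
+.. code::
+
+	- processor: excel_extract
+	  component: com.hurence.logisland.processor.excel.ExcelExtract
+	  configuration:
+	    sheets: Sheet1                       # illustrative sheet name (regular expressions are accepted)
+	    skip.rows: 1
+	    field.names: date,amount,customer    # illustrative column names; do not combine with field.row.header
+	    record.type: excel_record
+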
+Extra informations
+__________________
+.. include:: ./details/ExcelExtract-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchIP: 
+
+MatchIP
+-------
+IP address query matching (using `Luwak `_)
+
+You can use this processor to handle custom events matching IP addresses (CIDR).
+A record matching a registered IP address query is tagged appropriately.
+
+A query is expressed as a lucene query against a field like for example: 
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+	don't forget to set the numeric.fields property to correctly handle numeric range queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchIP
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
+Extra informations
+__________________
+.. include:: ./details/MatchIP-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.MatchQuery: 
+
+MatchQuery
+----------
+Query matching based on `Luwak `_
+
+You can use this processor to handle custom events defined by lucene queries.
+A new record is added to the output each time a registered query is matched.
+
+A query is expressed as a lucene query against a field like for example: 
+
+.. code::
+
+	message:'bad exception'
+	error_count:[10 TO *]
+	bytes_out:5000
+	user_name:tom*
+
+Please read the `Lucene syntax guide `_ for supported operations
+
+.. warning::
+
+	don't forget to set the numeric.fields property to correctly handle numeric range queries
+
+Module
+______
+com.hurence.logisland:logisland-processor-querymatcher:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.MatchQuery
+
+Tags
+____
+analytic, percolator, record, record, query, lucene
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "numeric.fields", "a comma separated string of numeric field to be matched", "", "null", "false", "false"
+   "output.record.type", "the output type of the record", "", "alert_match", "false", "false"
+   "record.type.updatePolicy", "Record type update policy", "", "overwrite", "false", "false"
+   "policy.onmatch", "the policy applied to match events: 'first' (default value) match events are tagged with the name and value of the first query that matched;'all' match events are tagged with all names and values of the queries that matched.", "", "first", "false", "false"
+   "policy.onmiss", "the policy applied to miss events: 'discard' (default value) drop events that did not match any query;'forward' include also events that did not match any query.", "", "discard", "false", "false"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "query", "some Lucene query", "generate a new record when this query is matched", "", "null", **true**
+
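+An illustrative configuration sketch (Logisland YAML job syntax assumed; the query registered as a dynamic property is an example only):
+
+.. code::
+
+	- processor: match_query
+	  component: com.hurence.logisland.processor.MatchQuery
+	  configuration:
+	    numeric.fields: bytes_out,error_count
+	    output.record.type: alert_match
+	    policy.onmatch: first
+	    policy.onmiss: discard
+	    too_many_errors: error_count:[10 TO *]    # illustrative dynamic property: alert name -> Lucene query
+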
+Extra informations
+__________________
+.. include:: ./details/MatchQuery-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.bro.ParseBroEvent: 
+
+ParseBroEvent
+-------------
+The ParseBroEvent processor is the Logisland entry point to get and process `Bro `_ events. The `Bro-Kafka plugin `_ should be used and configured in order to have Bro events sent to Kafka. See the `Bro/Logisland tutorial `_ for an example of usage for this processor. The ParseBroEvent processor does some minor pre-processing on incoming Bro events from the Bro-Kafka plugin to adapt them to Logisland.
+
+Basically the events coming from the Bro-Kafka plugin are JSON documents with a first level field indicating the type of the event. The ParseBroEvent processor takes the incoming JSON document, sets the event type in a record_type field and sets the original sub-fields of the JSON event as first level fields in the record. Also any dot in a field name is transformed into an underscore. Thus, for instance, the field id.orig_h becomes id_orig_h. The next processors in the stream can then process the Bro events generated by this ParseBroEvent processor.
+
+As an example here is an incoming event from Bro:
+
+.. code::
+
+	{
+	   "conn": {
+	     "id.resp_p": 9092,
+	     "resp_pkts": 0,
+	     "resp_ip_bytes": 0,
+	     "local_orig": true,
+	     "orig_ip_bytes": 0,
+	     "orig_pkts": 0,
+	     "missed_bytes": 0,
+	     "history": "Cc",
+	     "tunnel_parents": [],
+	     "id.orig_p": 56762,
+	     "local_resp": true,
+	     "uid": "Ct3Ms01I3Yc6pmMZx7",
+	     "conn_state": "OTH",
+	     "id.orig_h": "172.17.0.2",
+	     "proto": "tcp",
+	     "id.resp_h": "172.17.0.3",
+	     "ts": 1487596886.953917
+	   }
+	}
+
+It gets processed and transformed into the following Logisland record by the ParseBroEvent processor:
+
+"@timestamp": "2017-02-20T13:36:32Z"
+
+"record_id": "6361f80a-c5c9-4a16-9045-4bb51736333d"
+
+"record_time": 1487597792782
+
+"record_type": "conn"
+
+"id_resp_p": 9092
+
+"resp_pkts": 0
+
+"resp_ip_bytes": 0
+
+"local_orig": true
+
+"orig_ip_bytes": 0
+
+"orig_pkts": 0
+
+"missed_bytes": 0
+
+"history": "Cc"
+
+"tunnel_parents": []
+
+"id_orig_p": 56762
+
+"local_resp": true
+
+"uid": "Ct3Ms01I3Yc6pmMZx7"
+
+"conn_state": "OTH"
+
+"id_orig_h": "172.17.0.2"
+
+"proto": "tcp"
+
+"id_resp_h": "172.17.0.3"
+
+"ts": 1487596886.953917
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.bro.ParseBroEvent
+
+Tags
+____
+bro, security, IDS, NIDS
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+
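+Below is a minimal, hypothetical configuration sketch for this processor in a Logisland job file; only the documented debug property is set, and the processor identifier and surrounding stream layout are placeholders.
+
+.. code-block:: yaml
+
+   # excerpt of a stream's processorConfigurations (hypothetical example)
+   - processor: parse_bro
+     component: com.hurence.logisland.processor.bro.ParseBroEvent
+     configuration:
+       debug: false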
+Extra informations
+__________________
+.. include:: ./details/ParseBroEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.netflow.ParseNetflowEvent: 
+
+ParseNetflowEvent
+-----------------
+The `Netflow V5 `_ processor is the Logisland entry point to process Netflow (V5) events. NetFlow is a feature introduced on Cisco routers that provides the ability to collect IP network traffic. We can distinguish 2 components:
+
+	- Flow exporter: aggregates packets into flows and exports flow records (binary format) towards one or more flow collectors
+
+	- Flow collector: responsible for reception, storage and pre-processing of flow data received from a flow exporter
+
+The collected data are then available for analysis purposes (intrusion detection, traffic analysis...).
+Netflow records are sent to Kafka in order to be processed by Logisland.
+In the tutorial we will simulate Netflow traffic using `nfgen `_. This traffic will be sent to port 2055. We then rely on NiFi to listen on that port for incoming Netflow (V5) traffic and forward it to a Kafka topic. The Netflow processor can thus process these events and generate the corresponding Logisland records. The following processors in the stream can then process the Netflow records generated by this processor.
+
+Module
+______
+com.hurence.logisland:logisland-processor-cyber-security:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.netflow.ParseNetflowEvent
+
+Tags
+____
+netflow, security
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "debug", "Enable debug. If enabled, the original JSON string is embedded in the record_value field of the record.", "", "false", "false", "false"
+   "output.record.type", "the output type of the record", "", "netflowevent", "false", "false"
+   "enrich.record", "Enrich data. If enabledthe netflow record is enriched with inferred data", "", "false", "false", "false"
+
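+A hypothetical configuration sketch follows; the property keys come from the table above, while the values and the processor identifier are placeholders.
+
+.. code-block:: yaml
+
+   # excerpt of a stream's processorConfigurations (hypothetical example)
+   - processor: parse_netflow
+     component: com.hurence.logisland.processor.netflow.ParseNetflowEvent
+     configuration:
+       debug: false
+       output.record.type: netflowevent
+       enrich.record: false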
+Extra informations
+__________________
+.. include:: ./details/ParseNetflowEvent-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.scripting.python.RunPython: 
+
+RunPython
+---------
+ !!!! WARNING !!!!
+
+The RunPython processor is currently an experimental feature: it is delivered as is, with the current set of features, and is subject to changes in its API or anything else in future Logisland releases without warning. There is no tutorial yet. If you want to play with this processor, use the python-processing.yml example and send it the apache logs of the index apache logs tutorial. The debug stream processor at the end of the stream should output events to the stderr file of the executors, visible from the Spark console.
+
+This processor allows implementing and running a processor written in Python. This can be done in two ways: either by directly defining the process method code in the **script.code.process** configuration property, or by pointing to an external Python module script file in the **script.path** configuration property. Directly defining methods is called the inline mode, whereas using a script file is called the file mode. Both ways are mutually exclusive. Whether using the inline or file mode, your Python code may depend on some Python dependencies. If the set of Python dependencies already delivered with the Logisland framework is not sufficient, you can use the **dependencies.path** configuration property to give their location. Currently only the nltk Python library is delivered with Logisland.
+
+Module
+______
+com.hurence.logisland:logisland-processor-scripting:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.scripting.python.RunPython
+
+Tags
+____
+scripting, python
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "script.code.imports", "For inline mode only. This is the python code that should hold the import statements if required.", "", "null", "false", "false"
+   "script.code.init", "The python code to be called when the processor is initialized. This is the python equivalent of the init method code for a java processor. This is not mandatory but can only be used if **script.code.process** is defined (inline mode).", "", "null", "false", "false"
+   "script.code.process", "The python code to be called to process the records. This is the pyhton equivalent of the process method code for a java processor. For inline mode, this is the only minimum required configuration property. Using this property, you may also optionally define the **script.code.init** and **script.code.imports** properties.", "", "null", "false", "false"
+   "script.path", "The path to the user's python processor script. Use this property for file mode. Your python code must be in a python file with the following constraints: let's say your pyhton script is named MyProcessor.py. Then MyProcessor.py is a module file that must contain a class named MyProcessor which must inherits from the Logisland delivered class named AbstractProcessor. You can then define your code in the process method and in the other traditional methods (init...) as you would do in java in a class inheriting from the AbstractProcessor java class.", "", "null", "false", "false"
+   "dependencies.path", "The path to the additional dependencies for the user's python code, whether using inline or file mode. This is optional as your code may not have additional dependencies. If you defined **script.path** (so using file mode) and if **dependencies.path** is not defined, Logisland will scan a potential directory named **dependencies** in the same directory where the script file resides and if it exists, any python code located there will be loaded as dependency as needed.", "", "null", "false", "false"
+   "logisland.dependencies.path", "The path to the directory containing the python dependencies shipped with logisland. You should not have to tune this parameter.", "", "null", "false", "false"
+
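+As a hint, here is a hypothetical file-mode configuration sketch; the script and dependencies paths are placeholders, and the surrounding job-file layout is assumed.
+
+.. code-block:: yaml
+
+   # excerpt of a stream's processorConfigurations (hypothetical example, file mode)
+   - processor: my_python_processor
+     component: com.hurence.logisland.processor.scripting.python.RunPython
+     configuration:
+       # MyProcessor.py must define a MyProcessor class extending AbstractProcessor
+       script.path: /opt/logisland/scripts/MyProcessor.py
+       # optional: location of extra python dependencies for the script
+       dependencies.path: /opt/logisland/scripts/dependencies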
+Extra informations
+__________________
+.. include:: ./details/RunPython-Detail.rst
+----------
+
+.. _com.hurence.logisland.processor.webAnalytics.URLDecoder: 
+
+URLDecoder
+----------
+Decode one or more fields containing a URL with possibly encoded special characters.
+
+Module
+______
+com.hurence.logisland:logisland-processor-web-analytics:1.2.0
+
+Class
+_____
+com.hurence.logisland.processor.webAnalytics.URLDecoder
+
+Tags
+____
+record, fields, Decode
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**decode.fields**", "List of fields (URL) to decode", "", "null", "false", "false"
+   "charset", "Charset to use to decode the URL", "", "UTF-8", "false", "false"
+
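+A hypothetical configuration sketch (the field names and processor identifier are placeholders):
+
+.. code-block:: yaml
+
+   # excerpt of a stream's processorConfigurations (hypothetical example)
+   - processor: url_decoder
+     component: com.hurence.logisland.processor.webAnalytics.URLDecoder
+     configuration:
+       decode.fields: location,referer
+       charset: UTF-8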
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
 .. csv-table:: dynamic-properties
    :header: "Name","Value","Description","Allowable Values","Default Value","EL"
    :widths: 20,20,40,40,20,10
diff --git a/logisland-documentation/user/components/services.rst b/logisland-documentation/user/components/services.rst
index 434547506..4236b8c0f 100644
--- a/logisland-documentation/user/components/services.rst
+++ b/logisland-documentation/user/components/services.rst
@@ -1252,3 +1252,1604 @@ In the list below, the names of required properties appear in **bold**. Any othe
 Extra informations
 __________________
 No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService: 
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+   "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
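+A hypothetical declaration sketch for this service in a Logisland job file; the service identifier and the database path are placeholders, while the property keys come from the table above.
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: ip_to_geo_service
+     component: com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+     configuration:
+       maxmind.database.path: /opt/maxmind/GeoLite2-City.mmdb   # placeholder path
+       locale: en
+       lookup.time: false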
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService: 
+
+CSVKeyValueCacheService
+-----------------------
+A cache that stores CSV lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+   "csv.header", "comma separated header values", "", "null", "false", "false"
+   "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+   "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+   "**row.key**", "th primary key of this db", "", "null", "false", "false"
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+   "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+   "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
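+A hypothetical declaration sketch (the file path, key column and service identifier are placeholders):
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: csv_cache_service
+     component: com.hurence.logisland.service.cache.CSVKeyValueCacheService
+     configuration:
+       csv.format: excel_fr
+       csv.file.path: /opt/logisland/data/lookup.csv
+       first.line.header: true
+       row.key: id
+       cache.size: 16384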
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService: 
+
+CassandraControllerService
+--------------------------
+Provides a controller service that, for the moment, only allows bulk-putting records into Cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+   "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+   "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+   "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+   "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
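+A hypothetical declaration sketch (hosts, port and credentials are placeholders):
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: cassandra_service
+     component: com.hurence.logisland.service.cassandra.CassandraControllerService
+     configuration:
+       cassandra.hosts: cassandra1,cassandra2
+       cassandra.port: 9042
+       cassandra.with-credentials: true
+       cassandra.credentials.user: logisland
+       cassandra.credentials.password: changeme
+       batch.size: 1000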
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService: 
+
+Elasticsearch_6_6_2_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
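+A hypothetical declaration sketch; the host list and tuning values are placeholders, and the property keys come from the table above.
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: elasticsearch_service
+     component: com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService
+     configuration:
+       hosts: "es-node1:9300,es-node2:9300"
+       backoff.policy: defaultExponentialBackoff
+       num.retry: 3
+       throttling.delay: 500
+       batch.size: 1000
+       bulk.size: 5
+       flush.interval: 5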
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService: 
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+   "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+   "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
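+A hypothetical declaration sketch; the ZooKeeper values are placeholders and the last entry illustrates a dynamic property passed straight to the HBase configuration.
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: hbase_service
+     component: com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+     configuration:
+       zookeeper.quorum: zk1,zk2,zk3
+       zookeeper.client.port: 2181
+       zookeeper.znode.parent: /hbase
+       hbase.client.retries: 3
+       # dynamic property: any HBase configuration key/value
+       hbase.client.scanner.timeout.period: 60000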
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService: 
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that, for the moment, only allows bulk-putting records into InfluxDB.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+   "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+   "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+   "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+   "influxdb.tags", "List of tags for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+   "influxdb.fields", "List of fields for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+   "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into  InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into  InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into  InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+   "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUMand ALL. Default value is ANY. This is only useful when  using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+   "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+   "influxdb.timefield", "Time field for each supported measurement.  Syntax: :,...[;:,]... With format being any constant defined in  java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds.  Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
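+A hypothetical declaration sketch; the URL, database name and tag/field layout are placeholders (the tags value reuses the syntax example from the table above).
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: influxdb_service
+     component: com.hurence.logisland.service.influxdb.InfluxDBControllerService
+     configuration:
+       influxdb.url: "http://influxdb:8086"
+       influxdb.database: metrics
+       influxdb.configuration_mode: explicit_tags_and_fields
+       influxdb.tags: cpu:core1,core2;mem:used
+       influxdb.fields: cpu:usage;mem:free
+       influxdb.retention_policy: autogen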
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService: 
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key-value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService: 
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+   "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+   "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+   "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
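+A hypothetical declaration sketch (URI, database and collection names are placeholders):
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: mongo_service
+     component: com.hurence.logisland.service.mongodb.MongoDBControllerService
+     configuration:
+       mongo.uri: "mongodb://mongo1:27017,mongo2:27017"
+       mongo.db.name: logisland
+       mongo.collection.name: events
+       mongo.write.concern: ACKNOWLEDGED
+       mongo.bulk.mode: upsert
+       batch.size: 1000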
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService: 
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key-value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.2.0
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+   "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+   "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+   "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+   "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+   "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+   "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+   "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+   "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+   "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+   "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+   "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+   "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+   "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+   "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+   "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+   "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+   "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+   "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+   "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
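+A hypothetical declaration sketch for a standalone Redis; the connection string is a placeholder and the serializer is taken from the documented allowable values.
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: redis_service
+     component: com.hurence.logisland.redis.service.RedisKeyValueCacheService
+     configuration:
+       redis.mode: standalone
+       connection.string: "redis-host:6379"
+       database.index: 0
+       communication.timeout: 10 seconds
+       pool.max.total: 8
+       record.recordSerializer: com.hurence.logisland.serializer.JsonSerializer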
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService: 
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
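+A hypothetical declaration sketch (connection string and collection name are placeholders):
+
+.. code-block:: yaml
+
+   # excerpt of controllerServiceConfigurations (hypothetical example)
+   - controllerService: solr_service
+     component: com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+     configuration:
+       solr.cloud: false
+       solr.connection.string: "localhost:8983/solr"
+       solr.collection: logisland
+       batch.size: 1000
+       flush.interval: 500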
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService: 
+
+Elasticsearch_7_x_ClientService
+-------------------------------
+Implementation of ElasticsearchClientService for ElasticSearch 7.x. Note that although Elasticsearch 7.x still accepts type information, this implementation will ignore any type usage and will only work at the index level to be already compliant with the ElasticSearch 8.x version that will completely remove type usage.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_7_x-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr8ClientService: 
+
+Solr8ClientService
+------------------
+Implementation of SolrClientService for Solr 8
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_8-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr8ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService: 
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP 2 GEO Service using maxmind lite db file
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+   "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService: 
+
+CSVKeyValueCacheService
+-----------------------
+A cache that store csv lines as records loaded from a file
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+   "csv.header", "comma separated header values", "", "null", "false", "false"
+   "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+   "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+   "**row.key**", "th primary key of this db", "", "null", "false", "false"
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+   "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+   "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService: 
+
+CassandraControllerService
+--------------------------
+Provides a controller service that for the moment only allows to bulkput records into cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+   "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+   "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+   "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+   "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService: 
+
+Elasticsearch_6_6_2_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive"..
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService: 
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the  `Expression Language `_ .
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+   "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+   "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra informations
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService: 
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that for the moment only allows to bulkput records into influxdb.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+   "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+   "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+   "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+   "influxdb.tags", "List of tags for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+   "influxdb.fields", "List of fields for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+   "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into  InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into  InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into  InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+   "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUMand ALL. Default value is ANY. This is only useful when  using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+   "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+   "influxdb.timefield", "Time field for each supported measurement.  Syntax: :,...[;:,]... With format being any constant defined in  java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds.  Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
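+A minimal, illustrative declaration of this service in a job configuration could look as follows (an excerpt of a ``controllerServiceConfigurations`` block; the service name and all values are placeholders):
+
+.. code-block:: yaml
+
+    - controllerService: influxdb_service        # placeholder service id
+      component: com.hurence.logisland.service.influxdb.InfluxDBControllerService
+      type: service
+      configuration:
+        influxdb.url: http://localhost:8086      # placeholder URL
+        influxdb.database: logisland             # placeholder database name
+        influxdb.configuration_mode: all_as_fields
+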
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService: 
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key/value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
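+For illustration, this cache could be declared in a job file as follows (placeholder service name and value):
+
+.. code-block:: yaml
+
+    - controllerService: lru_cache_service       # placeholder service id
+      component: com.hurence.logisland.service.cache.LRUKeyValueCacheService
+      type: service
+      configuration:
+        cache.size: 16384
+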
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService: 
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+   "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+   "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+   "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
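+An illustrative ``controllerServiceConfigurations`` excerpt for this service (placeholder URI, database and collection) could be:
+
+.. code-block:: yaml
+
+    - controllerService: mongo_service           # placeholder service id
+      component: com.hurence.logisland.service.mongodb.MongoDBControllerService
+      type: service
+      configuration:
+        mongo.uri: mongodb://localhost:27017     # placeholder URI
+        mongo.db.name: logisland                 # placeholder database
+        mongo.collection.name: events            # placeholder collection
+        mongo.bulk.mode: upsert
+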
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService: 
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key/value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.2.0
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+   "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+   "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+   "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+   "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+   "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+   "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+   "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+   "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+   "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+   "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+   "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+   "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+   "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+   "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+   "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+   "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+   "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+   "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+   "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
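+As a sketch, a standalone Redis cache could be declared like this (placeholder connection string):
+
+.. code-block:: yaml
+
+    - controllerService: redis_cache_service     # placeholder service id
+      component: com.hurence.logisland.redis.service.RedisKeyValueCacheService
+      type: service
+      configuration:
+        redis.mode: standalone
+        connection.string: localhost:6379        # placeholder host:port
+        record.recordSerializer: com.hurence.logisland.serializer.JsonSerializer
+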
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService: 
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
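+For illustration, a declaration pointing at a standalone Solr core might look like this (placeholder collection and URL):
+
+.. code-block:: yaml
+
+    - controllerService: solr_service            # placeholder service id
+      component: com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+      type: service
+      configuration:
+        solr.cloud: false
+        solr.collection: logisland               # placeholder collection
+        solr.connection.string: http://localhost:8983/solr   # placeholder core address
+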
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService: 
+
+Elasticsearch_7_x_ClientService
+-------------------------------
+Implementation of ElasticsearchClientService for ElasticSearch 7.x. Note that although Elasticsearch 7.x still accepts type information, this implementation will ignore any type usage and will only work at the index level to be already compliant with the ElasticSearch 8.x version that will completely remove type usage.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_7_x-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "enable.ssl", "Whether to enable (true) TLS/SSL connections or not (false). This can for instance be used with opendistro. Defaults to false. Note that the current implementation does try to validate the server certificate.", "", "false", "false", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
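+An illustrative declaration enabling TLS and basic credentials could be (all values are placeholders):
+
+.. code-block:: yaml
+
+    - controllerService: elasticsearch_service   # placeholder service id
+      component: com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService
+      type: service
+      configuration:
+        hosts: es-node1:9200                     # placeholder host:port
+        enable.ssl: true
+        username: logisland                      # placeholder credentials
+        password: changeme
+        batch.size: 2000
+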
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr8ClientService: 
+
+Solr8ClientService
+------------------
+Implementation of SolrClientService for Solr 8
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_8-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr8ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
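+As a sketch, a SolrCloud-mode declaration could look like this (placeholder collection and ZooKeeper quorum):
+
+.. code-block:: yaml
+
+    - controllerService: solr8_service           # placeholder service id
+      component: com.hurence.logisland.service.solr.Solr8ClientService
+      type: service
+      configuration:
+        solr.cloud: true
+        solr.collection: logisland               # placeholder collection
+        solr.connection.string: zk1:2181,zk2:2181    # placeholder ZooKeeper quorum
+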
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService: 
+
+MaxmindIpToGeoService
+---------------------
+Implementation of the IP-to-Geo service using the Maxmind GeoLite database file.
+
+Module
+______
+com.hurence.logisland:logisland-service-ip-to-geo-maxmind:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+
+Tags
+____
+ip, service, geo, maxmind
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "maxmind.database.uri", "Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "maxmind.database.path", "Local Path to the Maxmind Geo Enrichment Database File.", "", "null", "false", "false"
+   "locale", "Locale to use for geo information. Defaults to 'en'.", "", "en", "false", "false"
+   "lookup.time", "Should the additional lookup_micros field be returned or not.", "", "false", "false", "false"
+
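+For illustration, the service could be declared as follows (placeholder database path):
+
+.. code-block:: yaml
+
+    - controllerService: ip_to_geo_service       # placeholder service id
+      component: com.hurence.logisland.service.iptogeo.maxmind.MaxmindIpToGeoService
+      type: service
+      configuration:
+        maxmind.database.path: /opt/maxmind/GeoLite2-City.mmdb   # placeholder local path
+        locale: en
+        lookup.time: false
+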
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.CSVKeyValueCacheService: 
+
+CSVKeyValueCacheService
+-----------------------
+A cache that stores CSV lines as records loaded from a file.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.CSVKeyValueCacheService
+
+Tags
+____
+csv, service, cache
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**csv.format**", "a configuration for loading csv", "default (Standard comma separated format, as for RFC4180 but allowing empty lines. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(true)), excel (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(',')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), excel_fr (Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary to customize this format to accommodate to your regional settings. withDelimiter(';')  withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false) withAllowMissingColumnNames(true)), mysql (Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations.This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with '\'. The default NULL string is \"\\N\". Settings are: withDelimiter('\t') withQuote(null) withRecordSeparator('\n') withIgnoreEmptyLines(false) withEscape('\\') withNullString(\"\\N\") withQuoteMode(QuoteMode.ALL_NON_NULL)), rfc4180 (Comma separated format as defined by RFC 4180. Settings are: withDelimiter(',') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreEmptyLines(false)), tdf (Tab-delimited format. Settings are: withDelimiter('\t') withQuote('\"') withRecordSeparator(\"\r\n\") withIgnoreSurroundingSpaces(true))", "default", "false", "false"
+   "csv.header", "comma separated header values", "", "null", "false", "false"
+   "csv.file.uri", "Path to the CSV File.", "", "null", "false", "false"
+   "csv.file.path", "Local Path to the CSV File.", "", "null", "false", "false"
+   "**row.key**", "th primary key of this db", "", "null", "false", "false"
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+   "first.line.header", "csv headers grabbed from first line", "", "null", "false", "false"
+   "encoding.charset", "charset", "", "UTF-8", "false", "false"
+
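+An illustrative declaration loading a local CSV file could be (placeholder path and key column):
+
+.. code-block:: yaml
+
+    - controllerService: csv_cache_service       # placeholder service id
+      component: com.hurence.logisland.service.cache.CSVKeyValueCacheService
+      type: service
+      configuration:
+        csv.format: excel_fr
+        csv.file.path: /opt/data/lookup.csv      # placeholder local path
+        first.line.header: true
+        row.key: id                              # placeholder key column
+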
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cassandra.CassandraControllerService: 
+
+CassandraControllerService
+--------------------------
+Provides a controller service that, for the moment, only allows bulk put of records into Cassandra.
+
+Module
+______
+com.hurence.logisland:logisland-service-cassandra-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cassandra.CassandraControllerService
+
+Tags
+____
+cassandra, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**cassandra.hosts**", "Cassandra cluster hosts as a comma separated value list", "", "null", "false", "false"
+   "**cassandra.port**", "Cassandra cluster port", "", "null", "false", "false"
+   "cassandra.with-ssl", "If this property is true, use SSL. Default is no SSL (false).", "", "false", "false", "false"
+   "cassandra.with-credentials", "If this property is true, use credentials. Default is no credentials (false).", "", "false", "false", "false"
+   "cassandra.credentials.user", "The user name to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "cassandra.credentials.password", "The user password to use for authentication. cassandra.with-credentials must be true for that property to be used.", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
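+As a sketch, a declaration with credentials enabled might look like this (all values are placeholders):
+
+.. code-block:: yaml
+
+    - controllerService: cassandra_service       # placeholder service id
+      component: com.hurence.logisland.service.cassandra.CassandraControllerService
+      type: service
+      configuration:
+        cassandra.hosts: cass1,cass2             # placeholder hosts
+        cassandra.port: 9042                     # placeholder port
+        cassandra.with-credentials: true
+        cassandra.credentials.user: logisland    # placeholder credentials
+        cassandra.credentials.password: changeme
+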
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService: 
+
+Elasticsearch_6_6_2_ClientService
+---------------------------------
+Implementation of ElasticsearchClientService for Elasticsearch 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
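+For illustration, a minimal declaration could be (placeholder host):
+
+.. code-block:: yaml
+
+    - controllerService: elasticsearch_service   # placeholder service id
+      component: com.hurence.logisland.service.elasticsearch.Elasticsearch_6_6_2_ClientService
+      type: service
+      configuration:
+        hosts: sandbox:9300                      # placeholder host:port
+        batch.size: 5000
+        flush.interval: 5
+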
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService: 
+
+HBase_1_1_2_ClientService
+-------------------------
+Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files are provided, they will be loaded first, and the values of the additional properties will override the values from the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase configuration.
+
+Module
+______
+com.hurence.logisland:logisland-service-hbase_1_1_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.hbase.HBase_1_1_2_ClientService
+
+Tags
+____
+hbase, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "hadoop.configuration.files", "Comma-separated list of Hadoop Configuration files, such as hbase-site.xml and core-site.xml for kerberos, including full paths to the files.", "", "null", "false", "false"
+   "zookeeper.quorum", "Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.client.port", "The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "zookeeper.znode.parent", "The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.", "", "null", "false", "false"
+   "hbase.client.retries", "The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.", "", "3", "false", "false"
+   "phoenix.client.jar.location", "The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.", "", "null", "false", "**true**"
+
+Dynamic Properties
+__________________
+Dynamic Properties allow the user to specify both the name and value of a property.
+
+.. csv-table:: dynamic-properties
+   :header: "Name","Value","Description","Allowable Values","Default Value","EL"
+   :widths: 20,20,40,40,20,10
+   :escape: \
+
+   "The name of an HBase configuration property.", "The value of the given HBase configuration property.", "These properties will be set on the HBase configuration after loading any provided configuration files.", "", "null", false
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.influxdb.InfluxDBControllerService: 
+
+InfluxDBControllerService
+-------------------------
+Provides a controller service that, for the moment, only allows bulk put of records into InfluxDB.
+
+Module
+______
+com.hurence.logisland:logisland-service-influxdb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.influxdb.InfluxDBControllerService
+
+Tags
+____
+influxdb, service, time series
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**influxdb.url**", "InfluxDB connection url", "", "null", "false", "false"
+   "influxdb.user", "The user name to use for authentication.", "", "null", "false", "false"
+   "**influxdb.database**", "InfluxDB database name", "", "null", "false", "false"
+   "influxdb.password", "The user password to use for authentication.", "", "null", "false", "false"
+   "influxdb.tags", "List of tags for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 tags: core1 and core2 and the mem measurement has 1 tag: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_fields_but_explicit_tags.", "", "null", "false", "false"
+   "influxdb.fields", "List of fields for each supported measurement.  Syntax: :[,]...[;:,[]]... Example: cpu:core1,core2;mem:used : in this example, the cpu measurement has 2 fields: core1 and core2 and the mem measurement has 1 field: used. This must only be set if configuration mode is explicit_tags_and_fields or all_as_tags_but_explicit_fields.", "", "null", "false", "false"
+   "**influxdb.configuration_mode**", "Determines the way fields and tags are chosen from the logisland record. Possible values and meaning: explicit_tags_and_fields: only logisland record fields listed in influxdb.tags and influxdb.fields will be inserted into InfluxDB with the explicit type. all_as_fields: all available logisland record fields will be inserted into  InfluxDB as fields. all_as_tags_but_explicit_fields: all available logisland record fields will be inserted into  InfluxDB as tags except those listed in influxdb.fields that will be inserted into InfluxDB as fields. all_as_fields_but_explicit_tags: all available logisland record fields will be inserted into  InfluxDB as fields except those listed in influxdb.tags that will be inserted into InfluxDB as tags", "explicit_tags_and_fields, all_as_fields, all_as_fields_but_explicit_tags, all_as_tags_but_explicit_fields", "null", "false", "false"
+   "influxdb.consistency_level", "Determines the consistency level used to write points into InfluxDB. Possible values are: ANY, ONE, QUORUMand ALL. Default value is ANY. This is only useful when  using a clustered InfluxDB infrastructure.", "ANY, ONE, QUORUM, ALL", "ANY", "false", "false"
+   "influxdb.retention_policy", "Determines the name of the retention policy to use. Defaults to autogen. The defined retention policy must already be defined in the InfluxDB server.", "", "autogen", "false", "false"
+   "influxdb.timefield", "Time field for each supported measurement.  Syntax: :,...[;:,]... With format being any constant defined in  java.util.concurrent.TimeUnit enum: DAYS, HOURS, MICROSECONDS, MILLISECONDS, MINUTES, NANOSECONDS or SECONDS. Example: cpu:time,NANOSECONDS;mem:timeStamp,MILLISECONDS In this example: for the cpu measurement, the time for the influx DB point matching the record will be the value of the time field that represents nanoseconds. For the mem measurement, the time for the influx DB point matching the record will be the value of the timeStamp field that represents milliseconds.  Any measurement for which the time field is not defined will use the content of the record_time technical field as the time (which is a number of milliseconds since epoch).", "", "null", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.cache.LRUKeyValueCacheService: 
+
+LRUKeyValueCacheService
+-----------------------
+A controller service for caching data by key/value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-inmemory-cache:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.cache.LRUKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, LRU
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "cache.size", "The maximum number of element in the cache.", "", "16384", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.mongodb.MongoDBControllerService: 
+
+MongoDBControllerService
+------------------------
+Provides a controller service that wraps most of the functionality of the MongoDB driver.
+
+Module
+______
+com.hurence.logisland:logisland-service-mongodb-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.mongodb.MongoDBControllerService
+
+Tags
+____
+mongo, mongodb, service
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the Expression Language.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**mongo.uri**", "MongoURI, typically of the form: mongodb://host1[:port1][,host2[:port2],...]", "", "null", "false", "**true**"
+   "**mongo.db.name**", "The name of the database to use", "", "null", "false", "**true**"
+   "**mongo.collection.name**", "The name of the collection to use", "", "null", "false", "**true**"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "mongo.bulk.mode", "Bulk mode (insert or upsert)", "insert (Insert records whose key must be unique), upsert (Insert records if not already existing or update the record if already existing)", "insert", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "**mongo.write.concern**", "The write concern to use", "ACKNOWLEDGED, UNACKNOWLEDGED, FSYNCED, JOURNALED, REPLICA_ACKNOWLEDGED, MAJORITY", "ACKNOWLEDGED", "false", "false"
+   "mongo.bulk.upsert.condition", "A custom condition for the bulk upsert (Filter for the bulkwrite). If not specified the standard condition is to match same id ('_id': data._id)", "", "${'{ \"_id\" :\"' + record_id + '\"}'}", "false", "**true**"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.redis.service.RedisKeyValueCacheService: 
+
+RedisKeyValueCacheService
+-------------------------
+A controller service for caching records by key/value pair with an LRU (least recently used) strategy, using a LinkedHashMap.
+
+Module
+______
+com.hurence.logisland:logisland-service-redis:1.2.0
+
+Class
+_____
+com.hurence.logisland.redis.service.RedisKeyValueCacheService
+
+Tags
+____
+cache, service, key, value, pair, redis
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**redis.mode**", "The type of Redis being communicated with - standalone, sentinel, or clustered.", "standalone (A single standalone Redis instance.), sentinel (Redis Sentinel which provides high-availability. Described further at https://redis.io/topics/sentinel), cluster (Clustered Redis which provides sharding and replication. Described further at https://redis.io/topics/cluster-spec)", "standalone", "false", "false"
+   "**connection.string**", "The connection string for Redis. In a standalone instance this value will be of the form hostname:port. In a sentinel instance this value will be the comma-separated list of sentinels, such as host1:port1,host2:port2,host3:port3. In a clustered instance this value will be the comma-separated list of cluster masters, such as host1:port,host2:port,host3:port.", "", "null", "false", "false"
+   "**database.index**", "The database index to be used by connections created from this connection pool. See the databases property in redis.conf, by default databases 0-15 will be available.", "", "0", "false", "false"
+   "**communication.timeout**", "The timeout to use when attempting to communicate with Redis.", "", "10 seconds", "false", "false"
+   "**cluster.max.redirects**", "The maximum number of redirects that can be performed when clustered.", "", "5", "false", "false"
+   "sentinel.master", "The name of the sentinel master, require when Mode is set to Sentinel", "", "null", "false", "false"
+   "password", "The password used to authenticate to the Redis server. See the requirepass property in redis.conf.", "", "null", "**true**", "false"
+   "**pool.max.total**", "The maximum number of connections that can be allocated by the pool (checked out to clients, or idle awaiting checkout). A negative value indicates that there is no limit.", "", "8", "false", "false"
+   "**pool.max.idle**", "The maximum number of idle connections that can be held in the pool, or a negative value if there is no limit.", "", "8", "false", "false"
+   "**pool.min.idle**", "The target for the minimum number of idle connections to maintain in the pool. If the configured value of Min Idle is greater than the configured value for Max Idle, then the value of Max Idle will be used instead.", "", "0", "false", "false"
+   "**pool.block.when.exhausted**", "Whether or not clients should block and wait when trying to obtain a connection from the pool when the pool has no available connections. Setting this to false means an error will occur immediately when a client requests a connection and none are available.", "true, false", "true", "false", "false"
+   "**pool.max.wait.time**", "The amount of time to wait for an available connection when Block When Exhausted is set to true.", "", "10 seconds", "false", "false"
+   "**pool.min.evictable.idle.time**", "The minimum amount of time an object may sit idle in the pool before it is eligible for eviction.", "", "60 seconds", "false", "false"
+   "**pool.time.between.eviction.runs**", "The amount of time between attempting to evict idle connections from the pool.", "", "30 seconds", "false", "false"
+   "**pool.num.tests.per.eviction.run**", "The number of connections to tests per eviction attempt. A negative value indicates to test all connections.", "", "-1", "false", "false"
+   "**pool.test.on.create**", "Whether or not connections should be tested upon creation.", "true, false", "false", "false", "false"
+   "**pool.test.on.borrow**", "Whether or not connections should be tested upon borrowing from the pool.", "true, false", "false", "false", "false"
+   "**pool.test.on.return**", "Whether or not connections should be tested upon returning to the pool.", "true, false", "false", "false", "false"
+   "**pool.test.while.idle**", "Whether or not connections should be tested while idle.", "true, false", "true", "false", "false"
+   "**record.recordSerializer**", "the way to serialize/deserialize the record", "com.hurence.logisland.serializer.KryoSerializer (serialize events as json blocs), com.hurence.logisland.serializer.JsonSerializer (serialize events as json blocs), com.hurence.logisland.serializer.AvroSerializer (serialize events as avro blocs), com.hurence.logisland.serializer.BytesArraySerializer (serialize events as byte arrays), com.hurence.logisland.serializer.KuraProtobufSerializer (serialize events as Kura protocol buffer), none (send events as bytes)", "com.hurence.logisland.serializer.JsonSerializer", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr_6_6_2_ClientService: 
+
+Solr_6_6_2_ClientService
+------------------------
+Implementation of SolrClientService for Solr 6.6.2.
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_6_6_2-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr_6_6_2_ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService: 
+
+Elasticsearch_7_x_ClientService
+-------------------------------
+Implementation of ElasticsearchClientService for ElasticSearch 7.x. Note that although Elasticsearch 7.x still accepts type information, this implementation will ignore any type usage and will only work at the index level to be already compliant with the ElasticSearch 8.x version that will completely remove type usage.
+
+Module
+______
+com.hurence.logisland:logisland-service-elasticsearch_7_x-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.elasticsearch.Elasticsearch_7_x_ClientService
+
+Tags
+____
+elasticsearch, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property is considered "sensitive".
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "**backoff.policy**", "strategy for retrying to execute requests in bulkRequest", "noBackoff (when a request fail there won't be any retry.), constantBackoff (wait a fixed amount of time between retries, using user put retry number and throttling delay), exponentialBackoff (time waited between retries grow exponentially, using user put retry number and throttling delay), defaultExponentialBackoff (time waited between retries grow exponentially, using es default parameters)", "defaultExponentialBackoff", "false", "false"
+   "**throttling.delay**", "number of time we should wait between each retry (in milliseconds)", "", "500", "false", "false"
+   "**num.retry**", "number of time we should try to inject a bulk into es", "", "3", "false", "false"
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "flush.interval", "flush interval in sec", "", "5", "false", "false"
+   "concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "**ping.timeout**", "The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). If non-local recommended is 30s", "", "5s", "false", "false"
+   "**sampler.interval**", "How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.", "", "5s", "false", "false"
+   "username", "Username to access the Elasticsearch cluster", "", "null", "false", "false"
+   "password", "Password to access the Elasticsearch cluster", "", "null", "**true**", "false"
+   "enable.ssl", "Whether to enable (true) TLS/SSL connections or not (false). This can for instance be used with opendistro. Defaults to false. Note that the current implementation does try to validate the server certificate.", "", "false", "false", "false"
+   "shield.location", "Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.", "", "null", "false", "false"
+   "**hosts**", "ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,....  For example testcluster:9300.", "", "null", "false", "false"
+   "ssl.context.service", "The SSL Context Service used to provide client certificate information for TLS/SSL connections. This service only applies if the Shield plugin is available.", "", "null", "false", "false"
+   "**charset**", "Specifies the character set of the document data.", "", "UTF-8", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
+
+----------
+
+.. _com.hurence.logisland.service.solr.Solr8ClientService: 
+
+Solr8ClientService
+------------------
+Implementation of SolrClientService for Solr 8
+
+Module
+______
+com.hurence.logisland:logisland-service-solr_8-client:1.2.0
+
+Class
+_____
+com.hurence.logisland.service.solr.Solr8ClientService
+
+Tags
+____
+solr, client
+
+Properties
+__________
+In the list below, the names of required properties appear in **bold**. Any other properties (not in bold) are considered optional. The table also indicates any default values.
+
+.. csv-table:: allowable-values
+   :header: "Name","Description","Allowable Values","Default Value","Sensitive","EL"
+   :widths: 20,60,30,20,10,10
+   :escape: \
+
+   "batch.size", "The preferred number of Records to setField to the database in a single transaction", "", "1000", "false", "false"
+   "bulk.size", "bulk size in MB", "", "5", "false", "false"
+   "**solr.cloud**", "is slor cloud enabled", "", "false", "false", "false"
+   "**solr.collection**", "name of the collection to use", "", "null", "false", "false"
+   "**solr.connection.string**", "zookeeper quorum host1:2181,host2:2181 for solr cloud or http address of a solr core ", "", "localhost:8983/solr", "false", "false"
+   "solr.concurrent.requests", "setConcurrentRequests", "", "2", "false", "false"
+   "flush.interval", "flush interval in ms", "", "500", "false", "false"
+   "schema.update_timeout", "Schema update timeout interval in s", "", "15", "false", "false"
+
+Extra information
+__________________
+No additional information is provided
diff --git a/logisland-resources/src/main/resources/bin/logisland.sh b/logisland-resources/src/main/resources/bin/logisland.sh
index cbc50d8fd..47589b8c2 100755
--- a/logisland-resources/src/main/resources/bin/logisland.sh
+++ b/logisland-resources/src/main/resources/bin/logisland.sh
@@ -2,7 +2,6 @@
 
 #. $(dirname $0)/launcher.sh
 
-
 case "$(uname -s)" in
    Darwin)
      echo "I've detected that you're running Mac OS X, using greadlink instead of readlink"
@@ -58,6 +57,22 @@ initSparkJarsOptRecursively() {
     return 0;
 }
 
+# Create app classpath for spark standalone mode
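+# The resulting jar list is comma-separated, which is the list format spark-submit expects for its --jars option.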
+initSparkStandaloneClassPath() {
+    for entry in `ls ${1}/*.jar`
+    do
+      #echo "add spark standalone jar ${entry}"
+      if [[ -z "$spark_standalone_classpath" ]]
+        then
+          spark_standalone_classpath="$entry"
+        else
+          spark_standalone_classpath="$entry,$spark_standalone_classpath"
+        fi
+    done
+
+    echo $spark_standalone_classpath
+    return 0
+}
 
 # update $java_cp so that it contains all logisland jars except for engines.
 # we look for jars into specified dir recursively.
@@ -100,6 +115,7 @@ usage() {
   echo "  --conf  : provides the configuration file"
   echo "  --standalone start logisland in standalone mode (no spark required)"
   echo "  --spark-home : sets the SPARK_HOME (defaults to \$SPARK_HOME environment variable)"
+  echo "  --spark-standalone-dir : sets the base shared directory for logisland jars for spark standlone (experimental)"
   echo "  --help : displays help"
 }
 
@@ -167,6 +183,10 @@ parse_input() {
           SPARK_HOME="$2"
           shift
           ;;
+        --spark-standalone-dir)
+          SPARK_STANDALONE_DIR="$2"
+          shift
+          ;;
         --help)
           usage
           exit 0
@@ -252,7 +272,7 @@ main() {
 
     # ----------------------------------------------------------------
     # find the spark-submit mode
-    # can be either local, standalone, mesos or yarn
+    # can be either local, standalone, spark (standalone), mesos or yarn
     # ----------------------------------------------------------------
     if [[ "$STANDALONE" = true ]] ;
     then
@@ -271,19 +291,24 @@ main() {
             case $? in
                 2) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_1_6-*.jar` ;;
                 *) compare_versions ${SPARK_VERSION} 2.3.0
-                    case $? in
-                        2) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_1-*.jar` ;;
-                        *) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_3-*.jar` ;;
-
+                   case $? in
+                       2) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_1-*.jar` ;;
+                       0) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_3-*.jar` ;;
+                       *) compare_versions ${SPARK_VERSION} 2.4.0
+                          case $? in
+                              2) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_3-*.jar` ;;
+                              *) engine_jar=`ls ${lib_dir}/engines/logisland-engine-spark_2_4-*.jar` ;;
+                          esac
                     esac
-                   ;;
             esac
 
-
-
         export SPARK_PRINT_LAUNCH_COMMAND=1
         echo "Detected spark version ${SPARK_VERSION}. We'll automatically plug in engine jar ${engine_jar}"
         APP_NAME=`awk '{ if( $1 == "spark.app.name:" ){ print $2 } }' ${CONF_FILE}`
+
+        #
+        # YARN mode?
+        #
         MODE=`awk '{ if( $1 == "spark.master:" ){ print $2 } }' ${CONF_FILE}`
         case ${MODE} in
           "yarn")
@@ -303,31 +328,60 @@ main() {
               MODE=${MODE}-${EXTRA_MODE}
             fi
             ;;
-        esac
 
+        esac
 
+        #
+        # MESOS mode?
+        #
         if [[ "${MODE}" =~ "mesos" ]]
         then
             SPARK_MASTER=${MODE}
             MODE="mesos"
         fi
 
+        #
+        # Spark standalone mode?
+        #
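+        # This mode requires the --spark-standalone-dir option and, in the conf file,
+        # spark.master set to a spark://host:port URL plus spark.deploy-mode set to
+        # either "cluster" or "client".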
+        if [[ "${MODE}" =~ ^spark://.* ]] # Starts with spark:// (spark standalone url)
+        then
+            if [[ -z "${SPARK_STANDALONE_DIR}" ]]
+            then
+             echo "Spark standalone mode requires --spark-standalone-dir option to be set"
+             exit 1
+            fi
 
+            SPARK_MASTER=${MODE}
+            EXTRA_MODE=`awk '{ if( $1 == "spark.deploy-mode:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ -z "${EXTRA_MODE}" ]]
+            then
+             echo "The property \"spark.deploy-mode\" is missing in config file \"${CONF_FILE}\""
+             exit 1
+            fi
+
+            if [[ ! ${EXTRA_MODE} = "cluster" && ! ${EXTRA_MODE} = "client" ]]
+            then
+              echo "The property \"spark.deploy-mode\" value \"${EXTRA_MODE}\" is not supported"
+              exit 1
+            else
+              MODE=spark-${EXTRA_MODE}
+            fi
+        fi
 
         if [[ ! -z "${VERBOSE_OPTIONS}" ]]
         then
-          echo "Starting with mode \"${MODE}\" on master \"${SPARK_MASTER}\""
+          echo "Starting with run mode \"${MODE}\" on master \"${SPARK_MASTER}\""
         fi
 
         case ${MODE} in
           local*)
 
             ${SPARK_HOME}/bin/spark-submit ${VERBOSE_OPTIONS} ${YARN_CLUSTER_OPTIONS} \
-             --driver-library-path ${OPENCV_NATIVE_LIB_PATH} \
-             --conf spark.driver.extraJavaOptions="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 -Dlog4j.configuration=\"file:${lib_dir}/../conf/log4j.properties\"" \
-             --conf spark.executor.extraJavaOptions="-Dlog4j.configuration=\"file:${lib_dir}/../conf/log4j.properties\"" \
-             --conf spark.metrics.namespace="${APP_NAME}"  \
-             --conf spark.metrics.conf="${lib_dir}/../monitoring/metrics.properties"  \
+            --driver-library-path ${OPENCV_NATIVE_LIB_PATH} \
+            --conf spark.driver.extraJavaOptions="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 -Dlog4j.configuration=\"file:${lib_dir}/../conf/log4j.properties\"" \
+            --conf spark.executor.extraJavaOptions="-Dlog4j.configuration=\"file:${lib_dir}/../conf/log4j.properties\"" \
+            --conf spark.metrics.namespace="${APP_NAME}"  \
+            --conf spark.metrics.conf="${lib_dir}/../monitoring/metrics.properties"  \
             --class ${app_mainclass} \
             --jars ${app_classpath} ${engine_jar} \
             -conf ${CONF_FILE}
@@ -335,8 +389,7 @@ main() {
             ;;
           yarn-cluster)
 
-            YARN_CLUSTER_OPTIONS="--master yarn --deploy-mode cluster --files ${CONF_FILE}#logisland-configuration.yml,${CONF_DIR}/../monitoring/jmx_prometheus_javaagent-0.10.jar#jmx_prometheus_javaagent-0.10.jar,${CONF_DIR}/../monitoring/spark-prometheus.yml#spark-prometheus.yml,${CONF_DIR}/../monitoring/metrics.properties#metrics.properties,${CONF_DIR}/log4j.properties#log4j.properties --conf spark.metrics.namespace=\"${APP_NAME}\" --conf \"spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j.properties\" --conf spark.ui.showConsoleProgress=false"
-
+            YARN_CLUSTER_OPTIONS="--master yarn --deploy-mode cluster --files ${CONF_FILE}#logisland-configuration.yml,${CONF_DIR}/../monitoring/jmx_prometheus_javaagent-0.10.jar#jmx_prometheus_javaagent-0.10.jar,${CONF_DIR}/../monitoring/spark-prometheus.yml#spark-prometheus.yml,${CONF_DIR}/../monitoring/metrics.properties#metrics.properties,${CONF_DIR}/log4j.properties#log4j.properties --conf spark.metrics.namespace=\"${APP_NAME}\" --conf spark.ui.showConsoleProgress=false"
 
             if [[ ! -z "${YARN_APP_NAME}" ]]
             then
@@ -415,7 +468,6 @@ main() {
                  YARN_CLUSTER_OPTIONS="${YARN_CLUSTER_OPTIONS} --conf spark.task.maxFailures=${SPARK_TASK_MAX_FAILURES}"
             fi
 
-
             PROPERTIES_FILE_PATH=`awk '{ if( $1 == "spark.properties.file.path:" ){ print $2 } }' ${CONF_FILE}`
             if [[ ! -z "${PROPERTIES_FILE_PATH}" ]]
             then
@@ -440,7 +492,7 @@ main() {
             --conf "${EXTRA_PROCESSOR_JAVA_OPTIONS}" \
             --class ${app_mainclass} \
             --jars ${app_classpath} ${engine_jar} \
-             -conf ${CONF_FILE}
+            -conf ${CONF_FILE}
 
             ;;
           yarn-client)
@@ -470,14 +522,13 @@ main() {
             --conf spark.metrics.conf=./metrics.properties \
             --class ${app_mainclass} \
             --jars ${app_classpath} ${engine_jar} \
-             -conf ${CONF_FILE}
+            -conf ${CONF_FILE}
             ;;
 
           mesos)
 
             MESOS_OPTIONS="--master ${SPARK_MASTER} --conf spark.metrics.namespace=\"${APP_NAME}\""
 
-
             DRIVER_CORES=`awk '{ if( $1 == "spark.driver.cores:" ){ print $2 } }' ${CONF_FILE}`
             if [[ ! -z "${DRIVER_CORES}" ]]
             then
@@ -517,12 +568,10 @@ main() {
 
             MESOS_NATIVE_JAVA_LIBRARY=`awk '{ if( $1 == "java.library.path:" ){ print $2 } }' ${CONF_FILE}`
 
-
-
             #--deploy-mode cluster \
             #--supervise \
             #--executor-memory 20G \
-           # --total-executor-cores 100 \
+            # --total-executor-cores 100 \
 
             export MESOS_NATIVE_JAVA_LIBRARY="${MESOS_NATIVE_JAVA_LIBRARY}"
             ${SPARK_HOME}/bin/spark-submit ${VERBOSE_OPTIONS} ${MESOS_OPTIONS} \
@@ -532,6 +581,142 @@ main() {
             -conf ${CONF_FILE}
             ;;
 
+          spark-cluster)
+            SPARK_CLUSTER_OPTIONS="--master ${SPARK_MASTER} --deploy-mode cluster --files ${CONF_FILE}#logisland-configuration.yml,${CONF_DIR}/../monitoring/jmx_prometheus_javaagent-0.10.jar#jmx_prometheus_javaagent-0.10.jar,${CONF_DIR}/../monitoring/spark-prometheus.yml#spark-prometheus.yml,${CONF_DIR}/../monitoring/metrics.properties#metrics.properties,${CONF_DIR}/log4j.properties#log4j.properties --conf spark.metrics.namespace=\"${APP_NAME}\" --conf spark.ui.showConsoleProgress=false"
+
+            if [[ ! -z "${SPARK_APP_NAME}" ]]
+            then
+              SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --name ${SPARK_APP_NAME}"
+            else
+              SPARK_APP_NAME=`awk '{ if( $1 == "spark.app.name:" ){ print $2 } }' ${CONF_FILE}`
+              if [[ ! -z "${SPARK_APP_NAME}" ]]
+              then
+                SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --name ${SPARK_APP_NAME}"
+              fi
+            fi
+
+            DRIVER_CORES=`awk '{ if( $1 == "spark.driver.cores:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${DRIVER_CORES}" ]]
+            then
+             SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --driver-cores ${DRIVER_CORES}"
+            fi
+
+            DRIVER_MEMORY=`awk '{ if( $1 == "spark.driver.memory:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${DRIVER_MEMORY}" ]]
+            then
+             SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --driver-memory ${DRIVER_MEMORY}"
+            fi
+
+            EXECUTORS_CORES=`awk '{ if( $1 == "spark.executor.cores:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${EXECUTORS_CORES}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --executor-cores ${EXECUTORS_CORES}"
+            fi
+
+            EXECUTORS_MEMORY=`awk '{ if( $1 == "spark.executor.memory:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${EXECUTORS_MEMORY}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --executor-memory ${EXECUTORS_MEMORY}"
+            fi
+
+            EXECUTORS_INSTANCES=`awk '{ if( $1 == "spark.executor.instances:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${EXECUTORS_INSTANCES}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --num-executors ${EXECUTORS_INSTANCES}"
+            fi
+
+            SPARK_TASK_MAX_FAILURES=`awk '{ if( $1 == "spark.task.maxFailures:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${SPARK_TASK_MAX_FAILURES}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --conf spark.task.maxFailures=${SPARK_TASK_MAX_FAILURES}"
+            fi
+
+            PROPERTIES_FILE_PATH=`awk '{ if( $1 == "spark.properties.file.path:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${PROPERTIES_FILE_PATH}" ]]
+            then
+                SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --properties-file ${PROPERTIES_FILE_PATH}"
+            fi
+
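
For example, a configuration declaring spark.driver.memory: 2g, spark.executor.cores: 2 and spark.executor.instances: 4 (hypothetical values) would extend SPARK_CLUSTER_OPTIONS with:

    --driver-memory 2g --executor-cores 2 --num-executors 4
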
+            SPARK_MONITORING_DRIVER_PORT=`awk '{ if( $1 == "spark.monitoring.driver.port:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ -z "${SPARK_MONITORING_DRIVER_PORT}" ]]
+            then
+                 EXTRA_DRIVER_JAVA_OPTIONS='spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j.properties'
+                 EXTRA_PROCESSOR_JAVA_OPTIONS='spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j.properties'
+            else
+                 EXTRA_DRIVER_JAVA_OPTIONS='spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j.properties -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=0 -Dcom.sun.management.jmxremote.rmi.port=0 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -javaagent:./jmx_prometheus_javaagent-0.10.jar='${SPARK_MONITORING_DRIVER_PORT}':./spark-prometheus.yml'
+                 EXTRA_PROCESSOR_JAVA_OPTIONS='spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j.properties -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=0 -Dcom.sun.management.jmxremote.rmi.port=0 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -javaagent:./jmx_prometheus_javaagent-0.10.jar='${SPARK_MONITORING_DRIVER_PORT}':./spark-prometheus.yml'
+            fi
+
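
With a hypothetical spark.monitoring.driver.port: 7071 in the configuration file, the driver option assembled above expands to:

    spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j.properties -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=0 -Dcom.sun.management.jmxremote.rmi.port=0 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -javaagent:./jmx_prometheus_javaagent-0.10.jar=7071:./spark-prometheus.yml

so the Prometheus JMX exporter agent is attached to the driver; when the property is absent, only the log4j configuration is set.
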
+            CONF_FILE="logisland-configuration.yml"
+
+            engine_jar=`basename $engine_jar`
+
+            spark_standalone_classpath=`initSparkStandaloneClassPath ${SPARK_STANDALONE_DIR}`
+
+            ${SPARK_HOME}/bin/spark-submit ${VERBOSE_OPTIONS} ${SPARK_CLUSTER_OPTIONS} \
+            --conf "${EXTRA_DRIVER_JAVA_OPTIONS}" \
+            --conf "${EXTRA_PROCESSOR_JAVA_OPTIONS}" \
+            --class ${app_mainclass} \
+            --jars ${spark_standalone_classpath} ${SPARK_STANDALONE_DIR}/${engine_jar} \
+            -conf ${CONF_FILE}
+            ;;
+
+          spark-client)
+            SPARK_CLUSTER_OPTIONS="--master ${SPARK_MASTER} --deploy-mode client --conf spark.metrics.namespace=\"${APP_NAME}\""
+
+            if [[ ! -z "${SPARK_APP_NAME}" ]]
+            then
+              SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --name ${SPARK_APP_NAME}"
+            else
+              SPARK_APP_NAME=`awk '{ if( $1 == "spark.app.name:" ){ print $2 } }' ${CONF_FILE}`
+              if [[ ! -z "${SPARK_APP_NAME}" ]]
+              then
+                SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --name ${SPARK_APP_NAME}"
+              fi
+            fi
+
+            DRIVER_CORES=`awk '{ if( $1 == "spark.driver.cores:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${DRIVER_CORES}" ]]
+            then
+             SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --driver-cores ${DRIVER_CORES}"
+            fi
+
+            DRIVER_MEMORY=`awk '{ if( $1 == "spark.driver.memory:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${DRIVER_MEMORY}" ]]
+            then
+             SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --driver-memory ${DRIVER_MEMORY}"
+            fi
+
+            PROPERTIES_FILE_PATH=`awk '{ if( $1 == "spark.properties.file.path:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${PROPERTIES_FILE_PATH}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --properties-file ${PROPERTIES_FILE_PATH}"
+            fi
+
+            EXECUTORS_INSTANCES=`awk '{ if( $1 == "spark.executor.instances:" ){ print $2 } }' ${CONF_FILE}`
+            if [[ ! -z "${EXECUTORS_INSTANCES}" ]]
+            then
+                 SPARK_CLUSTER_OPTIONS="${SPARK_CLUSTER_OPTIONS} --num-executors ${EXECUTORS_INSTANCES}"
+            fi
+
+            EXTRA_DRIVER_JAVA_OPTIONS='spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j.properties'
+            EXTRA_PROCESSOR_JAVA_OPTIONS='spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j.properties'
+
+            engine_jar=`basename $engine_jar`
+
+            spark_standalone_classpath=`initSparkStandaloneClassPath ${SPARK_STANDALONE_DIR}`
+
+            ${SPARK_HOME}/bin/spark-submit ${VERBOSE_OPTIONS} ${SPARK_CLUSTER_OPTIONS} \
+            --conf spark.metrics.conf="${CONF_DIR}/../monitoring/metrics.properties"  \
+            --conf "${EXTRA_DRIVER_JAVA_OPTIONS}" \
+            --conf "${EXTRA_PROCESSOR_JAVA_OPTIONS}" \
+            --class ${app_mainclass} \
+            --jars ${spark_standalone_classpath} ${SPARK_STANDALONE_DIR}/${engine_jar} \
+            -conf ${CONF_FILE}
+            ;;
+
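
Both standalone branches hand the engine jar and the jars deployed under --spark-standalone-dir to spark-submit, building the --jars value through initSparkStandaloneClassPath, whose definition lives elsewhere in this patch. A minimal sketch of what such a helper could look like, assuming it simply joins every jar in the directory into the comma-separated list expected by --jars:

    initSparkStandaloneClassPath() {
        # Hypothetical sketch: list the jars in the given directory and join them with commas.
        ls "$1"/*.jar | paste -sd ',' -
    }
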
+          *)
+            echo "Unsupported run mode: ${MODE}"
+            exit 1
+            ;;
+
           esac
     fi
 }
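
Putting the launcher changes together, a submission against a standalone master might be started like this (hypothetical script, file and path names; only the --spark-standalone-dir option name is taken from the error message above):

    bin/logisland.sh --conf conf/index-apache-logs.yml --spark-standalone-dir /opt/logisland/lib

The configuration file then selects cluster or client behaviour through spark.master and spark.deploy-mode as shown earlier.
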
diff --git a/logisland-resources/src/main/resources/conf/ivy.xml b/logisland-resources/src/main/resources/conf/ivy.xml
index e75c59230..07ac15d0f 100644
--- a/logisland-resources/src/main/resources/conf/ivy.xml
+++ b/logisland-resources/src/main/resources/conf/ivy.xml
@@ -19,8 +19,9 @@
     
     
         
+            
             
-            
+            
             
             
         
diff --git a/pom.xml b/pom.xml
index ef8341738..eadcdfb0c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -107,7 +107,7 @@
         3.7
 
         1.5.9
-        2.9.10
+        2.10.3
 
         2.12.9
         2.12
@@ -163,7 +163,7 @@
         
         
             jcenter
-            http://jcenter.bintray.com
+            https://jcenter.bintray.com
             
                 false