From 6a2c1a5791128b4349ef1e4d0885b4c9e3b276f3 Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Mon, 29 Jan 2024 08:38:13 -0500 Subject: [PATCH] Add support for setting DFDL external variables The name/value of NiFi dynamic properties are now treated as DFDL external variables. Property values are allowed to be NiFi expressions, which are evaluated and set as the variable values. If a property value evaluates to the empty string, it is ignored--this helps when a NiFi expression determines that a variable does not apply for a schema, since DFDL requires that all variables passed in externally must be valid for the schema. This does mean it is not possible to have a variable with the value of an empty string, but this should be rare and can be worked around. This also refactored exceptions, so that functions throw correct exceptions instead of turning everything into an IOException, which worked but is not technically correct. Also added a new "External Variables" section to additional details pages documenting this capability, and cleaned up the additional details pages. 
--- .../processors/AbstractDaffodilProcessor.java | 97 ++++++++++--- .../processors/DaffodilCompileException.java | 2 +- .../nifi/processors/DaffodilParse.java | 8 ++ .../nifi/processors/DaffodilUnparse.java | 8 ++ .../additionalDetails.html | 135 +++++++++++------- .../additionalDetails.html | 135 +++++++++++------- .../processors/TestDaffodilProcessor.java | 83 +++++++++++ .../TestDaffodilProcessor/bitlength.dfdl.xsd | 2 +- .../noleftover_le.bin.xml | 1 + 9 files changed, 344 insertions(+), 127 deletions(-) create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml diff --git a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/AbstractDaffodilProcessor.java b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/AbstractDaffodilProcessor.java index 1e98f6f..bcc618a 100644 --- a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/AbstractDaffodilProcessor.java +++ b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/AbstractDaffodilProcessor.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.nio.channels.Channels; @@ -28,7 +29,9 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -37,8 +40,10 @@ import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.PropertyValue; import org.apache.nifi.components.resource.ResourceCardinality; import org.apache.nifi.components.resource.ResourceType; +import 
org.apache.nifi.components.Validator; import org.apache.nifi.expression.ExpressionLanguageScope; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.flowfile.attributes.CoreAttributes; @@ -61,6 +66,7 @@ import org.apache.daffodil.japi.Daffodil; import org.apache.daffodil.japi.DataProcessor; import org.apache.daffodil.japi.Diagnostic; +import org.apache.daffodil.japi.ExternalVariableException; import org.apache.daffodil.japi.ProcessorFactory; import org.apache.daffodil.japi.WithDiagnostics; import org.apache.daffodil.japi.ValidationMode; @@ -261,7 +267,7 @@ public boolean equals(Object obj) { * happen outside of this function, as those changes will not be cached and will need to * be done for every flow file, which could have performance implications. */ - DataProcessor newDataProcessor(ComponentLog logger) throws IOException { + DataProcessor newDataProcessor(ComponentLog logger) throws DaffodilCompileException { // Try to find the schema to compile or reload. If dfdlSchema is a file that exists, // we just use that. If dfdlSchema is not a file, try to find it on the classpath, @@ -273,7 +279,11 @@ DataProcessor newDataProcessor(ComponentLog logger) throws IOException { URL schemaURL = null; File f = new File(this.dfdlSchema); if (f.isFile()) { - schemaURL = f.toURI().toURL(); + try { + schemaURL = f.toURI().toURL(); + } catch (MalformedURLException e) { + throw new DaffodilCompileException("Invalid 'DFDL Schema File' property: " + e); + } } else { // it is important to use getClassLoader.getResource() here. If we just do // getClass.getResource() then Java will prepend the classes package to @@ -296,7 +306,7 @@ DataProcessor newDataProcessor(ComponentLog logger) throws IOException { dp = c.reload(rbc); rbc.close(); is.close(); - } catch (InvalidParserException e) { + } catch (InvalidParserException|IOException e) { logger.error("Failed to reload pre-compiled DFDL schema: " + this.dfdlSchema + ". 
" + e.getMessage()); throw new DaffodilCompileException("Failed to reload pre-compiled DFDL schema: " + this.dfdlSchema + ". " + e.getMessage()); } @@ -314,6 +324,8 @@ DataProcessor newDataProcessor(ComponentLog logger) throws IOException { AbstractDaffodilProcessor.logDiagnostics(logger, dp); throw new DaffodilCompileException("Failed to compile DFDL schema: " + this.dfdlSchema); } + } catch (IOException e) { + throw new DaffodilCompileException("Failed to compile DFDL schema: " + this.dfdlSchema, e); } catch (URISyntaxException e) { throw new AssertionError("invalid URI should no be possible: " + e); } @@ -321,18 +333,18 @@ DataProcessor newDataProcessor(ComponentLog logger) throws IOException { try { dp = dp.withValidationMode(this.validationMode); } catch (InvalidUsageException e) { - throw new IOException(e); + throw new AssertionError("invalid usage of Daffodil API: " + e); } return dp; } } - protected DataProcessor getDataProcessor(CompilationParams params) throws IOException { + protected DataProcessor getDataProcessor(CompilationParams params) throws DaffodilCompileException { if (cache != null) { try { return cache.get(params); } catch (ExecutionException e) { - throw new IOException(e); + throw new DaffodilCompileException(e); } } else { return params.newDataProcessor(getLogger()); @@ -349,6 +361,22 @@ protected List getSupportedPropertyDescriptors() { return properties; } + @Override + protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { + // all dynamic properties are treated as variables, with the property name/value set as the variable + // name/value. NiFi expressions are allowed to support dynamic variable values, such as from flow file + // attributes. We do not need a validator, since a variable value could theoretically contain + // anyting--we rely on Daffodil to check the variable value according to its variable type when we + // provide the variables at parse/unparse time. 
+ return new PropertyDescriptor.Builder() + .name(propertyDescriptorName) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .addValidator(Validator.VALID) + .dynamic(true) + .build(); + } + + @OnScheduled public void onScheduled(final ProcessContext context) { final ComponentLog logger = getLogger(); @@ -363,7 +391,7 @@ public void onScheduled(final ProcessContext context) { cache = cacheBuilder.build( new CacheLoader() { - public DataProcessor load(CompilationParams params) throws IOException { + public DataProcessor load(CompilationParams params) throws DaffodilCompileException { return params.newDataProcessor(logger); } }); @@ -413,33 +441,68 @@ public void onTrigger(final ProcessContext context, final ProcessSession session infosetType = infosetTypeValue; } + + try { + // Get the DataProcessor, likely from a cache of already compiled data processors. The only change + // that should happen to the cached DataProcessor is setting variables specific to the flowfile. + // We don't cache DataProcessor with variables preset because variables are expressions and could + // change per flow file. Also, assigning external variables is pretty efficient so not worth + // caching. + final DataProcessor cachedDP; + final DataProcessor dpForProcessing; + try { + cachedDP = getDataProcessor(params); + } catch (DaffodilCompileException e) { + throw new ProcessException(e); + } + + // Treat dynamic properties as variables. If the value of the variable is the empty + // string (or an expression that evaluates to the empty string), the dynamic property is + // ignored and is not added as a variable. This supports expressions that determine that + // a variable does not apply to a schema and to ignore it, since all variables passed to + // withExternalVariables must be valid for that schema. 
+ final LinkedHashMap variableMap = new LinkedHashMap<>(); + for (final PropertyDescriptor pd : context.getProperties().keySet()) { + if (pd.isDynamic()) { + final String value = context.getProperty(pd.getName()).evaluateAttributeExpressions(original).getValue(); + if (!value.isEmpty()) { + variableMap.put(pd.getName(), value); + } + } + } + + if (variableMap.isEmpty()) { + dpForProcessing = cachedDP; + } else { + try { + dpForProcessing = cachedDP.withExternalVariables(variableMap); + } catch (ExternalVariableException ex) { + throw new ProcessException("variables not valid for schema: " + ex.getMessage()); + } + } + FlowFile output = session.write(original, new StreamCallback() { @Override public void process(final InputStream in, final OutputStream out) throws IOException { - // Get the DataProcessor, likely from a cache of already compiled data - // processors. Note that no changes to the DataProcessor should happen. Any - // changes to a DataProcessor should happen in the CompilationParams - // newDataProcessor() function using only the parameters passed to the - // CompilationParams constructor - DataProcessor dp = getDataProcessor(params); - - // Parse or unparse the flow file, reading from he input stream and writing + // Parse or unparse the flow file, reading from the input stream and writing // to the output stream - processWithDaffodil(dp, original, in, out, infosetType); + processWithDaffodil(dpForProcessing, original, in, out, infosetType); } }); + final String outputMimeType = getOutputMimeType(infosetType); if (outputMimeType != null) { output = session.putAttribute(output, CoreAttributes.MIME_TYPE.key(), outputMimeType); } else { output = session.removeAttribute(output, CoreAttributes.MIME_TYPE.key()); } + session.transfer(output, REL_SUCCESS); session.getProvenanceReporter().modifyContent(output, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); logger.debug("Processed {}", new Object[]{original}); } catch (ProcessException e) { - logger.error("Failed 
to process {} due to {}", new Object[]{original, e}); + logger.error("Failed to process {} due to {}", new Object[]{original, e.getMessage()}); session.transfer(original, REL_FAILURE); } } diff --git a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilCompileException.java b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilCompileException.java index f1fa70a..8d785fa 100644 --- a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilCompileException.java +++ b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilCompileException.java @@ -18,7 +18,7 @@ import java.io.IOException; -public class DaffodilCompileException extends IOException { +public class DaffodilCompileException extends Exception { public DaffodilCompileException() { super(); diff --git a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilParse.java b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilParse.java index 6888680..f008d09 100644 --- a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilParse.java +++ b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilParse.java @@ -25,6 +25,7 @@ import java.nio.channels.ReadableByteChannel; import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.SideEffectFree; @@ -32,6 +33,7 @@ import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.expression.ExpressionLanguageScope; import org.apache.nifi.flowfile.FlowFile; import 
org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; @@ -51,6 +53,12 @@ @Tags({"xml", "json", "daffodil", "dfdl", "schema", "xsd"}) @CapabilityDescription("Use Daffodil and a user-specified DFDL schema to transform data to an infoset, represented by either XML or JSON.") @WritesAttribute(attribute = "mime.type", description = "Sets the mime type to application/json or application/xml based on the infoset type.") +@DynamicProperty( + name = "Name of external variable defined in a DFDL schema", + value = "Value to set for the DFDL external variable. May be an expression. The DFDL variable is not set if the value expression evaluates to an empty string.", + description = "Defines an external variable to be used when parsing", + expressionLanguageScope = ExpressionLanguageScope.FLOWFILE_ATTRIBUTES +) @RequiresInstanceClassLoading public class DaffodilParse extends AbstractDaffodilProcessor { diff --git a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilUnparse.java b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilUnparse.java index 5e2158f..046363a 100644 --- a/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilUnparse.java +++ b/nifi-daffodil-processors/src/main/java/com/owlcyberdefense/nifi/processors/DaffodilUnparse.java @@ -25,6 +25,7 @@ import java.nio.channels.WritableByteChannel; import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.SideEffectFree; @@ -32,6 +33,7 @@ import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.expression.ExpressionLanguageScope; import 
org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; @@ -48,6 +50,12 @@ @Tags({"xml", "json", "daffodil", "dfdl", "schema", "xsd"}) @CapabilityDescription("Use Daffodil and a user-specified DFDL schema to transform an XML or JSON representation of data back to the original data format.") @WritesAttribute(attribute = "mime.type", description = "If the FlowFile is successfully unparsed, this attriute is removed, as the MIME Type is no longer known.") +@DynamicProperty( + name = "Name of external variable defined in a DFDL schema", + value = "Value to set for the DFDL external variable. May be an expression. The DFDL variable is not set if the value expression evaluates to an empty string.", + description = "Defines an external variable to be used when parsing", + expressionLanguageScope = ExpressionLanguageScope.FLOWFILE_ATTRIBUTES +) @RequiresInstanceClassLoading public class DaffodilUnparse extends AbstractDaffodilProcessor { diff --git a/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilParse/additionalDetails.html b/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilParse/additionalDetails.html index 2122f7e..f79e76e 100644 --- a/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilParse/additionalDetails.html +++ b/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilParse/additionalDetails.html @@ -19,7 +19,7 @@ DaffodilParse - + @@ -46,80 +46,107 @@

Daffodil and DFDL

Publicly available DFDL schemas are available on the DFDL Schemas github.

+

External Variables

+

+ DFDL external variables are supported using NiFi's dynamic properties. +

+

+ To set an external variable, click the "+" sign in the top right of the processor's + properties tab to add a new dynamic property. The name of the property should be the same as the name of the + variable. Additionally, if the variable name alone is ambiguous, you can provide a namespace prefix and a + colon (e.g. ns:variableName) or a full namespace URI surrounded by curly braces (e.g. + {http://example.com/namespace}variableName). +

+

+ The value of the dynamic property is used for the value of the variable, and can be a simple value or a NiFi + expression to be evaluated for each flow file. In either case, if the value is the empty string, the variable is + ignored for that flowfile and is not provided to Daffodil. This is useful in cases where a NiFi expression + determines a variable is not valid for the 'DFDL Schema File' being used, or to temporarily disable a variable + without completely removing it. +

+

Plugins and Schemas

-

The Plugins and Schemas property is a comma separated list of paths to files and/or directories that the processor can use to find Daffodil plugins and schemas. -

-
-
Plugins
-
-

- Some DFDL schemas may require the use of plugins, such as layers, user defined functions, or custom - character encodings. To make these available to NiFi Daffodil processors, these plugins can be compiled to - jars, and paths to the jars or parent directory may be added to the Plugins and Schemas property. For - example, if all Daffodil plugins are in the /usr/share/nifi-daffodil/plugins/ directory, you might - set the property to the following: -

- -
-            Plugins and Schemas: /usr/share/nifi-daffodil/plugins/
-
-
Schemas
-
-

- The DFDL Schema File property is used to define the path to a schema to be compiled, or a saved - parser to reload if Pre-compiled schema is true. However, in some cases it may be more convenient - to provide the schema as a path inside a jar, or as just a filename instead of a full path. The Plugins - and Schemas property can be used for this purpose. -

- -

- If a schema (or saved parser) is defined in a jar, then add the jar to the Plugins and Schemas - property (either as a path to the directory containing the jar or a path to the jar itself), and set the - DFDL Schema File property to the absolute path to the schema inside the jar. For - example, if a jar at /usr/share/nifi-daffodil/schemas/dfdl-foo.jar contains a DFDL schema file - called com/example/foo.dfdl.xsd, you might configure the following: -

- -
-            DFDL Schema File: /com/example/foo.dfdl.xsd
-            Plugins and Schemas: /usr/share/nifi-daffodil/schemas/
+

-

- If a schema (or saved parser) is not in a jar, then add the file to the Plugins and Schemas - property (either as a path to the directory containing the file or a path to the file itself) and set the - DFDL Schema File property to just the file name. For example, if a saved parser is at - /usr/share/nifi-daffodil/saved-parsers/foo.bin, you might configure the following: -

+

Plugins

+

+ Some DFDL schemas may require the use of plugins, such as layers, user defined functions, or custom character + encodings. To make these available to NiFi Daffodil processors, these plugins can be compiled to jars, and paths to + individual jars or to a parent directory containing one or more jars may be added to the Plugins and + Schemas property. For example, if all Daffodil plugins are in the /usr/share/nifi-daffodil/plugins/ + directory, you might set the property to the following: +

+
+Plugins and Schemas: /usr/share/nifi-daffodil/plugins/
+
-
-            DFDL Schema File: foo.bin
-            Plugins and Schemas: /usr/share/nifi-daffodil/saved-parsers/
-            Pre-compiled Schema: true
-
-
+

Schemas

+

+ The DFDL Schema File property is used to define the path to a schema to be compiled, or a saved + parser to reload if Pre-compiled schema is true. However, in some cases it may be more convenient + to provide the schema as a path inside a jar, or as just a filename instead of a full path. The Plugins + and Schemas property can be used for this purpose. +

+

+ If a schema (or saved parser) is defined in a jar, then add the jar to the Plugins and Schemas + property (either as a path to the directory containing the jar or a path to the jar itself), and set the + DFDL Schema File property to the absolute path to the schema inside the jar. For + example, if a jar at /usr/share/nifi-daffodil/schemas/dfdl-foo.jar contains a DFDL schema file + called com/example/foo.dfdl.xsd, you might configure the following: +

+
+DFDL Schema File: /com/example/foo.dfdl.xsd
+Plugins and Schemas: /usr/share/nifi-daffodil/schemas/
+
+

+ In the above example, notice how the DFDL Schema File path begins with a '/', which denotes that the file is to be + found at that location inside a jar file. +

+

+ If a schema (or saved parser) is not in a jar, then add the file to the Plugins and Schemas + property (either as a path to the directory containing the file or a path to the file itself) and set the + DFDL Schema File property to just the file name. For example, if a saved parser is at + /usr/share/nifi-daffodil/saved-parsers/foo.bin, you might configure the following:

+
+DFDL Schema File: foo.bin
+Plugins and Schemas: /usr/share/nifi-daffodil/saved-parsers/
+Pre-compiled Schema: true
+
+

Compiled DFDL Schema Cache

Before a FlowFile can be parsed or unparsed, Daffodil first compiles the DFDL schema to an internal data - structure. This compilation can be relatively slow, so once a schema is compiled it is cached inside this + structure. This compilation can be relatively slow, so once a schema is compiled, it is cached inside this NiFi processor for rapid reuse. The following properties are provided to control how this cache is maintained:

Cache Size
+

Defines the maximum number of DFDL schemas that can be compiled and saved for rapid reuse from the cache. To avoid compilation, it is recommended that this value be larger than the expected number of possible DFDL schemas. Setting this value to 0 disables the cache, though this is not - recommended. + recommended. The default value is 50. +

Cache TTL after last access
- Defines the cache time-to-live, or how long to keep an unused compiled DFDL schemas in the cache. To avoid - compilation, it is recommended that this should be larger than the amount of time it is expected - for a DFDL schema to be unused. +

+ Cached compiled DFDL schemas that go unused for a specified amount of time are removed from the cache to + save memory. This time is defined by the Cache TTL after last access property, with the format of + <duration> <time_unit>, where <duration> is a non-negative integer and + <time_unit> is a supported unit of time, such as nanos, millis, secs, mins, hrs, days. If set + to zero (e.g. "0 seconds"), cached compiled DFDL schemas are never removed from the cache. The default value + is 30 minutes. +

+

+ For example, if a schema is used occasionally (once a day perhaps), then set this to 24 hours to + avoid recompiling it each time it is used. +

diff --git a/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilUnparse/additionalDetails.html b/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilUnparse/additionalDetails.html index 977882d..a44e0bd 100644 --- a/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilUnparse/additionalDetails.html +++ b/nifi-daffodil-processors/src/main/resources/docs/com.owlcyberdefense.nifi.processors.DaffodilUnparse/additionalDetails.html @@ -19,7 +19,7 @@ DaffodilUnparse - + @@ -39,7 +39,7 @@

Daffodil and DFDL

is a language capable of describing many data formats, including textual and binary, commercial record-oriented, scientific and numeric, modern and legacy, and many industry standards. It leverages XML technology and concepts, using a subset of W3C XML schema type system and annotations to describe such - data. Daffodil uses this data description to "parse" data into an XML representation of the data. This + data. Daffodil uses this data description to "parse" data into an XML or JSON representation of the data. This allows one to take advantage of the many XML and JSON technologies (e.g. XQuery, XPath, XSLT) to ingest, validate, and manipulate complex data formats. Daffodil can also use this data description to "unparse", or serialize, the XML or JSON representation back to the original data format. @@ -47,61 +47,77 @@

Daffodil and DFDL

Publicly available DFDL schemas are available on the DFDL Schemas github.

+

External Variables

+

+ DFDL external variables are supported using NiFi's dynamic properties. +

+

+ To set an external variable, click the "+" sign in the top right of the processor's + properties tab to add a new dynamic property. The name of the property should be the same as the name of the + variable. Additionally, if the variable name alone is ambiguous, you can provide a namespace prefix and a + colon (e.g. ns:variableName) or a full namespace URI surrounded by curly braces (e.g. + {http://example.com/namespace}variableName). +

+

+ The value of the dynamic property is used for the value of the variable, and can be a simple value or a NiFi + expression to be evaluated for each flow file. In either case, if the value is the empty string, the variable is + ignored for that flowfile and is not provided to Daffodil. This is useful in cases where a NiFi expression + determines a variable is not valid for the 'DFDL Schema File' being used, or to temporarily disable a variable + without completely removing it. +

+

Plugins and Schemas

-

The Plugins and Schemas property is a comma separated list of paths to files and/or directories that the processor can use to find Daffodil plugins and schemas. -

-
-
Plugins
-
-

- Some DFDL schemas may require the use of plugins, such as layers, user defined functions, or custom - character encodings. To make these available to NiFi Daffodil processors, these plugins can be compiled to - jars, and paths to the jars or parent directory may be added to the Plugins and Schemas property. For - example, if all Daffodil plugins are in the /usr/share/nifi-daffodil/plugins/ directory, you might - set the property to the following: -

- -
-            Plugins and Schemas: /usr/share/nifi-daffodil/plugins/
-
-
Schemas
-
-

- The DFDL Schema File property is used to define the path to a schema to be compiled, or a saved - parser to reload if Pre-compiled schema is true. However, in some cases it may be more convenient - to provide the schema as a path inside a jar, or as just a filename instead of a full path. The Plugins - and Schemas property can be used for this purpose. -

- -

- If a schema (or saved parser) is defined in a jar, then add the jar to the Plugins and Schemas - property (either as a path to the directory containing the jar or a path to the jar itself), and set the - DFDL Schema File property to the absolute path to the schema inside the jar. For - example, if a jar at /usr/share/nifi-daffodil/schemas/dfdl-foo.jar contains a DFDL schema file - called com/example/foo.dfdl.xsd, you might configure the following: -

- -
-            DFDL Schema File: /com/example/foo.dfdl.xsd
-            Plugins and Schemas: /usr/share/nifi-daffodil/schemas/
+

-

- If a schema (or saved parser) is not in a jar, then add the file to the Plugins and Schemas - property (either as a path to the directory containing the file or a path to the file itself) and set the - DFDL Schema File property to just the file name. For example, if a saved parser is at - /usr/share/nifi-daffodil/saved-parsers/foo.bin, you might configure the following: -

+

Plugins

+

+ Some DFDL schemas may require the use of plugins, such as layers, user defined functions, or custom character + encodings. To make these available to NiFi Daffodil processors, these plugins can be compiled to jars, and paths to + individual jars or to a parent directory containing one or more jars may be added to the Plugins and + Schemas property. For example, if all Daffodil plugins are in the /usr/share/nifi-daffodil/plugins/ + directory, you might set the property to the following: +

+
+Plugins and Schemas: /usr/share/nifi-daffodil/plugins/
+
-
-            DFDL Schema File: foo.bin
-            Plugins and Schemas: /usr/share/nifi-daffodil/saved-parsers/
-            Pre-compiled Schema: true
-
-
+

Schemas

+

+ The DFDL Schema File property is used to define the path to a schema to be compiled, or a saved + parser to reload if Pre-compiled schema is true. However, in some cases it may be more convenient + to provide the schema as a path inside a jar, or as just a filename instead of a full path. The Plugins + and Schemas property can be used for this purpose. +

+

+ If a schema (or saved parser) is defined in a jar, then add the jar to the Plugins and Schemas + property (either as a path to the directory containing the jar or a path to the jar itself), and set the + DFDL Schema File property to the absolute path to the schema inside the jar. For + example, if a jar at /usr/share/nifi-daffodil/schemas/dfdl-foo.jar contains a DFDL schema file + called com/example/foo.dfdl.xsd, you might configure the following: +

+
+DFDL Schema File: /com/example/foo.dfdl.xsd
+Plugins and Schemas: /usr/share/nifi-daffodil/schemas/
+
+

+ In the above example, notice how the DFDL Schema File path begins with a '/', which denotes that the file is to be + found at that location inside a jar file. +

+

+ If a schema (or saved parser) is not in a jar, then add the file to the Plugins and Schemas + property (either as a path to the directory containing the file or a path to the file itself) and set the + DFDL Schema File property to just the file name. For example, if a saved parser is at + /usr/share/nifi-daffodil/saved-parsers/foo.bin, you might configure the following:

+
+DFDL Schema File: foo.bin
+Plugins and Schemas: /usr/share/nifi-daffodil/saved-parsers/
+Pre-compiled Schema: true
+
+

Compiled DFDL Schema Cache

Before a FlowFile can be parsed or unparsed, Daffodil first compiles the DFDL schema to an internal data @@ -111,16 +127,27 @@

Compiled DFDL Schema Cache

Cache Size
+

Defines the maximum number of DFDL schemas that can be compiled and saved for rapid reuse from the cache. To avoid compilation, it is recommended that this value be larger than the expected number of possible DFDL schemas. Setting this value to 0 disables the cache, though this is not - recommended. + recommended. The default value is 50. +

Cache TTL after last access
- Defines the cache time-to-live, or how long to keep an unused compiled DFDL schemas in the cache. To avoid - compilation, it is recommended that this should be larger than the amount of time it is expected - for a DFDL schema to be unused. +

+ Cached compiled DFDL schemas that go unused for a specified amount of time are removed from the cache to + save memory. This time is defined by the Cache TTL after last access property, with the format of + <duration> <time_unit>, where <duration> is a non-negative integer and + <time_unit> is a supported unit of time, such as nanos, millis, secs, mins, hrs, days. If set + to zero (e.g. "0 seconds"), cached compiled DFDL schemas are never removed from the cache. The default value + is 30 minutes. +

+

+ For example, if a schema is used occasionally (once a day perhaps), then set this to more than 24 hours to + avoid recompiling it each time it is used. +

diff --git a/nifi-daffodil-processors/src/test/java/com/owlcyberdefense/nifi/processors/TestDaffodilProcessor.java b/nifi-daffodil-processors/src/test/java/com/owlcyberdefense/nifi/processors/TestDaffodilProcessor.java index 8b315be..02681eb 100644 --- a/nifi-daffodil-processors/src/test/java/com/owlcyberdefense/nifi/processors/TestDaffodilProcessor.java +++ b/nifi-daffodil-processors/src/test/java/com/owlcyberdefense/nifi/processors/TestDaffodilProcessor.java @@ -29,6 +29,7 @@ import java.util.Map; import java.util.HashMap; +import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.attributes.CoreAttributes; import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; @@ -367,4 +368,86 @@ public void testCompilationParamsEquality() { assertFalse(ck1.hashCode() == ck6.hashCode()); } + @Test + public void testParseVariable() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("byteOrder").dynamic(true).build(), "littleEndian"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml"))); + infoset.assertContentEquals(expectedContent); + assertEquals(DaffodilParse.XML_MIME_TYPE, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + } + + @Test + public void testParseVariablePrefix() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + 
testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("dfdl:byteOrder").dynamic(true).build(), "littleEndian"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml"))); + infoset.assertContentEquals(expectedContent); + assertEquals(DaffodilParse.XML_MIME_TYPE, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + } + + @Test + public void testParseVariableNamespace() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("{http://www.ogf.org/dfdl/dfdl-1.0/}byteOrder").dynamic(true).build(), "littleEndian"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml"))); + infoset.assertContentEquals(expectedContent); + assertEquals(DaffodilParse.XML_MIME_TYPE, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + } + + @Test + public void testParseVariableInvalid() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + 
testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("byteOrder").dynamic(true).build(), "badEndian"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); + final MockFlowFile original = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin"))); + original.assertContentEquals(expectedContent); + } + + @Test + public void testParseVariableUnknown() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("unknown").dynamic(true).build(), "shouldError"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); + final MockFlowFile original = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin"))); + original.assertContentEquals(expectedContent); + } + + @Test + public void testParseVariableExpressionIgnored() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); + testRunner.setProperty(new PropertyDescriptor.Builder().name("unknownIgnored").dynamic(true).build(), "${literal('')}"); + 
testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin.xml"))); + infoset.assertContentEquals(expectedContent); + assertEquals(DaffodilParse.XML_MIME_TYPE, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + } + } diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd index 1228afb..1f6c60a 100644 --- a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd +++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd @@ -24,7 +24,7 @@ + lengthKind="implicit" separator="" byteOrder="{ $dfdl:byteOrder }" /> diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml new file mode 100644 index 0000000..883cd82 --- /dev/null +++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover_le.bin.xml @@ -0,0 +1 @@ +1653727