From a5bc1c2b3d3bb729033f02be531f6db1d162bb07 Mon Sep 17 00:00:00 2001 From: James McMullan Date: Thu, 3 Oct 2024 09:42:21 -0400 Subject: [PATCH] HPCC4J-650 DFSClient should retain integer subtypes - Modified record translation code to maintain integer subtype - Fixed record translation test case - Added index record translation to test case Signed-off-by: James McMullan James.McMullan@lexisnexis.com --- .../org/hpccsystems/commons/ecl/FieldDef.java | 39 ++++++++--- .../hpccsystems/commons/ecl/HpccSrcType.java | 6 ++ .../ecl/RecordDefinitionTranslator.java | 70 ++++++++++++++----- .../commons/ecl/TestFieldDefinitions.java | 23 ++++++ .../commons/ecl/RecordDefinitionTests.java | 42 ++++++----- .../dfs/client/BinaryRecordReader.java | 20 ++---- .../hpccsystems/dfs/client/ColumnPruner.java | 2 +- .../hpccsystems/dfs/client/FileUtility.java | 2 +- .../org/hpccsystems/dfs/client/HPCCFile.java | 31 ++++++-- 9 files changed, 168 insertions(+), 67 deletions(-) diff --git a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/FieldDef.java b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/FieldDef.java index d13d79c5c..d813d0e64 100644 --- a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/FieldDef.java +++ b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/FieldDef.java @@ -35,7 +35,6 @@ public class FieldDef implements Serializable private long len = 0; private boolean fixedLength = false; private boolean isUnsigned = false; - private boolean isBiased = false; private int additionalFlags = 0; /** @@ -49,12 +48,17 @@ public FieldDef(FieldDef rhs) this.fieldName = rhs.fieldName; this.fieldType = rhs.fieldType; this.typeName = rhs.typeName; - this.defs = rhs.defs; + + this.defs = new FieldDef[rhs.defs.length]; + for (int i = 0; i < rhs.defs.length; i++) + { + this.defs[i] = new FieldDef(rhs.defs[i]); + } + this.srcType = rhs.srcType; this.len = rhs.len; this.fixedLength = rhs.fixedLength; this.isUnsigned = rhs.isUnsigned; - this.isBiased = rhs.isBiased; this.additionalFlags = rhs.additionalFlags; } @@ -172,6 +176,16 @@ public HpccSrcType getSourceType() return this.srcType; } + /** + * Sets data type on the HPCC cluster. + * + * @param srcType the new source type + */ + public void setSourceType(HpccSrcType srcType) + { + this.srcType = srcType; + } + /** * Length of data or minimum length if variable. * @@ -305,18 +319,27 @@ public boolean isUnsigned() } /** - * Is the underlying value biased? + * Is the underlying value biased? Deprecated in favor of isNonStandardInt. * - * @return true when biased + * @return true when biased + * + * @deprecated */ public boolean isBiased() { - return this.isBiased; + return isNonStandardInt(); } - void setIsBiased(boolean biased) + /** + * + * + * @return true when biased + */ + public boolean isNonStandardInt() { - this.isBiased = biased; + return this.srcType == HpccSrcType.KEYED_INTEGER + || this.srcType == HpccSrcType.SWAPPED_INTEGER + || this.srcType == HpccSrcType.BIAS_SWAPPED_INTEGER; } /** diff --git a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/HpccSrcType.java b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/HpccSrcType.java index 678c90de1..e0615fc36 100644 --- a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/HpccSrcType.java +++ b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/HpccSrcType.java @@ -36,6 +36,12 @@ public enum HpccSrcType "little endian", false ), BINARY_CODED_DECIMAL ( "Binary coded decimal", false + ), KEYED_INTEGER ( + "Non-payload integer field within a key", false + ), SWAPPED_INTEGER ( + "Byte swapped integer, used within keys", false + ), BIAS_SWAPPED_INTEGER ( + "Byte swapped integer, deprecated", false ), UNKNOWN ( "Unkown", false ); diff --git a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/RecordDefinitionTranslator.java b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/RecordDefinitionTranslator.java index 7db62895c..dbae989e4 100644 --- a/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/RecordDefinitionTranslator.java +++ b/commons-hpcc/src/main/java/org/hpccsystems/commons/ecl/RecordDefinitionTranslator.java @@ -34,6 +34,8 @@ public class RecordDefinitionTranslator private static final String CHILD_KEY = "child"; private static final String FLAGS_KEY = "flags"; + private static final String ESP_TYPE_NAME_PREFIX = "ty"; + private static final int FLAG_UNSIGNED = 256; private static final int FLAG_UNKNOWN_SIZE = 1024; private static final int TYPE_ID_MASK = 0xff; // 0x7fff & ~FLAG_UNKNOWN_SIZE & ~FLAG_UNSIGNED; @@ -55,6 +57,7 @@ public class RecordDefinitionTranslator final private static int type_varunicode = 33; final private static int type_utf8 = 41; + // FNoInitializer, // 0 means no initialiser - not a special virtual initialiser // FVirtualFilePosition, // FVirtualLocalFilePosition, @@ -68,6 +71,8 @@ public class RecordDefinitionTranslator final private static int type_char = 11; // Convert to string final private static int type_qstring = 30; // Convert to string + final private static char XPATH_DELIMITER = 0x0001; + // Additional retained flags final private static int FLAG_IS_PAYLOAD_FIELD = 0x00010000; @@ -134,9 +139,12 @@ private static HpccSrcType getSourceType(int typeID) case type_int: case type_real: return HpccSrcType.LITTLE_ENDIAN; - case type_swapint: case type_biasedswapint: + return HpccSrcType.BIAS_SWAPPED_INTEGER; + case type_swapint: + return HpccSrcType.SWAPPED_INTEGER; case type_keyedint: + return HpccSrcType.KEYED_INTEGER; case type_filepos: return HpccSrcType.BIG_ENDIAN; case type_utf8: @@ -272,7 +280,7 @@ private static String getEClTypeDefinition(FieldDef field, HashMap 0) + + int flags = childTypeID | childField.getAdditionalFlags(); + if (flags > 0) { - int flags = childTypeID | childField.getAdditionalFlags(); childJson.put("flags", flags); } if (childField.getFieldType() == FieldType.DATASET) { - char delim = 0x0001; - childJson.put("xpath", childField.getFieldName() + delim + "Row"); + childJson.put("xpath", childField.getFieldName() + XPATH_DELIMITER + "Row"); + } + else if (childField.getFieldType() == FieldType.SET) + { + childJson.put("xpath", childField.getFieldName() + XPATH_DELIMITER + "Item"); } fields.put(childJson); @@ -758,6 +787,17 @@ private static int getJsonTypeDefinition(FieldDef field, HashMap 0) { @@ -719,13 +719,11 @@ private void readIntoScratchBuffer(int offset, int dataLen) throws IOException * the length, 1 to 8 bytes * @param little_endian * true if the value is little endian - * @param shouldCorrectBias - * true if the value should be corrected for index bias * @return the integer extracted as a long * @throws IOException * Signals that an I/O exception has occurred. */ - private long getInt(int len, boolean little_endian, boolean shouldCorrectBias) throws IOException + private long getInt(int len, boolean little_endian) throws IOException { long v = getUnsigned(len, little_endian); @@ -739,12 +737,6 @@ private long getInt(int len, boolean little_endian, boolean shouldCorrectBias) t } } - if (isIndex && shouldCorrectBias) - { - // Roxie indexes are biased to allow for easier comparison. This corrects the bias - v += negMask; - } - return v; } diff --git a/dfsclient/src/main/java/org/hpccsystems/dfs/client/ColumnPruner.java b/dfsclient/src/main/java/org/hpccsystems/dfs/client/ColumnPruner.java index 4cc73ac77..004ff9b5b 100644 --- a/dfsclient/src/main/java/org/hpccsystems/dfs/client/ColumnPruner.java +++ b/dfsclient/src/main/java/org/hpccsystems/dfs/client/ColumnPruner.java @@ -129,7 +129,7 @@ public FieldDef pruneRecordDefinition(FieldDef originalRD) throws Exception { if (selectedFieldMap.size() == 0) { - return originalRD; + return new FieldDef(originalRD); } ArrayList selectedFields = new ArrayList(); diff --git a/dfsclient/src/main/java/org/hpccsystems/dfs/client/FileUtility.java b/dfsclient/src/main/java/org/hpccsystems/dfs/client/FileUtility.java index 3da02c805..e6f1c7b15 100644 --- a/dfsclient/src/main/java/org/hpccsystems/dfs/client/FileUtility.java +++ b/dfsclient/src/main/java/org/hpccsystems/dfs/client/FileUtility.java @@ -786,7 +786,7 @@ private static Runnable[] createNonRedistributingCopyTasks(HPCCFile file, DFUCre for (int j = 0; j < numIncomingParts; j++) { DataPartition inFilePart = inFileParts[incomingFilePartIndex + j]; - filePartReaders[j] = new HpccRemoteFileReader(inFilePart, recordDef, new HPCCRecordBuilder(recordDef)); + filePartReaders[j] = new HpccRemoteFileReader(inFilePart, recordDef, new HPCCRecordBuilder(file.getProjectedRecordDefinition())); } incomingFilePartIndex += numIncomingParts; diff --git a/dfsclient/src/main/java/org/hpccsystems/dfs/client/HPCCFile.java b/dfsclient/src/main/java/org/hpccsystems/dfs/client/HPCCFile.java index afec67413..a46c1f76a 100644 --- a/dfsclient/src/main/java/org/hpccsystems/dfs/client/HPCCFile.java +++ b/dfsclient/src/main/java/org/hpccsystems/dfs/client/HPCCFile.java @@ -26,6 +26,7 @@ import org.apache.logging.log4j.LogManager; import org.hpccsystems.commons.ecl.FieldDef; import org.hpccsystems.commons.ecl.FileFilter; +import org.hpccsystems.commons.ecl.HpccSrcType; import org.hpccsystems.commons.ecl.RecordDefinitionTranslator; import org.hpccsystems.commons.errors.HpccFileException; import org.hpccsystems.dfs.cluster.ClusterRemapper; @@ -163,12 +164,12 @@ public static int getFilePartFromFPos(long fpos) } /** - * Extracts the offset in the file part from a fileposition value. + * Extracts the offset in the file part from a fileposition value. * * @param fpos file position * @return the project list */ - public static long getOffsetFromFPos(long fpos) + public static long getOffsetFromFPos(long fpos) { // First 48 bits store the offset return fpos & 0xffffffffffffL; @@ -198,11 +199,27 @@ public HPCCFile setProjectList(String projectList) throws Exception this.columnPruner = new ColumnPruner(projectList); if (this.recordDefinition != null) { - this.projectedRecordDefinition = this.columnPruner.pruneRecordDefinition(this.recordDefinition); + updateProjectedRecordDef(); } return this; } + private void updateProjectedRecordDef() throws Exception + { + this.projectedRecordDefinition = this.columnPruner.pruneRecordDefinition(this.recordDefinition); + + // By default project all sub-integer types to standard integers + for (int i = 0; i < this.projectedRecordDefinition.getNumDefs(); i++) + { + FieldDef field = this.projectedRecordDefinition.getDef(i); + if (field.isNonStandardInt()) + { + field.setSourceType(HpccSrcType.LITTLE_ENDIAN); + } + + } + } + /** * Gets the file access expiry secs. * @@ -434,7 +451,7 @@ private void createDataParts() throws HpccFileException this.partitionProcessor = new PartitionProcessor(this.recordDefinition, this.dataParts, null); } - this.projectedRecordDefinition = this.columnPruner.pruneRecordDefinition(this.recordDefinition); + updateProjectedRecordDef(); } else throw new HpccFileException("Could not fetch metadata for file: '" + fileName + "'"); @@ -622,13 +639,13 @@ private static String acquireFileAccess(String fileName, HPCCWsDFUClient hpcc, i String uniqueID = "HPCC-FILE: " + UUID.randomUUID().toString(); return hpcc.getFileAccessBlob(fileName, clusterName, expirySeconds, uniqueID); } - + /** * @return the file metadata information for this HPCCFile (if it exists) */ - public DFUFileDetailWrapper getOriginalFileMetadata() + public DFUFileDetailWrapper getOriginalFileMetadata() { - if (originalFileMetadata==null) + if (originalFileMetadata==null) { HPCCWsDFUClient dfuClient = HPCCWsDFUClient.get(espConnInfo); if (dfuClient.hasInitError())