From 93630c3ab3c6e18df00c970d62b59e6a366efa28 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 7 Aug 2024 08:24:02 -0400 Subject: [PATCH] ALS-6511: Handle single value variables --- .../hpds/processing/io/PfbWriter.java | 46 +++++++++---------- .../hpds/processing/io/PfbWriterTest.java | 9 ++-- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index 883f8667..756336d7 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -32,6 +32,8 @@ public class PfbWriter implements ResultWriter { private Schema entitySchema; private Schema patientDataSchema; + private static final Set SINGULAR_FIELDS = Set.of("patient_id"); + public PfbWriter(File tempFile) { file = tempFile; entityFieldAssembler = SchemaBuilder.record("entity") @@ -58,7 +60,14 @@ public void writeHeader(String[] data) { SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record("patientData") .fields(); - fields.forEach(field -> patientRecords.name(field).type(SchemaBuilder.array().items(SchemaBuilder.nullable().stringType())).noDefault()); + fields.forEach(field -> { + if (isSingularField(field)) { + patientRecords.nullableString(field, "null"); + } else { + patientRecords.name(field).type(SchemaBuilder.array().items(SchemaBuilder.nullable().stringType())).noDefault(); + } + + }); patientDataSchema = patientRecords.endRecord(); Schema objectSchema = Schema.createUnion(metadataSchema, patientDataSchema); @@ -81,6 +90,10 @@ public void writeHeader(String[] data) { writeMetadata(); } + private boolean isSingularField(String field) { + return SINGULAR_FIELDS.contains(field); + } + protected String formatFieldName(String s) { String formattedFieldName = s.replaceAll("\\W", "_"); if (Character.isDigit(formattedFieldName.charAt(0))) { @@ -118,27 +131,7 @@ private void writeMetadata() { @Override public void writeEntity(Collection entities) { - entities.forEach(entity -> { - if (entity.length != fields.size()) { - throw new IllegalArgumentException("Entity length much match the number of fields in this document"); - } - GenericRecord patientData = new GenericData.Record(patientDataSchema); - for(int i = 0; i < fields.size(); i++) { - List fieldValue = entity[i] != null ? List.of(entity[i]) : List.of(); - patientData.put(fields.get(i), fieldValue); - } - - GenericRecord entityRecord = new GenericData.Record(entitySchema); - entityRecord.put("object", patientData); - entityRecord.put("name", "patientData"); - entityRecord.put("id", "192035"); - - try { - dataFileWriter.append(entityRecord); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }); + throw new RuntimeException("Method not supported, use writeMultiValueEntity instead"); } @Override @@ -149,8 +142,13 @@ public void writeMultiValueEntity(Collection>> entities) { } GenericRecord patientData = new GenericData.Record(patientDataSchema); for(int i = 0; i < fields.size(); i++) { - List fieldValue = entity.get(i) != null ? entity.get(i) : List.of(); - patientData.put(fields.get(i), fieldValue); + if (isSingularField(fields.get(i))) { + String entityValue = (entity.get(i) != null && !entity.get(i).isEmpty()) ? entity.get(i).get(0) : ""; + patientData.put(fields.get(i), entityValue); + } else { + List fieldValue = entity.get(i) != null ? entity.get(i) : List.of(); + patientData.put(fields.get(i), fieldValue); + } } diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index 4094dd93..4f1b04d3 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -16,10 +16,11 @@ public class PfbWriterTest { public void writeValidPFB() { PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro")); - pfbWriter.writeHeader(new String[] {"\\demographics\\age\\", "\\phs123\\stroke\\"}); - pfbWriter.writeEntity(List.of(new String[]{"80", "Y"}, - new String[]{"70", "N"}, - new String[]{"75", null} + pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); + pfbWriter.writeMultiValueEntity(List.of( + List.of(List.of("123"), List.of("80"), List.of("Y")), + List.of(List.of("456"), List.of("70"),List.of("N", "Y")), + List.of(List.of("789"), List.of("75"), List.of()) )); pfbWriter.close(); // todo: validate this programatically