diff --git a/plugins/parquet/parquetembed.cpp b/plugins/parquet/parquetembed.cpp index 2349ea70530..88cb29bd3f9 100644 --- a/plugins/parquet/parquetembed.cpp +++ b/plugins/parquet/parquetembed.cpp @@ -714,7 +714,7 @@ std::shared_ptr ParquetWriter::makeChildRecord(const RtlField const RtlFieldInfo childFieldInfo = RtlFieldInfo("", "", child); std::vector> childField; reportIfFailure(fieldToNode(&childFieldInfo, childField)); - return std::make_shared(childField[0]); + return std::make_shared(childField[0]); } } @@ -845,8 +845,8 @@ void ParquetWriter::beginSet(const RtlFieldInfo *field) arrow::FieldPath match = getNestedFieldBuilder(field, childBuilder); fieldBuilderStack.push_back(std::make_shared(field, childBuilder, CPNTSet, std::move(match))); - arrow::ListBuilder *listBuilder = static_cast(childBuilder); - reportIfFailure(listBuilder->Append()); + arrow::LargeListBuilder *largeListBuilder = static_cast(childBuilder); + reportIfFailure(largeListBuilder->Append()); } /** @@ -946,7 +946,7 @@ arrow::ArrayBuilder *ParquetWriter::getFieldBuilder(const RtlFieldInfo *field) return recordBatchBuilder->GetField(schema->GetFieldIndex(fieldName.str())); } else if (fieldBuilderStack.back()->nodeType == CPNTSet) - return static_cast(fieldBuilderStack.back()->structPtr)->value_builder(); + return static_cast(fieldBuilderStack.back()->structPtr)->value_builder(); else return fieldBuilderStack.back()->structPtr->child(fieldBuilderStack.back()->childrenProcessed++); } @@ -1463,6 +1463,12 @@ void ParquetRowBuilder::processBeginSet(const RtlFieldInfo *field, bool &isAll) newPathNode.childCount = arrayVisitor->listArr->value_slice(currentRow)->length(); pathStack.push_back(newPathNode); } + else if (arrayVisitor->type == LargeListType) + { + ParquetColumnTracker newPathNode(field, arrayVisitor->largeListArr, CPNTSet); + newPathNode.childCount = arrayVisitor->largeListArr->value_slice(currentRow)->length(); + pathStack.push_back(newPathNode); + } else { failx("Error reading nested set with name %s.", field->name); @@ -1585,8 +1591,20 @@ void ParquetRowBuilder::nextFromStruct(const RtlFieldInfo *field) } else if (pathStack.back().nodeType == CPNTSet) { - auto child = arrayVisitor->listArr->value_slice(currentRow); - reportIfFailure(child->Accept(arrayVisitor.get())); + if (arrayVisitor->type == ListType) + { + auto child = arrayVisitor->listArr->value_slice(currentRow); + reportIfFailure(child->Accept(arrayVisitor.get())); + } + else if (arrayVisitor->type == LargeListType) + { + auto child = arrayVisitor->largeListArr->value_slice(currentRow); + reportIfFailure(child->Accept(arrayVisitor.get())); + } + else + { + failx("Unexpected type in CPNTSet: neither ListType nor LargeListType"); + } } } diff --git a/plugins/parquet/parquetembed.hpp b/plugins/parquet/parquetembed.hpp index 877a35d0ab9..f7545026cae 100644 --- a/plugins/parquet/parquetembed.hpp +++ b/plugins/parquet/parquetembed.hpp @@ -131,6 +131,7 @@ enum ParquetArrayType LargeBinaryType, DecimalType, ListType, + LargeListType, StructType, RealType }; @@ -314,6 +315,12 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor type = ListType; return arrow::Status::OK(); } + arrow::Status Visit(const arrow::LargeListArray &array) + { + largeListArr = &array; + type = LargeListType; + return arrow::Status::OK(); + } arrow::Status Visit(const arrow::StructArray &array) { structArr = &array; @@ -348,6 +355,7 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor const arrow::Decimal128Array *decArr = nullptr; const arrow::Decimal256Array *largeDecArr = nullptr; const arrow::ListArray *listArr = nullptr; + const arrow::LargeListArray *largeListArr = nullptr; const arrow::StructArray *structArr = nullptr; };