diff --git a/plugins/parquet/parquetembed.cpp b/plugins/parquet/parquetembed.cpp index 88cb29bd3f9..473f56e145d 100644 --- a/plugins/parquet/parquetembed.cpp +++ b/plugins/parquet/parquetembed.cpp @@ -770,10 +770,17 @@ arrow::Status ParquetWriter::fieldToNode(const RtlFieldInfo *field, std::vector< case type_unicode: case type_varunicode: case type_decimal: - arrowFields.push_back(std::make_shared(name.str(), arrow::utf8())); //TODO add decimal encoding + arrowFields.push_back(std::make_shared(name.str(), arrow::utf8())); // TODO: add decimal encoding break; case type_data: - arrowFields.push_back(std::make_shared(name.str(), arrow::large_binary())); + if (field->type->length > 0) + { + arrowFields.push_back(std::make_shared(name.str(), arrow::fixed_size_binary(field->type->length))); + } + else + { + arrowFields.push_back(std::make_shared(name.str(), arrow::large_binary())); + } break; case type_record: arrowFields.push_back(std::make_shared(name.str(), makeChildRecord(field))); @@ -992,20 +999,26 @@ void ParquetWriter::addFieldToBuilder(const RtlFieldInfo *field, unsigned len, c arrow::ArrayBuilder *fieldBuilder = getFieldBuilder(field); switch(fieldBuilder->type()->id()) { - case arrow::Type::type::STRING: + case arrow::Type::STRING: { arrow::StringBuilder *stringBuilder = static_cast(fieldBuilder); reportIfFailure(stringBuilder->Append(data, len)); break; } - case arrow::Type::type::LARGE_BINARY: + case arrow::Type::LARGE_BINARY: { arrow::LargeBinaryBuilder *largeBinaryBuilder = static_cast(fieldBuilder); reportIfFailure(largeBinaryBuilder->Append(data, len)); break; } + case arrow::Type::FIXED_SIZE_BINARY: + { + arrow::FixedSizeBinaryBuilder *fixedSizeBinaryBuilder = static_cast(fieldBuilder); + reportIfFailure(fixedSizeBinaryBuilder->Append(data)); + break; + } default: - failx("Incorrect type for String/Large_Binary addFieldToBuilder: %s", fieldBuilder->type()->ToString().c_str()); + failx("Incorrect type for String/Large_Binary/Fixed_Size_Binary addFieldToBuilder: %s", fieldBuilder->type()->ToString().c_str()); } } @@ -1178,6 +1191,8 @@ std::string_view ParquetRowBuilder::getCurrView(const RtlFieldInfo *field) return arrayVisitor->largeStringArr->GetView(currArrayIndex()); case DecimalType: return arrayVisitor->size == 128 ? arrayVisitor->decArr->GetView(currArrayIndex()) : arrayVisitor->largeDecArr->GetView(currArrayIndex()); + case FixedSizeBinaryType: + return arrayVisitor->fixedSizeBinaryArr->GetView(currArrayIndex()); default: failx("Unimplemented Parquet type for field with name %s.", field->name); } diff --git a/plugins/parquet/parquetembed.hpp b/plugins/parquet/parquetembed.hpp index f7545026cae..e95e19967d1 100644 --- a/plugins/parquet/parquetembed.hpp +++ b/plugins/parquet/parquetembed.hpp @@ -133,7 +133,8 @@ enum ParquetArrayType ListType, LargeListType, StructType, - RealType + RealType, + FixedSizeBinaryType }; /** @@ -271,6 +272,13 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor size = 8; return arrow::Status::OK(); } + arrow::Status Visit(const arrow::FixedSizeBinaryArray &array) + { + fixedSizeBinaryArr = &array; + type = FixedSizeBinaryType; + size = array.byte_width(); + return arrow::Status::OK(); + } arrow::Status Visit(const arrow::StringArray &array) { stringArr = &array; @@ -348,6 +356,7 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor const arrow::HalfFloatArray *halfFloatArr = nullptr; const arrow::FloatArray *floatArr = nullptr; const arrow::DoubleArray *doubleArr = nullptr; + const arrow::FixedSizeBinaryArray *fixedSizeBinaryArr = nullptr; const arrow::StringArray *stringArr = nullptr; const arrow::LargeStringArray *largeStringArr = nullptr; const arrow::BinaryArray *binArr = nullptr;