Skip to content

Commit

Permalink
Merge pull request #18888 from ilhan2316/HPCC-32228
Browse files Browse the repository at this point in the history
HPCC-32228 Add fixed_size_binary data type to Parquet Plugin 

Reviewed-By: Jack Del Vecchio
Reviewed-By: Dan S. Camper <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Aug 5, 2024
2 parents 8c44a0e + be0072a commit 727c51a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
25 changes: 20 additions & 5 deletions plugins/parquet/parquetembed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -770,10 +770,17 @@ arrow::Status ParquetWriter::fieldToNode(const RtlFieldInfo *field, std::vector<
case type_unicode:
case type_varunicode:
case type_decimal:
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), arrow::utf8())); //TODO add decimal encoding
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), arrow::utf8())); // TODO: add decimal encoding
break;
case type_data:
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), arrow::large_binary()));
if (field->type->length > 0)
{
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), arrow::fixed_size_binary(field->type->length)));
}
else
{
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), arrow::large_binary()));
}
break;
case type_record:
arrowFields.push_back(std::make_shared<arrow::Field>(name.str(), makeChildRecord(field)));
Expand Down Expand Up @@ -992,20 +999,26 @@ void ParquetWriter::addFieldToBuilder(const RtlFieldInfo *field, unsigned len, c
arrow::ArrayBuilder *fieldBuilder = getFieldBuilder(field);
switch(fieldBuilder->type()->id())
{
case arrow::Type::type::STRING:
case arrow::Type::STRING:
{
arrow::StringBuilder *stringBuilder = static_cast<arrow::StringBuilder *>(fieldBuilder);
reportIfFailure(stringBuilder->Append(data, len));
break;
}
case arrow::Type::type::LARGE_BINARY:
case arrow::Type::LARGE_BINARY:
{
arrow::LargeBinaryBuilder *largeBinaryBuilder = static_cast<arrow::LargeBinaryBuilder *>(fieldBuilder);
reportIfFailure(largeBinaryBuilder->Append(data, len));
break;
}
case arrow::Type::FIXED_SIZE_BINARY:
{
arrow::FixedSizeBinaryBuilder *fixedSizeBinaryBuilder = static_cast<arrow::FixedSizeBinaryBuilder *>(fieldBuilder);
reportIfFailure(fixedSizeBinaryBuilder->Append(data));
break;
}
default:
failx("Incorrect type for String/Large_Binary addFieldToBuilder: %s", fieldBuilder->type()->ToString().c_str());
failx("Incorrect type for String/Large_Binary/Fixed_Size_Binary addFieldToBuilder: %s", fieldBuilder->type()->ToString().c_str());
}
}

Expand Down Expand Up @@ -1178,6 +1191,8 @@ std::string_view ParquetRowBuilder::getCurrView(const RtlFieldInfo *field)
return arrayVisitor->largeStringArr->GetView(currArrayIndex());
case DecimalType:
return arrayVisitor->size == 128 ? arrayVisitor->decArr->GetView(currArrayIndex()) : arrayVisitor->largeDecArr->GetView(currArrayIndex());
case FixedSizeBinaryType:
return arrayVisitor->fixedSizeBinaryArr->GetView(currArrayIndex());
default:
failx("Unimplemented Parquet type for field with name %s.", field->name);
}
Expand Down
11 changes: 10 additions & 1 deletion plugins/parquet/parquetembed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ enum ParquetArrayType
ListType,
LargeListType,
StructType,
RealType
RealType,
FixedSizeBinaryType
};

/**
Expand Down Expand Up @@ -271,6 +272,13 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor
size = 8;
return arrow::Status::OK();
}
// Visitor overload for fixed-width binary Arrow arrays.
// Tags the visitor state as FixedSizeBinaryType, stores the array's
// per-value byte width in `size`, and keeps a non-owning pointer to the
// array in `fixedSizeBinaryArr`. Always reports success.
arrow::Status Visit(const arrow::FixedSizeBinaryArray &array)
{
    type = FixedSizeBinaryType;
    size = array.byte_width();
    fixedSizeBinaryArr = &array; // pointer only; the visitor does not copy the array
    return arrow::Status::OK();
}
arrow::Status Visit(const arrow::StringArray &array)
{
stringArr = &array;
Expand Down Expand Up @@ -348,6 +356,7 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor
const arrow::HalfFloatArray *halfFloatArr = nullptr;
const arrow::FloatArray *floatArr = nullptr;
const arrow::DoubleArray *doubleArr = nullptr;
const arrow::FixedSizeBinaryArray *fixedSizeBinaryArr = nullptr;
const arrow::StringArray *stringArr = nullptr;
const arrow::LargeStringArray *largeStringArr = nullptr;
const arrow::BinaryArray *binArr = nullptr;
Expand Down

0 comments on commit 727c51a

Please sign in to comment.