From e9718a02ed0ad5ccd29826090563c490900b7024 Mon Sep 17 00:00:00 2001 From: ZongtianHou Date: Mon, 12 Oct 2020 15:14:13 +0800 Subject: [PATCH] HAWQ-1767. enhance orc writer and reader clean magma type --- GNUmakefile.in | 6 ++++++ contrib/exthdfs/exthdfs.c | 2 -- .../dbcommon/common/vector/variable-length-vector.h | 1 + depends/dbcommon/src/dbcommon/nodes/datum.h | 6 ++++-- depends/dbcommon/src/dbcommon/type/magma-tid.h | 2 +- .../src/storage/format/orc/orc-format-reader.cc | 13 +++++++++++-- .../src/storage/format/orc/orc-format-writer.cc | 2 +- 7 files changed, 24 insertions(+), 8 deletions(-) diff --git a/GNUmakefile.in b/GNUmakefile.in index bef6c0dac4..b69df1b00f 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -11,11 +11,17 @@ include $(top_builddir)/src/Makefile.global all: # $(MAKE) -C doc $@ $(MAKE) -C depends/thirdparty/googletest $@ + $(MAKE) -C depends/thirdparty/googletest install $(MAKE) -C depends/libhdfs3 $@ + $(MAKE) -C depends/libhdfs3 install $(MAKE) -C depends/libyarn $@ + $(MAKE) -C depends/libyarn install $(MAKE) -C depends/dbcommon $@ + $(MAKE) -C depends/dbcommon install $(MAKE) -C depends/univplan $@ + $(MAKE) -C depends/univplan install $(MAKE) -C depends/storage $@ + $(MAKE) -C depends/storage install $(MAKE) -C src $@ $(MAKE) -C config $@ $(MAKE) -C contrib $@ diff --git a/contrib/exthdfs/exthdfs.c b/contrib/exthdfs/exthdfs.c index 28808a7129..09f60f20e3 100644 --- a/contrib/exthdfs/exthdfs.c +++ b/contrib/exthdfs/exthdfs.c @@ -170,8 +170,6 @@ Datum hdfsprotocol_blocklocation(PG_FUNCTION_ARGS) FscHdfsGetFileBlockLocationFromArray(bla, bidx); BlockLocation *bl = &(blf->locations[bidx]); bl->numOfNodes = FscHdfsGetFileBlockLocationNNodes(blo); - bl->rangeId = -1; - bl->replicaGroupId = -1; bl->hosts = (char **) palloc0(sizeof(char *) * bl->numOfNodes); bl->names = (char **) palloc0(sizeof(char *) * bl->numOfNodes); bl->topologyPaths = (char **) palloc0( diff --git a/depends/dbcommon/src/dbcommon/common/vector/variable-length-vector.h b/depends/dbcommon/src/dbcommon/common/vector/variable-length-vector.h index e23b39e568..91638793f1 100644 --- a/depends/dbcommon/src/dbcommon/common/vector/variable-length-vector.h +++ b/depends/dbcommon/src/dbcommon/common/vector/variable-length-vector.h @@ -157,6 +157,7 @@ class VariableSizeTypeVector : public Vector { ? getValPtrPlain(end) : getValPtrPlain(end - 1) + getLengths()[end - 1]; uint64_t valSz = static_cast(pEnd - pStart); + if (values.data()) { ret->setValue(pStart, valSz); } diff --git a/depends/dbcommon/src/dbcommon/nodes/datum.h b/depends/dbcommon/src/dbcommon/nodes/datum.h index fc78a24e8d..4b8bf7f773 100644 --- a/depends/dbcommon/src/dbcommon/nodes/datum.h +++ b/depends/dbcommon/src/dbcommon/nodes/datum.h @@ -186,7 +186,9 @@ struct MagmaTid { return cmp(x, y) == 0; } - friend bool operator==(const MagmaTid &x, const int &y) { return false; } + friend bool operator==(const MagmaTid &x, const int &y) { + return false; + } friend bool operator!=(const MagmaTid &x, const MagmaTid &y) { return cmp(x, y) != 0; @@ -213,7 +215,7 @@ struct MagmaTid { return x.rangeId < y.rangeId ? -1 : 1; } } -}; +}; // namespace dbcommon /** * A structure to represent a scalar value diff --git a/depends/dbcommon/src/dbcommon/type/magma-tid.h b/depends/dbcommon/src/dbcommon/type/magma-tid.h index 25a51ab53c..da602c0489 100644 --- a/depends/dbcommon/src/dbcommon/type/magma-tid.h +++ b/depends/dbcommon/src/dbcommon/type/magma-tid.h @@ -63,7 +63,7 @@ class MagmaTidType : public FixedSizeTypeBase { return std::to_string(val.rowId); } - uint64_t getTypeWidth() const override { return kWidth; } + uint64_t getTypeWidth() const override { return sizeof(MagmaTid); } std::string DatumToString(const Datum &d) const override { LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "MagmaTid type not supported yet"); diff --git a/depends/storage/src/storage/format/orc/orc-format-reader.cc b/depends/storage/src/storage/format/orc/orc-format-reader.cc index 12a60c9617..7586212707 100644 --- a/depends/storage/src/storage/format/orc/orc-format-reader.cc +++ b/depends/storage/src/storage/format/orc/orc-format-reader.cc @@ -276,14 +276,23 @@ dbcommon::TupleBatch::uptr ORCFormatReader::createTupleBatch( bool isDecimal = structBatch->fields[0]->getType() == orc::ORCTypeKind::DECIMAL; auto vecSum = dbcommon::Vector::BuildVector( - isDecimal ? dbcommon::TypeKind::DECIMALID + isDecimal ? dbcommon::TypeKind::DECIMALNEWID : dbcommon::TypeKind::DOUBLEID, false); auto vecCount = dbcommon::Vector::BuildVector(dbcommon::TypeKind::BIGINTID, false); { auto b0 = structBatch->fields[0]; - vecSum->setValue(b0->getData(), b0->numElements * b0->getWidth()); + if (isDecimal) { + vecSum->setValue(b0->getData(), + b0->numElements * b0->getWidth() / 3); + vecSum->setAuxiliaryValue(b0->getAuxiliaryData(), + b0->numElements * b0->getWidth() / 3); + vecSum->setScaleValue(b0->getScaleData(), + b0->numElements * b0->getWidth() / 3); + } else { + vecSum->setValue(b0->getData(), b0->numElements * b0->getWidth()); + } vecSum->setHasNull(b0->hasNulls); if (b0->hasNulls) vecSum->setNotNulls(b0->getNotNull(), b0->numElements); diff --git a/depends/storage/src/storage/format/orc/orc-format-writer.cc b/depends/storage/src/storage/format/orc/orc-format-writer.cc index e59d636122..a9f77e952b 100644 --- a/depends/storage/src/storage/format/orc/orc-format-writer.cc +++ b/depends/storage/src/storage/format/orc/orc-format-writer.cc @@ -205,7 +205,7 @@ std::unique_ptr ORCFormatWriter::buildSchema( break; case dbcommon::TypeKind::AVG_DECIMAL_TRANS_DATA_ID: child.reset(new orc::TypeImpl(orc::ORCTypeKind::STRUCT)); - grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::DECIMAL)); + grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::DECIMAL, 38, 2)); child->addStructField(name, std::move(grandchild)); grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::LONG)); child->addStructField(name, std::move(grandchild));