@@ -81,6 +81,16 @@ class ReaderBase {
8181 // / the data still exists in the buffered inputs.
8282 bool isRowGroupBuffered (int32_t rowGroupIndex) const ;
8383
84+ /// @brief Convert the top-level field names of a row type to lower case when
85+ /// fileColumnNamesReadAsLowerCase is true; otherwise return the input as is.
86+ /// @param rowTypePtr the input row type.
87+ /// @param fileColumnNamesReadAsLowerCase whether to convert names into lower
88+ /// case (ASCII lower-casing).
89+ /// @return row type with names converted; child types are passed through.
90+ static std::shared_ptr<const RowType> convertRowTypeNames (
91+ const RowTypePtr& rowTypePtr,
92+ bool fileColumnNamesReadAsLowerCase);
93+
8494 private:
8595 // Reads and parses file footer.
8696 void loadFileMetaData ();
@@ -543,22 +553,39 @@ TypePtr ReaderBase::convertType(
543553 }
544554}
545555
556+ std::shared_ptr<const RowType> ReaderBase::convertRowTypeNames (
557+ const RowTypePtr& rowTypePtr,
558+ bool fileColumnNamesReadAsLowerCase) {
559+ if (!fileColumnNamesReadAsLowerCase) {
560+ return rowTypePtr;
561+ }
562+ std::vector<std::string> names;
563+ names.reserve (rowTypePtr->names ().size ());
564+ std::vector<TypePtr> types = rowTypePtr->children ();
565+ for (const auto & name : rowTypePtr->names ()) {
566+ std::string childName = name;
567+ folly::toLowerAscii (childName);
568+ names.emplace_back (childName);
569+ }
570+ return TypeFactory<TypeKind::ROW>::create (std::move (names), std::move (types));
571+ }
572+
// Builds a ROW type from `children`: each child's name is read through a
// static_pointer_cast to ParquetTypeWithId (`name_`) and its type through
// `type()`.
// Diff summary: the removed ('-') lines lower-cased each child name inline
// when fileColumnNamesReadAsLowerCase was set; the added ('+') lines reserve
// both vectors up front, collect the names unchanged, and delegate the
// optional lower-casing to convertRowTypeNames() on the freshly created type.
546573std::shared_ptr<const RowType> ReaderBase::createRowType (
547574 std::vector<std::shared_ptr<const ParquetTypeWithId::TypeWithId>> children,
548575 bool fileColumnNamesReadAsLowerCase) {
549576 std::vector<std::string> childNames;
577+ childNames.reserve (children.size ());
550578 std::vector<TypePtr> childTypes;
551- for (auto & child : children) {
552- auto childName =
553- std::static_pointer_cast<const ParquetTypeWithId>(child)->name_ ;
554- if (fileColumnNamesReadAsLowerCase) {
555- folly::toLowerAscii (childName);
556- }
557- childNames.push_back (std::move (childName));
558- childTypes.push_back (child->type ());
579+ childTypes.reserve (children.size ());
580+ for (const auto & child : children) {
581+ childNames.emplace_back (
582+ std::static_pointer_cast<const ParquetTypeWithId>(child)->name_ );
583+ childTypes.emplace_back (child->type ());
559584 }
560- return TypeFactory<TypeKind::ROW>::create (
561- std::move (childNames), std::move (childTypes));
585+ return convertRowTypeNames (
586+ TypeFactory<TypeKind::ROW>::create (
587+ std::move (childNames), std::move (childTypes)),
588+ fileColumnNamesReadAsLowerCase);
562589}
563590
564591void ReaderBase::scheduleRowGroups (
@@ -639,7 +666,11 @@ ParquetRowReader::ParquetRowReader(
639666 columnReader_ = ParquetColumnReader::build (
640667 readerBase_->schemaWithId (), // Id is schema id
641668 params,
642- *options_.getScanSpec ());
669+ *options_.getScanSpec (),
670+ ReaderBase::convertRowTypeNames (
671+ asRowType (options_.getSelector ()->getSchemaWithId ()->type ()),
672+ readerBase_->isFileColumnNamesReadAsLowerCase ()),
673+ pool_);
643674
644675 filterRowGroups ();
645676 if (!rowGroupIds_.empty ()) {
0 commit comments