Skip to content

Commit 97e308e

Browse files
rui-mozhejiangxiaomai
authored and committed
Fix the output type of complex type vector (#359)
1 parent 5ff5303 commit 97e308e

File tree

6 files changed

+90
-10
lines changed

6 files changed

+90
-10
lines changed

velox/dwio/common/SelectiveRepeatedColumnReader.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,14 +192,19 @@ void SelectiveListColumnReader::getValues(RowSet rows, VectorPtr* result) {
192192
}
193193
*result = std::make_shared<ArrayVector>(
194194
&memoryPool_,
195-
requestedType_->type,
195+
outputType_ ? outputType_ : requestedType_->type,
196196
anyNulls_ ? resultNulls_ : nullptr,
197197
rows.size(),
198198
offsets_,
199199
sizes_,
200200
elements);
201201
}
202202

203+
void SelectiveListColumnReader::setOutputType(
204+
const std::shared_ptr<const ArrayType>& outputType) {
205+
outputType_ = outputType;
206+
}
207+
203208
SelectiveMapColumnReader::SelectiveMapColumnReader(
204209
const std::shared_ptr<const dwio::common::TypeWithId>& requestedType,
205210
const std::shared_ptr<const dwio::common::TypeWithId>& dataType,
@@ -279,7 +284,7 @@ void SelectiveMapColumnReader::getValues(RowSet rows, VectorPtr* result) {
279284
}
280285
*result = std::make_shared<MapVector>(
281286
&memoryPool_,
282-
requestedType_->type,
287+
outputType_ ? outputType_ : requestedType_->type,
283288
anyNulls_ ? resultNulls_ : nullptr,
284289
rows.size(),
285290
offsets_,
@@ -288,4 +293,9 @@ void SelectiveMapColumnReader::getValues(RowSet rows, VectorPtr* result) {
288293
values);
289294
}
290295

296+
void SelectiveMapColumnReader::setOutputType(
297+
const std::shared_ptr<const MapType>& outputType) {
298+
outputType_ = outputType;
299+
}
300+
291301
} // namespace facebook::velox::dwio::common

velox/dwio/common/SelectiveRepeatedColumnReader.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,13 @@ class SelectiveListColumnReader : public SelectiveRepeatedColumnReader {
109109
void getValues(RowSet rows, VectorPtr* FOLLY_NULLABLE result) override;
110110

111111
protected:
112+
void setOutputType(const std::shared_ptr<const ArrayType>& outputType);
113+
112114
std::unique_ptr<SelectiveColumnReader> child_;
113115
const std::shared_ptr<const dwio::common::TypeWithId> requestedType_;
116+
117+
private:
118+
std::shared_ptr<const ArrayType> outputType_ = nullptr;
114119
};
115120

116121
class SelectiveMapColumnReader : public SelectiveRepeatedColumnReader {
@@ -138,6 +143,12 @@ class SelectiveMapColumnReader : public SelectiveRepeatedColumnReader {
138143
std::unique_ptr<SelectiveColumnReader> keyReader_;
139144
std::unique_ptr<SelectiveColumnReader> elementReader_;
140145
const std::shared_ptr<const dwio::common::TypeWithId> requestedType_;
146+
147+
protected:
148+
void setOutputType(const std::shared_ptr<const MapType>& outputType);
149+
150+
private:
151+
std::shared_ptr<const MapType> outputType_ = nullptr;
141152
};
142153

143154
} // namespace facebook::velox::dwio::common

velox/dwio/common/SelectiveStructColumnReader.cpp

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ void fillRowVectorChildren(
216216
}
217217
}
218218
}
219+
219220
} // namespace
220221

221222
void SelectiveStructColumnReaderBase::getValues(
@@ -228,16 +229,14 @@ void SelectiveStructColumnReaderBase::getValues(
228229
VELOX_CHECK(
229230
result->get()->type()->isRow(),
230231
"Struct reader expects a result of type ROW.");
231-
auto& rowType = result->get()->type()->asRow();
232-
if (!result->unique() || result->get()->isLazy()) {
232+
checkOutputType(outputType_, asRowType(requestedType_->type));
233+
const auto& outDataType = outputType_ ? outputType_ : result->get()->type();
234+
auto& rowType = outDataType->asRow();
235+
if (outputType_ || !result->unique() || result->get()->isLazy()) {
233236
std::vector<VectorPtr> children(rowType.size());
234237
fillRowVectorChildren(*result->get()->pool(), rowType, children);
235238
*result = std::make_unique<RowVector>(
236-
result->get()->pool(),
237-
result->get()->type(),
238-
nullptr,
239-
0,
240-
std::move(children));
239+
result->get()->pool(), outDataType, nullptr, 0, std::move(children));
241240
}
242241
auto* resultRow = static_cast<RowVector*>(result->get());
243242
resultRow->resize(rows.size());
@@ -277,7 +276,7 @@ void SelectiveStructColumnReaderBase::getValues(
277276
}
278277
resultRow->childAt(channel) = std::make_shared<LazyVector>(
279278
&memoryPool_,
280-
resultRow->type()->childAt(channel),
279+
outDataType->childAt(channel),
281280
rows.size(),
282281
std::make_unique<ColumnLoader>(this, children_[index], numReads_));
283282
} else {
@@ -287,4 +286,52 @@ void SelectiveStructColumnReaderBase::getValues(
287286
}
288287
}
289288

289+
void SelectiveStructColumnReaderBase::setOutputType(
290+
const RowTypePtr& outputType) {
291+
outputType_ = outputType;
292+
}
293+
294+
/**
295+
* Check the output type against requested type on compatibility.
296+
* @param outputType: the output type from user.
297+
* @param requestedType: the type from Parquet.
298+
*/
299+
void SelectiveStructColumnReaderBase::checkOutputType(
300+
const RowTypePtr& outputType,
301+
const RowTypePtr& requestedType) {
302+
if (outputType == nullptr) {
303+
return;
304+
}
305+
VELOX_CHECK_NOT_NULL(requestedType);
306+
for (int i = 0; i < outputType->size(); ++i) {
307+
if (!requestedType->containsChild(outputType->nameOf(i)))
308+
continue;
309+
310+
bool isPartitionColumn = false;
311+
for (const auto& childSpec : scanSpec_->children()) {
312+
if (childSpec->fieldName() == outputType->nameOf(i) &&
313+
childSpec->isConstant()) {
314+
isPartitionColumn = true;
315+
break;
316+
}
317+
}
318+
// Skip the type check for partition column because requested type does not
319+
// contain it.
320+
if (isPartitionColumn)
321+
continue;
322+
323+
const auto& childOutputType = outputType->childAt(i);
324+
const auto& childRequestedType =
325+
requestedType->findChild(outputType->nameOf(i));
326+
if (auto rowTypePtr = asRowType(childOutputType)) {
327+
VELOX_CHECK_NOT_NULL(asRowType(childRequestedType));
328+
checkOutputType(
329+
asRowType(childOutputType), asRowType(childRequestedType));
330+
continue;
331+
}
332+
VELOX_CHECK(BaseVector::compatibleKind(
333+
childOutputType->kind(), childRequestedType->kind()));
334+
}
335+
}
336+
290337
} // namespace facebook::velox::dwio::common

velox/dwio/common/SelectiveStructColumnReader.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
116116
return hasMutation_;
117117
}
118118

119+
void setOutputType(const RowTypePtr& outputType);
120+
119121
const std::shared_ptr<const dwio::common::TypeWithId> requestedType_;
120122

121123
std::vector<SelectiveColumnReader*> children_;
@@ -141,6 +143,13 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
141143
// and query. Set at construction, which takes place on first
142144
// use. If no ExceptionContext is in effect, this is "".
143145
const std::string debugString_;
146+
147+
private:
148+
void checkOutputType(
149+
const RowTypePtr& outputType,
150+
const RowTypePtr& requestedType);
151+
152+
RowTypePtr outputType_ = nullptr;
144153
};
145154

146155
struct SelectiveStructColumnReader : SelectiveStructColumnReaderBase {

velox/dwio/parquet/reader/RepeatedColumnReader.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ MapColumnReader::MapColumnReader(
125125
scanSpec) {
126126
const std::shared_ptr<const MapType>& mapTypePtr =
127127
std::dynamic_pointer_cast<const MapType>(colType);
128+
setOutputType(mapTypePtr);
128129
auto& keyChildType = requestedType->childAt(0);
129130
auto& elementChildType = requestedType->childAt(1);
130131
keyReader_ = ParquetColumnReader::build(
@@ -249,6 +250,7 @@ ListColumnReader::ListColumnReader(
249250
auto& childType = requestedType->childAt(0);
250251
const std::shared_ptr<const ArrayType>& arrayTypePtr =
251252
std::dynamic_pointer_cast<const ArrayType>(colType);
253+
setOutputType(arrayTypePtr);
252254
child_ = ParquetColumnReader::build(
253255
childType,
254256
params,

velox/dwio/parquet/reader/StructColumnReader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ StructColumnReader::StructColumnReader(
4747
rowTypePtr = asRowType(colType);
4848
VELOX_CHECK_NOT_NULL(rowTypePtr);
4949
}
50+
setOutputType(rowTypePtr);
5051

5152
auto& childSpecs = scanSpec_->children();
5253
if (rowTypePtr && !caseSensitive) {

0 commit comments

Comments
 (0)