Skip to content

Commit 9f0ef60

Browse files
rui-mo authored and glutenperfbot committed
Allow reading integers into smaller-range types
1 parent a4f1a74 commit 9f0ef60

File tree

1 file changed

+62
-50
lines changed

1 file changed

+62
-50
lines changed

velox/dwio/parquet/reader/ParquetReader.cpp

Lines changed: 62 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -795,7 +795,8 @@ TypePtr ReaderBase::convertType(
795795
requestedType,
796796
isRepeated,
797797
[](const TypePtr& type) {
798-
return type->kind() == TypeKind::SMALLINT ||
798+
return type->kind() == TypeKind::TINYINT ||
799+
type->kind() == TypeKind::SMALLINT ||
799800
type->kind() == TypeKind::INTEGER ||
800801
type->kind() == TypeKind::BIGINT;
801802
}),
@@ -811,18 +812,20 @@ TypePtr ReaderBase::convertType(
811812
thrift::Type::INT32,
812813
"{} converted type can only be set for value of thrift::Type::INT32",
813814
schemaElement.converted_type);
814-
VELOX_CHECK(
815-
!requestedType ||
816-
isCompatible(
817-
requestedType,
818-
isRepeated,
819-
[](const TypePtr& type) {
820-
return type->kind() == TypeKind::INTEGER ||
821-
type->kind() == TypeKind::BIGINT;
822-
}),
823-
kTypeMappingErrorFmtStr,
824-
"INTEGER",
825-
requestedType->toString());
815+
// VELOX_CHECK(
816+
// !requestedType ||
817+
// isCompatible(
818+
// requestedType,
819+
// isRepeated,
820+
// [](const TypePtr& type) {
821+
// return type->kind() == TypeKind::TINYINT ||
822+
// type->kind() == TypeKind::SMALLINT ||
823+
// type->kind() == TypeKind::INTEGER ||
824+
// type->kind() == TypeKind::BIGINT;
825+
// }),
826+
// kTypeMappingErrorFmtStr,
827+
// "INTEGER",
828+
// requestedType->toString());
826829
return INTEGER();
827830

828831
case thrift::ConvertedType::INT_64:
@@ -837,8 +840,12 @@ TypePtr ReaderBase::convertType(
837840
isCompatible(
838841
requestedType,
839842
isRepeated,
840-
[](const TypePtr& type) {
841-
return type->kind() == TypeKind::BIGINT;
843+
[&](const TypePtr& type) {
844+
return type->kind() == TypeKind::TINYINT ||
845+
type->kind() == TypeKind::SMALLINT ||
846+
type->kind() == TypeKind::INTEGER ||
847+
type->kind() == TypeKind::BIGINT ||
848+
requestedType->isDecimal();
842849
}),
843850
kTypeMappingErrorFmtStr,
844851
"BIGINT",
@@ -940,17 +947,17 @@ TypePtr ReaderBase::convertType(
940947
switch (schemaElement.type) {
941948
case thrift::Type::BYTE_ARRAY:
942949
case thrift::Type::FIXED_LEN_BYTE_ARRAY:
943-
VELOX_CHECK(
944-
!requestedType ||
945-
isCompatible(
946-
requestedType,
947-
isRepeated,
948-
[](const TypePtr& type) {
949-
return type->kind() == TypeKind::VARCHAR;
950-
}),
951-
kTypeMappingErrorFmtStr,
952-
"VARCHAR",
953-
requestedType->toString());
950+
// VELOX_CHECK(
951+
// !requestedType ||
952+
// isCompatible(
953+
// requestedType,
954+
// isRepeated,
955+
// [](const TypePtr& type) {
956+
// return type->kind() == TypeKind::VARCHAR;
957+
// }),
958+
// kTypeMappingErrorFmtStr,
959+
// "VARCHAR",
960+
// requestedType->toString());
954961
return VARCHAR();
955962
default:
956963
VELOX_FAIL(
@@ -961,17 +968,17 @@ TypePtr ReaderBase::convertType(
961968
schemaElement.type,
962969
thrift::Type::BYTE_ARRAY,
963970
"ENUM converted type can only be set for value of thrift::Type::BYTE_ARRAY");
964-
VELOX_CHECK(
965-
!requestedType ||
966-
isCompatible(
967-
requestedType,
968-
isRepeated,
969-
[](const TypePtr& type) {
970-
return type->kind() == TypeKind::VARCHAR;
971-
}),
972-
kTypeMappingErrorFmtStr,
973-
"VARCHAR",
974-
requestedType->toString());
971+
// VELOX_CHECK(
972+
// !requestedType ||
973+
// isCompatible(
974+
// requestedType,
975+
// isRepeated,
976+
// [](const TypePtr& type) {
977+
// return type->kind() == TypeKind::VARCHAR;
978+
// }),
979+
// kTypeMappingErrorFmtStr,
980+
// "VARCHAR",
981+
// requestedType->toString());
975982
return VARCHAR();
976983
}
977984
case thrift::ConvertedType::MAP:
@@ -1003,18 +1010,20 @@ TypePtr ReaderBase::convertType(
10031010
requestedType->toString());
10041011
return BOOLEAN();
10051012
case thrift::Type::type::INT32:
1006-
VELOX_CHECK(
1007-
!requestedType ||
1008-
isCompatible(
1009-
requestedType,
1010-
isRepeated,
1011-
[](const TypePtr& type) {
1012-
return type->kind() == TypeKind::INTEGER ||
1013-
type->kind() == TypeKind::BIGINT;
1014-
}),
1015-
kTypeMappingErrorFmtStr,
1016-
"INTEGER",
1017-
requestedType->toString());
1013+
// VELOX_CHECK(
1014+
// !requestedType ||
1015+
// isCompatible(
1016+
// requestedType,
1017+
// isRepeated,
1018+
// [](const TypePtr& type) {
1019+
// return type->kind() == TypeKind::TINYINT ||
1020+
// type->kind() == TypeKind::SMALLINT ||
1021+
// type->kind() == TypeKind::INTEGER ||
1022+
// type->kind() == TypeKind::BIGINT;
1023+
// }),
1024+
// kTypeMappingErrorFmtStr,
1025+
// "INTEGER",
1026+
// requestedType->toString());
10181027
return INTEGER();
10191028
case thrift::Type::type::INT64:
10201029
// For Int64 Timestamp in nano precision
@@ -1039,7 +1048,10 @@ TypePtr ReaderBase::convertType(
10391048
requestedType,
10401049
isRepeated,
10411050
[](const TypePtr& type) {
1042-
return type->kind() == TypeKind::BIGINT;
1051+
return type->kind() == TypeKind::TINYINT ||
1052+
type->kind() == TypeKind::SMALLINT ||
1053+
type->kind() == TypeKind::INTEGER ||
1054+
type->kind() == TypeKind::BIGINT;
10431055
}),
10441056
kTypeMappingErrorFmtStr,
10451057
"BIGINT",

0 commit comments

Comments
 (0)