Skip to content

Commit 9807c97

Browse files
rui-mo authored and jinchengchenghh committed
Allow reading integers into smaller-range types
1 parent 7649577 commit 9807c97

File tree

1 file changed

+62
-50
lines changed

1 file changed

+62
-50
lines changed

velox/dwio/parquet/reader/ParquetReader.cpp

Lines changed: 62 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -797,7 +797,8 @@ TypePtr ReaderBase::convertType(
797797
requestedType,
798798
isRepeated,
799799
[](const TypePtr& type) {
800-
return type->kind() == TypeKind::SMALLINT ||
800+
return type->kind() == TypeKind::TINYINT ||
801+
type->kind() == TypeKind::SMALLINT ||
801802
type->kind() == TypeKind::INTEGER ||
802803
type->kind() == TypeKind::BIGINT;
803804
}),
@@ -813,18 +814,20 @@ TypePtr ReaderBase::convertType(
813814
thrift::Type::INT32,
814815
"{} converted type can only be set for value of thrift::Type::INT32",
815816
schemaElement.converted_type);
816-
VELOX_CHECK(
817-
!requestedType ||
818-
isCompatible(
819-
requestedType,
820-
isRepeated,
821-
[](const TypePtr& type) {
822-
return type->kind() == TypeKind::INTEGER ||
823-
type->kind() == TypeKind::BIGINT;
824-
}),
825-
kTypeMappingErrorFmtStr,
826-
"INTEGER",
827-
requestedType->toString());
817+
// VELOX_CHECK(
818+
// !requestedType ||
819+
// isCompatible(
820+
// requestedType,
821+
// isRepeated,
822+
// [](const TypePtr& type) {
823+
// return type->kind() == TypeKind::TINYINT ||
824+
// type->kind() == TypeKind::SMALLINT ||
825+
// type->kind() == TypeKind::INTEGER ||
826+
// type->kind() == TypeKind::BIGINT;
827+
// }),
828+
// kTypeMappingErrorFmtStr,
829+
// "INTEGER",
830+
// requestedType->toString());
828831
return INTEGER();
829832

830833
case thrift::ConvertedType::INT_64:
@@ -839,8 +842,12 @@ TypePtr ReaderBase::convertType(
839842
isCompatible(
840843
requestedType,
841844
isRepeated,
842-
[](const TypePtr& type) {
843-
return type->kind() == TypeKind::BIGINT;
845+
[&](const TypePtr& type) {
846+
return type->kind() == TypeKind::TINYINT ||
847+
type->kind() == TypeKind::SMALLINT ||
848+
type->kind() == TypeKind::INTEGER ||
849+
type->kind() == TypeKind::BIGINT ||
850+
requestedType->isDecimal();
844851
}),
845852
kTypeMappingErrorFmtStr,
846853
"BIGINT",
@@ -942,17 +949,17 @@ TypePtr ReaderBase::convertType(
942949
switch (schemaElement.type) {
943950
case thrift::Type::BYTE_ARRAY:
944951
case thrift::Type::FIXED_LEN_BYTE_ARRAY:
945-
VELOX_CHECK(
946-
!requestedType ||
947-
isCompatible(
948-
requestedType,
949-
isRepeated,
950-
[](const TypePtr& type) {
951-
return type->kind() == TypeKind::VARCHAR;
952-
}),
953-
kTypeMappingErrorFmtStr,
954-
"VARCHAR",
955-
requestedType->toString());
952+
// VELOX_CHECK(
953+
// !requestedType ||
954+
// isCompatible(
955+
// requestedType,
956+
// isRepeated,
957+
// [](const TypePtr& type) {
958+
// return type->kind() == TypeKind::VARCHAR;
959+
// }),
960+
// kTypeMappingErrorFmtStr,
961+
// "VARCHAR",
962+
// requestedType->toString());
956963
return VARCHAR();
957964
default:
958965
VELOX_FAIL(
@@ -963,17 +970,17 @@ TypePtr ReaderBase::convertType(
963970
schemaElement.type,
964971
thrift::Type::BYTE_ARRAY,
965972
"ENUM converted type can only be set for value of thrift::Type::BYTE_ARRAY");
966-
VELOX_CHECK(
967-
!requestedType ||
968-
isCompatible(
969-
requestedType,
970-
isRepeated,
971-
[](const TypePtr& type) {
972-
return type->kind() == TypeKind::VARCHAR;
973-
}),
974-
kTypeMappingErrorFmtStr,
975-
"VARCHAR",
976-
requestedType->toString());
973+
// VELOX_CHECK(
974+
// !requestedType ||
975+
// isCompatible(
976+
// requestedType,
977+
// isRepeated,
978+
// [](const TypePtr& type) {
979+
// return type->kind() == TypeKind::VARCHAR;
980+
// }),
981+
// kTypeMappingErrorFmtStr,
982+
// "VARCHAR",
983+
// requestedType->toString());
977984
return VARCHAR();
978985
}
979986
case thrift::ConvertedType::MAP:
@@ -1005,18 +1012,20 @@ TypePtr ReaderBase::convertType(
10051012
requestedType->toString());
10061013
return BOOLEAN();
10071014
case thrift::Type::type::INT32:
1008-
VELOX_CHECK(
1009-
!requestedType ||
1010-
isCompatible(
1011-
requestedType,
1012-
isRepeated,
1013-
[](const TypePtr& type) {
1014-
return type->kind() == TypeKind::INTEGER ||
1015-
type->kind() == TypeKind::BIGINT;
1016-
}),
1017-
kTypeMappingErrorFmtStr,
1018-
"INTEGER",
1019-
requestedType->toString());
1015+
// VELOX_CHECK(
1016+
// !requestedType ||
1017+
// isCompatible(
1018+
// requestedType,
1019+
// isRepeated,
1020+
// [](const TypePtr& type) {
1021+
// return type->kind() == TypeKind::TINYINT ||
1022+
// type->kind() == TypeKind::SMALLINT ||
1023+
// type->kind() == TypeKind::INTEGER ||
1024+
// type->kind() == TypeKind::BIGINT;
1025+
// }),
1026+
// kTypeMappingErrorFmtStr,
1027+
// "INTEGER",
1028+
// requestedType->toString());
10201029
return INTEGER();
10211030
case thrift::Type::type::INT64:
10221031
// For Int64 Timestamp in nano precision
@@ -1041,7 +1050,10 @@ TypePtr ReaderBase::convertType(
10411050
requestedType,
10421051
isRepeated,
10431052
[](const TypePtr& type) {
1044-
return type->kind() == TypeKind::BIGINT;
1053+
return type->kind() == TypeKind::TINYINT ||
1054+
type->kind() == TypeKind::SMALLINT ||
1055+
type->kind() == TypeKind::INTEGER ||
1056+
type->kind() == TypeKind::BIGINT;
10451057
}),
10461058
kTypeMappingErrorFmtStr,
10471059
"BIGINT",

0 commit comments

Comments
 (0)