@@ -524,6 +524,51 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) {
524524 }
525525 break ;
526526 }
// Dictionary page holding INT96 values: load the raw fixed-width entries into
// dictionary_.values, then rewrite each one as a Timestamp in that same
// buffer.
527+ case thrift::Type::INT96: {
// The buffer is sized for the Timestamp output (numVeloxBytes), while only
// numBytes (numValues * sizeof(Int96Timestamp)) of raw input are loaded into
// its front.
528+ auto numVeloxBytes = dictionary_.numValues * sizeof (Timestamp);
529+ dictionary_.values = AlignedBuffer::allocate<char >(numVeloxBytes, &pool_);
530+ auto numBytes = dictionary_.numValues * sizeof (Int96Timestamp);
// Raw bytes are copied from pageData_ when it is set; otherwise they are
// pulled from inputStream_ through dwio::common::readBytes using the
// bufferStart_/bufferEnd_ window.
531+ if (pageData_) {
532+ memcpy (dictionary_.values ->asMutable <char >(), pageData_, numBytes);
533+ } else {
534+ dwio::common::readBytes (
535+ numBytes,
536+ inputStream_.get (),
537+ dictionary_.values ->asMutable <char >(),
538+ bufferStart_,
539+ bufferEnd_);
540+ }
541+ // Expand the Parquet type length values to Velox type length.
542+ // We start from the end to allow in-place expansion.
// NOTE(review): in-place expansion is only safe if sizeof(Timestamp) >=
// sizeof(Int96Timestamp), so that writing values[i] never overwrites a raw
// entry with a lower index that has not been read yet - confirm both sizes.
// `values` and `parquetValues` alias the same buffer: the former views it as
// Timestamp output slots, the latter as the raw input bytes.
543+ auto values = dictionary_.values ->asMutable <Timestamp>();
544+ auto parquetValues = dictionary_.values ->asMutable <char >();
// kJulianToUnixEpochDays is subtracted from the stored day count to rebase it
// onto the Unix epoch; kSecondsPerDay and kNanosPerSecond are unit conversion
// factors used below.
545+ static constexpr int64_t kJulianToUnixEpochDays = 2440588LL ;
546+ static constexpr int64_t kSecondsPerDay = 86400LL ;
547+ static constexpr int64_t kNanosPerSecond =
548+ Timestamp::kNanosecondsInMillisecond *
549+ Timestamp::kMillisecondsInSecond ;
// NOTE(review): the `i >= 0` condition only terminates if `i` is deduced as a
// signed type - confirm dictionary_.numValues is signed.
550+ for (auto i = dictionary_.numValues - 1 ; i >= 0 ; --i) {
551+ // Convert the timestamp into seconds and nanos since the Unix epoch,
552+ // 00:00:00.000000 on 1 January 1970.
// Entry layout as read here: the first sizeof(uint64_t) bytes are the nanos
// field and the next sizeof(int32_t) bytes are the day count. Both fields are
// copied out before values[i] is written.
// Presumably nanos is the nanoseconds-within-day component of a Parquet INT96
// timestamp - verify against the Parquet format spec.
553+ uint64_t nanos;
554+ memcpy (
555+ &nanos,
556+ parquetValues + i * sizeof (Int96Timestamp),
557+ sizeof (uint64_t ));
558+ int32_t days;
559+ memcpy (
560+ &days,
561+ parquetValues + i * sizeof (Int96Timestamp) + sizeof (uint64_t ),
562+ sizeof (int32_t ));
// Whole days since the Unix epoch, expressed in seconds. The subtraction is
// carried out in int64_t because kJulianToUnixEpochDays is int64_t.
563+ int64_t seconds = (days - kJulianToUnixEpochDays ) * kSecondsPerDay ;
// When nanos exceeds Timestamp::kMaxNanos, move its whole seconds into
// `seconds` and keep only the sub-second remainder in `nanos`.
564+ if (nanos > Timestamp::kMaxNanos ) {
565+ seconds += nanos / kNanosPerSecond ;
566+ nanos -= (nanos / kNanosPerSecond ) * kNanosPerSecond ;
567+ }
568+ values[i] = Timestamp (seconds, nanos);
569+ }
570+ break ;
571+ }
527572 case thrift::Type::BYTE_ARRAY: {
528573 dictionary_.values =
529574 AlignedBuffer::allocate<StringView>(dictionary_.numValues , &pool_);
@@ -614,7 +659,6 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) {
614659 VELOX_UNSUPPORTED (
615660 " Parquet type {} not supported for dictionary" , parquetType);
616661 }
617- case thrift::Type::INT96:
618662 default :
619663 VELOX_UNSUPPORTED (
620664 " Parquet type {} not supported for dictionary" , parquetType);
@@ -641,6 +685,8 @@ int32_t parquetTypeBytes(thrift::Type::type type) {
641685 case thrift::Type::INT64:
642686 case thrift::Type::DOUBLE:
643687 return 8 ;
688+ case thrift::Type::INT96:
689+ return 12 ;
644690 default :
645691 VELOX_FAIL (" Type does not have a byte width {}" , type);
646692 }
0 commit comments