@@ -47,12 +47,18 @@ ReaderBase::ReaderBase(
4747}
4848
4949void ReaderBase::loadFileMetaData () {
50- bool preloadFile_ = fileLength_ <= filePreloadThreshold_;
50+ preloadFile_ = fileLength_ <= filePreloadThreshold_ ||
51+ fileLength_ <= directorySizeGuess_;
5152 uint64_t readSize =
5253 preloadFile_ ? fileLength_ : std::min (fileLength_, directorySizeGuess_);
5354
54- auto stream = input_->read (
55- fileLength_ - readSize, readSize, dwio::common::LogType::FOOTER);
55+ std::unique_ptr<dwio::common::SeekableInputStream> stream = nullptr ;
56+ if (preloadFile_) {
57+ stream = input_->readFile (fileLength_, dwio::common::LogType::FOOTER);
58+ } else {
59+ stream = input_->read (
60+ fileLength_ - readSize, readSize, dwio::common::LogType::FOOTER);
61+ }
5662
5763 std::vector<char > copy (readSize);
5864 const char * bufferStart = nullptr ;
@@ -465,19 +471,30 @@ void ReaderBase::scheduleRowGroups(
465471 currentGroup + 1 < rowGroupIds.size () ? rowGroupIds[currentGroup + 1 ] : 0 ;
466472 auto input = inputs_[thisGroup].get ();
467473 if (!input) {
468- auto newInput = input_->clone ();
469- reader.enqueueRowGroup (thisGroup, *newInput);
470- newInput->load (dwio::common::LogType::STRIPE);
471- inputs_[thisGroup] = std::move (newInput);
474+ if (preloadFile_) {
475+ // Read data from buffer directly.
476+ reader.enqueueRowGroup (thisGroup, *input_);
477+ inputs_[thisGroup] = input_;
478+ } else {
479+ auto newInput = input_->clone ();
480+ reader.enqueueRowGroup (thisGroup, *newInput);
481+ newInput->load (dwio::common::LogType::STRIPE);
482+ inputs_[thisGroup] = std::move (newInput);
483+ }
472484 }
473485 for (auto counter = 0 ; counter < FLAGS_parquet_prefetch_rowgroups;
474486 ++counter) {
475487 if (nextGroup) {
476- if (inputs_.count (nextGroup) != 0 ) {
477- auto newInput = input_->clone ();
478- reader.enqueueRowGroup (nextGroup, *newInput);
479- newInput->load (dwio::common::LogType::STRIPE);
480- inputs_[nextGroup] = std::move (newInput);
488+ if (inputs_.count (nextGroup) == 0 ) {
489+ if (preloadFile_) {
490+ reader.enqueueRowGroup (nextGroup, *input_);
491+ inputs_[nextGroup] = input_;
492+ } else {
493+ auto newInput = input_->clone ();
494+ reader.enqueueRowGroup (nextGroup, *newInput);
495+ newInput->load (dwio::common::LogType::STRIPE);
496+ inputs_[nextGroup] = std::move (newInput);
497+ }
481498 }
482499 } else {
483500 break ;
0 commit comments