Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
317 changes: 176 additions & 141 deletions SerialPrograms/Source/CommonFramework/Tools/FileUnzip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,178 +26,213 @@ using std::endl;

namespace PokemonAutomation{

namespace fs = std::filesystem;

struct ProgressData {
std::ofstream* out_file;
uint64_t total_bytes;
uint64_t processed_bytes;
int last_percentage;
};

// Callback triggered for every chunk of decompressed data
// pOpaque is an opaque pointer that actually represents ProgressData
size_t write_callback(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n){
ProgressData* data = static_cast<ProgressData*>(pOpaque);

// 1. Check if we actually need to seek
// tellp() returns the current 'put' position. get the current position of the write pointer in an output stream.
if (static_cast<mz_uint64>(data->out_file->tellp()) != file_ofs){
data->out_file->seekp(file_ofs);
}

// Write chunk to disk
data->out_file->write(static_cast<const char*>(pBuf), n);

// Update and display progress
data->processed_bytes += n;
double percent = (double)data->processed_bytes / data->total_bytes * 100.0;
int current_percent = static_cast<int>(percent);

// Only print if the integer value has changed
if (current_percent > data->last_percentage){
data->last_percentage = current_percent;
std::cout << "\rProgress: " << current_percent << "% ("
<< data->processed_bytes << "/" << data->total_bytes << " bytes)" << endl;
}

return n;
namespace fs = std::filesystem;

struct ProgressData {
std::ofstream* out_file;
uint64_t total_bytes;
uint64_t processed_bytes;
int last_percentage;
std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback;
CancellableScope& scope;
};

// Callback triggered for every chunk of decompressed data
// pOpaque is an opaque pointer that actually represents ProgressData
size_t write_callback(void* pOpaque, [[maybe_unused]] mz_uint64 file_ofs, const void* pBuf, size_t n){
ProgressData* data = static_cast<ProgressData*>(pOpaque);

if (data->scope.cancelled()){
return 0; // this causes mz_zip_reader_extract_to_callback to return an error
}

// ensure that entry_name is inside target_dir, to prevent path traversal attacks.
bool is_safe(const std::string& target_dir, const std::string& entry_name){
try{
// 1. Get absolute, normalized paths
// handles symlinks. and resolves .. and . components. throws error if path doesn't exist
Filesystem::Path base = fs::canonical(Filesystem::Path(target_dir));
// confirms that base is a directory, and not a file
if (!fs::is_directory(base)) return false;
// Check if we actually need to seek
// tellp() returns the current 'put' position. get the current position of the write pointer in an output stream.
// if (static_cast<mz_uint64>(data->out_file->tellp()) != file_ofs){
// data->out_file->seekp(file_ofs);
// }

// Write chunk to disk
data->out_file->write(static_cast<const char*>(pBuf), n);

// Update and display progress
data->processed_bytes += n;

data->progress_callback(data->processed_bytes, data->total_bytes);



// resolves .. and . components and returns an absolute path without requiring the final path to exist.
fs::path target = fs::weakly_canonical(Filesystem::Path(base / entry_name));
return n;
}

// cout << base << endl;
// cout << target << endl;
// ensure that entry_name is inside target_dir, to prevent path traversal attacks.
// assumes target_dir exists
bool is_safe(const std::string& target_dir, const std::string& entry_name){
try {
// 1. Get absolute, normalized paths
// handles symlinks. and resolves .. and . components. throws error if path doesn't exist
Filesystem::Path base = fs::canonical(Filesystem::Path(target_dir));
// confirms that base is a directory, and not a file
if (!fs::is_directory(base)) return false;

// resolves .. and . components and returns an absolute path without requiring the final path to exist.
fs::path target = fs::weakly_canonical(Filesystem::Path(base / Filesystem::Path(entry_name)));

// 2. Use lexically_relative to find the path from base to target
fs::path rel = target.lexically_relative(base);
// cout << base << endl;
// cout << target << endl;

// 3. Validation:
// - If rel is empty, they are likely different roots
// - If rel starts with "..", it escaped the base
// - If rel is ".", it IS the base directory (usually safe)
if (rel.empty() || *rel.begin() == ".."){
return false;
}
// 2. Use lexically_relative to find the path from base to target
fs::path rel = target.lexically_relative(base);

return true;
}catch (...){
cout << "target_dir path doesn't exist." << endl;
// 3. Validation:
// - If rel is empty, they are likely different roots
// - If rel starts with "..", it escaped the base
// - If rel is ".", it IS the base directory (usually safe)
if (rel.empty() || *rel.begin() == ".."){
return false;
}

return true;
} catch (...){
cout << "target_dir path doesn't exist." << endl;
return false;
}
}

void unzip_file(const char* zip_path, const char* target_dir){
Filesystem::Path p{zip_path};
if (!fs::exists(p)){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_all: Attempted to unzip a file that doesn't exist.");
}
void unzip_file(
CancellableScope& scope,
const char* zip_path,
const char* target_dir,
std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback
){
Filesystem::Path p{zip_path};
if (!fs::exists(p)){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Attempted to unzip a file that doesn't exist.");
}

{
Filesystem::Path dir{target_dir};
std::error_code ec{};
fs::create_directories(dir, ec);
if (ec){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Error creating the target directory " + std::string(target_dir) + ": " + ec.message());
}
}

mz_zip_archive zip_archive;
memset(&zip_archive, 0, sizeof(zip_archive));
mz_zip_archive zip_archive;
memset(&zip_archive, 0, sizeof(zip_archive));

// Opens the ZIP file at zip_path
// zip_archive holds the state and metadata of the ZIP archive.
if (!mz_zip_reader_init_file(&zip_archive, zip_path, 0)){
cout << "failed to run mz_zip_reader_init_file" << endl;
cout << "mz_zip_error: " << mz_zip_get_last_error(&zip_archive) << endl;
return;
}
// Opens the ZIP file at zip_path
// zip_archive holds the state and metadata of the ZIP archive.
if (!mz_zip_reader_init_file(&zip_archive, zip_path, 0)){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION,
"unzip_file: failed to run mz_zip_reader_init_file. mz_zip_error: " + std::to_string(mz_zip_get_last_error(&zip_archive)));
}

// Get total number of files in the archive
int num_files = (int)mz_zip_reader_get_num_files(&zip_archive);
// This automatically calls mz_zip_reader_end when this function exits for any reason.
struct ZipCleanup {
mz_zip_archive* p;
~ZipCleanup() { mz_zip_reader_end(p); }
} cleanup{&zip_archive};

// calculate total uncompressed size
uint64_t total_uncompressed_size = 0;
for (int i = 0; i < num_files; i++){
mz_zip_archive_file_stat file_stat; // holds info on the specific file
// Get total number of files in the archive
int num_files = (int)mz_zip_reader_get_num_files(&zip_archive);

// fills file_stat with the data for the current index
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
// calculate total uncompressed size
uint64_t total_uncompressed_size = 0;
for (int i = 0; i < num_files; i++){
scope.throw_if_cancelled();

cout << std::to_string(file_stat.m_uncomp_size) << endl;
total_uncompressed_size += file_stat.m_uncomp_size;
}
mz_zip_archive_file_stat file_stat; // holds info on the specific file

uint64_t total_processed_bytes = 0;
for (int i = 0; i < num_files; i++){
mz_zip_archive_file_stat file_stat; // holds info on the specific file
// fills file_stat with the data for the current index
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;

// fills file_stat with the data for the current index
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
// cout << std::to_string(file_stat.m_uncomp_size) << endl;
total_uncompressed_size += file_stat.m_uncomp_size;
}

// Checks if the current entry is a folder. Miniz treats folders as entries;
// this code skips them to avoid trying to "write" a folder as if it were a file.
if (mz_zip_reader_is_file_a_directory(&zip_archive, i)){
continue;
}
uint64_t total_processed_bytes = 0;
for (int i = 0; i < num_files; i++){
scope.throw_if_cancelled();

// Construct your output path (e.g., target_dir + file_stat.m_filename)
std::string out_path = std::string(target_dir) + "/" + file_stat.m_filename;
Filesystem::Path const parent_dir{Filesystem::Path(out_path).parent_path()};
mz_zip_archive_file_stat file_stat; // holds info on the specific file

// Create the entire directory, including intermediate directories for this file
std::error_code ec{};
fs::create_directories(parent_dir, ec);
if (ec){
std::cerr << "Error creating " << parent_dir << ": " << ec.message() << std::endl;
ec.clear();
}
// fills file_stat with the data for the current index
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;

// Checks if the current entry is a folder. Miniz treats folders as entries;
// this code skips them to avoid trying to "write" a folder as if it were a file.
if (mz_zip_reader_is_file_a_directory(&zip_archive, i)){
continue;
}

// ensure that entry_name is inside target_dir. to prevent path traversal attacks.
if (!is_safe(target_dir, file_stat.m_filename)){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_all: Attempted to unzip a file that was trying to leave its base directory. This is a security risk.");
// ensure that entry_name is inside target_dir. to prevent path traversal attacks.
if (!is_safe(target_dir, file_stat.m_filename)){
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Attempted to unzip a file that was trying to leave its base directory. This is a security risk.");
}

// Construct your output path (e.g., target_dir + file_stat.m_filename)
Filesystem::Path out_path = Filesystem::Path(target_dir) / Filesystem::Path(file_stat.m_filename);
Filesystem::Path const parent_dir{out_path.parent_path()};

// Create the entire directory, including intermediate directories for this file
std::error_code ec{};
fs::create_directories(parent_dir, ec);
if (ec){
std::cerr << "Error creating " << parent_dir << ": " << ec.message() << std::endl;
ec.clear();
}


std::ofstream out_file(out_path.string(), std::ios::binary); // std::ios::binary is to prevent line-ending conversions.
ProgressData progress = { &out_file, total_uncompressed_size, total_processed_bytes, -1, progress_callback, scope };

// Extract using the callback
// decompresses the file in chunks and repeatedly calls write_callback to save those chunks to the disk via the out_file
mz_bool status = mz_zip_reader_extract_to_callback(&zip_archive, i, write_callback, &progress, 0);

if (!status){
out_file.close();
if (scope.cancelled()){
// close and delete the partially unzipped file
fs::remove(out_path, ec);
throw OperationCancelledException();
}

std::ofstream out_file(out_path, std::ios::binary); // std::ios::binary is to prevent line-ending conversions.
ProgressData progress = { &out_file, total_uncompressed_size, total_processed_bytes, -1 };

// Extract using the callback
// decompresses the file in chunks and repeatedly calls write_callback to save those chunks to the disk via the out_file
mz_zip_reader_extract_to_callback(&zip_archive, i, write_callback, &progress, 0);
std::cout << "\nFinished: " << file_stat.m_filename << std::endl;
total_processed_bytes += file_stat.m_uncomp_size;
}

mz_zip_reader_end(&zip_archive);
// std::cout << "\nFinished: " << file_stat.m_filename << std::endl;
total_processed_bytes += file_stat.m_uncomp_size;
}

// void unzip_file(const std::string& zip_path, const std::string& output_dir){
// cout << "try to unzip the file." << endl;
// miniz_cpp::zip_file archive(zip_path);

// // create folder structure before extracting.
// // since miniz-cpp does not automatically create subdirectories if they exist within the zip archive
// std::vector<miniz_cpp::zip_info> const info_list = archive.infolist();
// auto const current_directory = std::filesystem::current_path();
// std::error_code ec{};
// for(miniz_cpp::zip_info const & info: info_list ){
// std::filesystem::path const p{(std::filesystem::path(output_dir) / info.filename).parent_path()};
// // Create the entire directory tree for this file
// std::filesystem::create_directories(p, ec);

// if (ec){
// std::cerr << "Error creating " << p << ": " << ec.message() << std::endl;
// ec.clear();
// }
// }
}

// void unzip_file(const std::string& zip_path, const std::string& output_dir){
// cout << "try to unzip the file." << endl;
// miniz_cpp::zip_file archive(zip_path);

// // create folder structure before extracting.
// // since miniz-cpp does not automatically create subdirectories if they exist within the zip archive
// std::vector<miniz_cpp::zip_info> const info_list = archive.infolist();
// auto const current_directory = std::filesystem::current_path();
// std::error_code ec{};
// for(miniz_cpp::zip_info const & info: info_list ){
// std::filesystem::path const p{(std::filesystem::path(output_dir) / info.filename).parent_path()};
// // Create the entire directory tree for this file
// std::filesystem::create_directories(p, ec);

// if (ec){
// std::cerr << "Error creating " << p << ": " << ec.message() << std::endl;
// ec.clear();
// }
// }

// // Extract all files to the specified path
// archive.extractall(output_dir);
// // Extract all files to the specified path
// archive.extractall(output_dir);

// cout << "done unzipping the file." << endl;
// cout << "done unzipping the file." << endl;

// }
// }


}
14 changes: 13 additions & 1 deletion SerialPrograms/Source/CommonFramework/Tools/FileUnzip.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,22 @@
#define PokemonAutomation_FileUnzip_H

#include <string>
#include <functional>
#include "Common/Cpp/CancellableScope.h"


namespace PokemonAutomation{

// Unzips the zip file located at zip_path into target_dir.
// If target_dir doesn't already exist, it will be created.
//
// progress_callback receives the number of bytes written so far and the total
// uncompressed size of the archive.
//
// Throws OperationCancelledException if the CancellableScope is cancelled.
// Throws InternalProgramError if unzipping fails.
void unzip_file(
    CancellableScope& scope,
    const char* zip_path,
    const char* target_dir,
    std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback
);

}
#endif