Skip to content

Commit daad68d

Browse files
committed
FileUnzip
1 parent f5c2a3e commit daad68d

2 files changed

Lines changed: 189 additions & 142 deletions

File tree

SerialPrograms/Source/CommonFramework/Tools/FileUnzip.cpp

Lines changed: 176 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -26,178 +26,213 @@ using std::endl;
2626

2727
namespace PokemonAutomation{
2828

29-
namespace fs = std::filesystem;
30-
31-
struct ProgressData {
32-
std::ofstream* out_file;
33-
uint64_t total_bytes;
34-
uint64_t processed_bytes;
35-
int last_percentage;
36-
};
37-
38-
// Callback triggered for every chunk of decompressed data
39-
// pOpaque is an opaque pointer that actually represents ProgressData
40-
size_t write_callback(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n){
41-
ProgressData* data = static_cast<ProgressData*>(pOpaque);
42-
43-
// 1. Check if we actually need to seek
44-
// tellp() returns the current 'put' position. get the current position of the write pointer in an output stream.
45-
if (static_cast<mz_uint64>(data->out_file->tellp()) != file_ofs){
46-
data->out_file->seekp(file_ofs);
47-
}
48-
49-
// Write chunk to disk
50-
data->out_file->write(static_cast<const char*>(pBuf), n);
51-
52-
// Update and display progress
53-
data->processed_bytes += n;
54-
double percent = (double)data->processed_bytes / data->total_bytes * 100.0;
55-
int current_percent = static_cast<int>(percent);
56-
57-
// Only print if the integer value has changed
58-
if (current_percent > data->last_percentage){
59-
data->last_percentage = current_percent;
60-
std::cout << "\rProgress: " << current_percent << "% ("
61-
<< data->processed_bytes << "/" << data->total_bytes << " bytes)" << endl;
62-
}
63-
64-
return n;
29+
namespace fs = std::filesystem;
30+
31+
// State handed to write_callback through miniz's opaque user pointer while a
// single archive entry is being extracted. unzip_file fills it positionally
// (aggregate initialisation), so the member order below must not change.
struct ProgressData {
32+
// Destination stream for the entry currently being extracted. Not owned here.
std::ofstream* out_file;
33+
// Sum of the uncompressed sizes of all entries in the archive (computed by unzip_file).
uint64_t total_bytes;
34+
// Bytes written so far; unzip_file seeds it with the bytes of already-extracted
// entries and write_callback adds each chunk's size to it.
uint64_t processed_bytes;
35+
// Initialised to -1 by unzip_file. The write_callback in this revision does not read it.
int last_percentage;
36+
// Progress sink invoked by write_callback with (processed_bytes, total_bytes).
std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback;
37+
// Cancellation source polled by write_callback before each chunk is written.
// Stored as a reference, so the scope must outlive this struct.
CancellableScope& scope;
38+
};
39+
40+
// Callback triggered for every chunk of decompressed data
41+
// pOpaque is an opaque pointer that actually represents ProgressData
42+
size_t write_callback(void* pOpaque, [[maybe_unused]] mz_uint64 file_ofs, const void* pBuf, size_t n){
43+
ProgressData* data = static_cast<ProgressData*>(pOpaque);
44+
45+
if (data->scope.cancelled()){
46+
return 0; // this causes mz_zip_reader_extract_to_callback to return an error
6547
}
6648

67-
// ensure that entry_name is inside target_dir, to prevent path traversal attacks.
68-
bool is_safe(const std::string& target_dir, const std::string& entry_name){
69-
try{
70-
// 1. Get absolute, normalized paths
71-
// handles symlinks. and resolves .. and . components. throws error if path doesn't exist
72-
Filesystem::Path base = fs::canonical(Filesystem::Path(target_dir));
73-
// confirms that base is a directory, and not a file
74-
if (!fs::is_directory(base)) return false;
49+
// Check if we actually need to seek
50+
// tellp() returns the current 'put' position. get the current position of the write pointer in an output stream.
51+
// if (static_cast<mz_uint64>(data->out_file->tellp()) != file_ofs){
52+
// data->out_file->seekp(file_ofs);
53+
// }
54+
55+
// Write chunk to disk
56+
data->out_file->write(static_cast<const char*>(pBuf), n);
57+
58+
// Update and display progress
59+
data->processed_bytes += n;
60+
61+
data->progress_callback(data->processed_bytes, data->total_bytes);
62+
63+
7564

76-
// resolves .. and . components and returns an absolute path without requiring the final path to exist.
77-
fs::path target = fs::weakly_canonical(Filesystem::Path(base / entry_name));
65+
return n;
66+
}
7867

79-
// cout << base << endl;
80-
// cout << target << endl;
68+
// ensure that entry_name is inside target_dir, to prevent path traversal attacks.
69+
// assumes target_dir exists
70+
bool is_safe(const std::string& target_dir, const std::string& entry_name){
71+
try {
72+
// 1. Get absolute, normalized paths
73+
// handles symlinks. and resolves .. and . components. throws error if path doesn't exist
74+
Filesystem::Path base = fs::canonical(Filesystem::Path(target_dir));
75+
// confirms that base is a directory, and not a file
76+
if (!fs::is_directory(base)) return false;
77+
78+
// resolves .. and . components and returns an absolute path without requiring the final path to exist.
79+
fs::path target = fs::weakly_canonical(Filesystem::Path(base / Filesystem::Path(entry_name)));
8180

82-
// 2. Use lexically_relative to find the path from base to target
83-
fs::path rel = target.lexically_relative(base);
81+
// cout << base << endl;
82+
// cout << target << endl;
8483

85-
// 3. Validation:
86-
// - If rel is empty, they are likely different roots
87-
// - If rel starts with "..", it escaped the base
88-
// - If rel is ".", it IS the base directory (usually safe)
89-
if (rel.empty() || *rel.begin() == ".."){
90-
return false;
91-
}
84+
// 2. Use lexically_relative to find the path from base to target
85+
fs::path rel = target.lexically_relative(base);
9286

93-
return true;
94-
}catch (...){
95-
cout << "target_dir path doesn't exist." << endl;
87+
// 3. Validation:
88+
// - If rel is empty, they are likely different roots
89+
// - If rel starts with "..", it escaped the base
90+
// - If rel is ".", it IS the base directory (usually safe)
91+
if (rel.empty() || *rel.begin() == ".."){
9692
return false;
9793
}
94+
95+
return true;
96+
} catch (...){
97+
cout << "target_dir path doesn't exist." << endl;
98+
return false;
9899
}
100+
}
99101

100-
void unzip_file(const char* zip_path, const char* target_dir){
101-
Filesystem::Path p{zip_path};
102-
if (!fs::exists(p)){
103-
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_all: Attempted to unzip a file that doesn't exist.");
104-
}
102+
void unzip_file(
103+
CancellableScope& scope,
104+
const char* zip_path,
105+
const char* target_dir,
106+
std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback
107+
){
108+
Filesystem::Path p{zip_path};
109+
if (!fs::exists(p)){
110+
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Attempted to unzip a file that doesn't exist.");
111+
}
112+
113+
{
114+
Filesystem::Path dir{target_dir};
115+
std::error_code ec{};
116+
fs::create_directories(dir, ec);
117+
if (ec){
118+
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Error creating the target directory " + std::string(target_dir) + ": " + ec.message());
119+
}
120+
}
105121

106-
mz_zip_archive zip_archive;
107-
memset(&zip_archive, 0, sizeof(zip_archive));
122+
mz_zip_archive zip_archive;
123+
memset(&zip_archive, 0, sizeof(zip_archive));
108124

109-
// Opens the ZIP file at zip_path
110-
// zip_archive holds the state and metadata of the ZIP archive.
111-
if (!mz_zip_reader_init_file(&zip_archive, zip_path, 0)){
112-
cout << "failed to run mz_zip_reader_init_file" << endl;
113-
cout << "mz_zip_error: " << mz_zip_get_last_error(&zip_archive) << endl;
114-
return;
115-
}
125+
// Opens the ZIP file at zip_path
126+
// zip_archive holds the state and metadata of the ZIP archive.
127+
if (!mz_zip_reader_init_file(&zip_archive, zip_path, 0)){
128+
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION,
129+
"unzip_file: failed to run mz_zip_reader_init_file. mz_zip_error: " + std::to_string(mz_zip_get_last_error(&zip_archive)));
130+
}
116131

117-
// Get total number of files in the archive
118-
int num_files = (int)mz_zip_reader_get_num_files(&zip_archive);
132+
// This automatically calls mz_zip_reader_end when this function exits for any reason.
133+
struct ZipCleanup {
134+
mz_zip_archive* p;
135+
~ZipCleanup() { mz_zip_reader_end(p); }
136+
} cleanup{&zip_archive};
119137

120-
// calculate total uncompressed size
121-
uint64_t total_uncompressed_size = 0;
122-
for (int i = 0; i < num_files; i++){
123-
mz_zip_archive_file_stat file_stat; // holds info on the specific file
138+
// Get total number of files in the archive
139+
int num_files = (int)mz_zip_reader_get_num_files(&zip_archive);
124140

125-
// fills file_stat with the data for the current index
126-
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
141+
// calculate total uncompressed size
142+
uint64_t total_uncompressed_size = 0;
143+
for (int i = 0; i < num_files; i++){
144+
scope.throw_if_cancelled();
127145

128-
cout << std::to_string(file_stat.m_uncomp_size) << endl;
129-
total_uncompressed_size += file_stat.m_uncomp_size;
130-
}
146+
mz_zip_archive_file_stat file_stat; // holds info on the specific file
131147

132-
uint64_t total_processed_bytes = 0;
133-
for (int i = 0; i < num_files; i++){
134-
mz_zip_archive_file_stat file_stat; // holds info on the specific file
148+
// fills file_stat with the data for the current index
149+
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
135150

136-
// fills file_stat with the data for the current index
137-
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
151+
// cout << std::to_string(file_stat.m_uncomp_size) << endl;
152+
total_uncompressed_size += file_stat.m_uncomp_size;
153+
}
138154

139-
// Checks if the current entry is a folder. Miniz treats folders as entries;
140-
// this code skips them to avoid trying to "write" a folder as if it were a file.
141-
if (mz_zip_reader_is_file_a_directory(&zip_archive, i)){
142-
continue;
143-
}
155+
uint64_t total_processed_bytes = 0;
156+
for (int i = 0; i < num_files; i++){
157+
scope.throw_if_cancelled();
144158

145-
// Construct your output path (e.g., target_dir + file_stat.m_filename)
146-
std::string out_path = std::string(target_dir) + "/" + file_stat.m_filename;
147-
Filesystem::Path const parent_dir{Filesystem::Path(out_path).parent_path()};
159+
mz_zip_archive_file_stat file_stat; // holds info on the specific file
148160

149-
// Create the entire directory, including intermediate directories for this file
150-
std::error_code ec{};
151-
fs::create_directories(parent_dir, ec);
152-
if (ec){
153-
std::cerr << "Error creating " << parent_dir << ": " << ec.message() << std::endl;
154-
ec.clear();
155-
}
161+
// fills file_stat with the data for the current index
162+
if (!mz_zip_reader_file_stat(&zip_archive, i, &file_stat)) continue;
163+
164+
// Checks if the current entry is a folder. Miniz treats folders as entries;
165+
// this code skips them to avoid trying to "write" a folder as if it were a file.
166+
if (mz_zip_reader_is_file_a_directory(&zip_archive, i)){
167+
continue;
168+
}
156169

157-
// ensure that entry_name is inside target_dir. to prevent path traversal attacks.
158-
if (!is_safe(target_dir, file_stat.m_filename)){
159-
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_all: Attempted to unzip a file that was trying to leave its base directory. This is a security risk.");
170+
// ensure that entry_name is inside target_dir. to prevent path traversal attacks.
171+
if (!is_safe(target_dir, file_stat.m_filename)){
172+
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "unzip_file: Attempted to unzip a file that was trying to leave its base directory. This is a security risk.");
173+
}
174+
175+
// Construct your output path (e.g., target_dir + file_stat.m_filename)
176+
Filesystem::Path out_path = Filesystem::Path(target_dir) / Filesystem::Path(file_stat.m_filename);
177+
Filesystem::Path const parent_dir{out_path.parent_path()};
178+
179+
// Create the entire directory, including intermediate directories for this file
180+
std::error_code ec{};
181+
fs::create_directories(parent_dir, ec);
182+
if (ec){
183+
std::cerr << "Error creating " << parent_dir << ": " << ec.message() << std::endl;
184+
ec.clear();
185+
}
186+
187+
188+
std::ofstream out_file(out_path.string(), std::ios::binary); // std::ios::binary is to prevent line-ending conversions.
189+
ProgressData progress = { &out_file, total_uncompressed_size, total_processed_bytes, -1, progress_callback, scope };
190+
191+
// Extract using the callback
192+
// decompresses the file in chunks and repeatedly calls write_callback to save those chunks to the disk via the out_file
193+
mz_bool status = mz_zip_reader_extract_to_callback(&zip_archive, i, write_callback, &progress, 0);
194+
195+
if (!status){
196+
out_file.close();
197+
if (scope.cancelled()){
198+
// close and delete the partially unzipped file
199+
fs::remove(out_path, ec);
200+
throw OperationCancelledException();
160201
}
161-
162-
std::ofstream out_file(out_path, std::ios::binary); // std::ios::binary is to prevent line-ending conversions.
163-
ProgressData progress = { &out_file, total_uncompressed_size, total_processed_bytes, -1 };
164-
165-
// Extract using the callback
166-
// decompresses the file in chunks and repeatedly calls write_callback to save those chunks to the disk via the out_file
167-
mz_zip_reader_extract_to_callback(&zip_archive, i, write_callback, &progress, 0);
168-
std::cout << "\nFinished: " << file_stat.m_filename << std::endl;
169-
total_processed_bytes += file_stat.m_uncomp_size;
170202
}
171203

172-
mz_zip_reader_end(&zip_archive);
204+
// std::cout << "\nFinished: " << file_stat.m_filename << std::endl;
205+
total_processed_bytes += file_stat.m_uncomp_size;
173206
}
174207

175-
// void unzip_file(const std::string& zip_path, const std::string& output_dir){
176-
// cout << "try to unzip the file." << endl;
177-
// miniz_cpp::zip_file archive(zip_path);
178-
179-
// // create folder structure before extracting.
180-
// // since miniz-cpp does not automatically create subdirectories if they exist within the zip archive
181-
// std::vector<miniz_cpp::zip_info> const info_list = archive.infolist();
182-
// auto const current_directory = std::filesystem::current_path();
183-
// std::error_code ec{};
184-
// for(miniz_cpp::zip_info const & info: info_list ){
185-
// std::filesystem::path const p{(std::filesystem::path(output_dir) / info.filename).parent_path()};
186-
// // Create the entire directory tree for this file
187-
// std::filesystem::create_directories(p, ec);
188-
189-
// if (ec){
190-
// std::cerr << "Error creating " << p << ": " << ec.message() << std::endl;
191-
// ec.clear();
192-
// }
193-
// }
208+
}
209+
210+
// void unzip_file(const std::string& zip_path, const std::string& output_dir){
211+
// cout << "try to unzip the file." << endl;
212+
// miniz_cpp::zip_file archive(zip_path);
213+
214+
// // create folder structure before extracting.
215+
// // since miniz-cpp does not automatically create subdirectories if they exist within the zip archive
216+
// std::vector<miniz_cpp::zip_info> const info_list = archive.infolist();
217+
// auto const current_directory = std::filesystem::current_path();
218+
// std::error_code ec{};
219+
// for(miniz_cpp::zip_info const & info: info_list ){
220+
// std::filesystem::path const p{(std::filesystem::path(output_dir) / info.filename).parent_path()};
221+
// // Create the entire directory tree for this file
222+
// std::filesystem::create_directories(p, ec);
223+
224+
// if (ec){
225+
// std::cerr << "Error creating " << p << ": " << ec.message() << std::endl;
226+
// ec.clear();
227+
// }
228+
// }
194229

195-
// // Extract all files to the specified path
196-
// archive.extractall(output_dir);
230+
// // Extract all files to the specified path
231+
// archive.extractall(output_dir);
197232

198-
// cout << "done unzipping the file." << endl;
233+
// cout << "done unzipping the file." << endl;
199234

200-
// }
235+
// }
201236

202237

203238
}

SerialPrograms/Source/CommonFramework/Tools/FileUnzip.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,22 @@
88
#define PokemonAutomation_FileUnzip_H
99

1010
#include <string>
11+
#include <functional>
12+
#include "Common/Cpp/CancellableScope.h"
13+
1114

1215
namespace PokemonAutomation{
1316

14-
void unzip_file(const char* zip_path, const char* target_dir);
17+
// Unzips the archive located at zip_path into target_dir.
18+
// target_dir is created (including missing parent directories) if it does not already exist.
19+
// Throws OperationCancelledException if `scope` is cancelled while unzipping.
20+
// Throws InternalProgramError when zip_path does not exist, target_dir cannot be created,
// the archive cannot be opened, or an entry's path would resolve outside target_dir.
// progress_callback receives (bytes written so far, total uncompressed bytes of the archive)
// each time a chunk of decompressed data has been written.
21+
void unzip_file(
22+
CancellableScope& scope,
23+
const char* zip_path,
24+
const char* target_dir,
25+
std::function<void(uint64_t bytes_done, uint64_t total_bytes)> progress_callback
26+
);
1527

1628
}
1729
#endif

0 commit comments

Comments
 (0)