Skip to content

Commit 03c09aa

Browse files
Support directories in PUT/GET
1 parent eb00a91 commit 03c09aa

File tree

5 files changed

+408
-92
lines changed

5 files changed

+408
-92
lines changed

cpp/FileMetadataInitializer.cpp

Lines changed: 103 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "snowflake/platform.h"
99
#include "snowflake/SnowflakeTransferException.hpp"
1010
#include <cerrno>
11+
#include "boost/filesystem.hpp"
1112

1213
#define COMPRESSION_AUTO "AUTO"
1314
#define COMPRESSION_AUTO_DETECT "AUTO_DETECT"
@@ -20,7 +21,7 @@
2021
#include <fnmatch.h>
2122
#endif
2223

23-
24+
using namespace boost::filesystem;
2425

2526
Snowflake::Client::FileMetadataInitializer::FileMetadataInitializer(
2627
std::vector<FileMetadata> &smallFileMetadata,
@@ -34,16 +35,16 @@ Snowflake::Client::FileMetadataInitializer::FileMetadataInitializer(
3435
}
3536

3637
void
37-
Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::string &fileDir, const char *fileName,
38+
Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::string &fileNameFull,
39+
const std::string &destPath,
40+
const std::string &fileName,
3841
size_t fileSize, size_t threshold)
3942
{
40-
std::string fileNameFull = fileDir;
41-
fileNameFull += fileName;
42-
4343
FileMetadata fileMetadata;
4444
fileMetadata.srcFileName = m_stmtPutGet->platformStringToUTF8(fileNameFull);
4545
fileMetadata.srcFileSize = fileSize;
46-
fileMetadata.destFileName = m_stmtPutGet->platformStringToUTF8(std::string(fileName));
46+
fileMetadata.destPath = m_stmtPutGet->platformStringToUTF8(destPath);
47+
fileMetadata.destFileName = m_stmtPutGet->platformStringToUTF8(fileName);
4748
// process compression type
4849
initCompressionMetadata(fileMetadata);
4950

@@ -56,9 +57,54 @@ Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::st
5657

5758
void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(std::string &sourceLocation,
5859
size_t putThreshold)
60+
{
61+
// looking for files on disk.
62+
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
63+
replaceStrAll(srcLocationPlatform, "/", std::string() + PATH_SEP);
64+
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
65+
std::string basePath = srcLocationPlatform.substr(0, dirSep + 1);
66+
67+
std::vector<std::string> fileList;
68+
if (!listFiles(srcLocationPlatform, fileList))
69+
{
70+
CXX_LOG_ERROR("Failed on finding files for uploading.");
71+
return;
72+
}
73+
74+
for (auto file = fileList.begin(); file != fileList.end(); file++)
75+
{
76+
path p(*file);
77+
size_t fileSize = file_size(p);
78+
std::string fileNameFull = p.string();
79+
std::string fileName = p.filename().string();
80+
//make the path on stage by removing base path and file name from full path
81+
std::string destPath = fileNameFull.substr(basePath.length(),
82+
fileNameFull.length() - basePath.length() - fileName.length());
83+
initUploadFileMetadata(fileNameFull, destPath, fileName, fileSize, putThreshold);
84+
}
85+
}
86+
87+
void Snowflake::Client::FileMetadataInitializer::includeSubfolderFilesRecursive(const std::string &folderPath,
88+
std::vector<std::string> & fileList)
89+
{
90+
for (auto const& entry : recursive_directory_iterator(folderPath))
91+
{
92+
if (is_regular_file(entry))
93+
{
94+
fileList.push_back(entry.path().string());
95+
}
96+
}
97+
}
98+
99+
bool Snowflake::Client::FileMetadataInitializer::listFiles(const std::string &sourceLocation,
100+
std::vector<std::string> & fileList)
59101
{
60102
// looking for files on disk.
61103
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
104+
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
105+
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
106+
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
107+
bool includeSubfolder = filePattern == "**";
62108

63109
#ifdef _WIN32
64110
WIN32_FIND_DATA fdd;
@@ -71,8 +117,7 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
71117
{
72118
CXX_LOG_ERROR("No file matching pattern %s has been found. Error: %d",
73119
sourceLocation.c_str(), dwError);
74-
FindClose(hFind);
75-
return;
120+
return false;
76121
}
77122
else if (dwError != ERROR_SUCCESS)
78123
{
@@ -85,37 +130,29 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
85130
do {
86131
if (!(fdd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) )
87132
{
88-
std::string fileFullPath = std::string(fdd.cFileName);
89-
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
90-
if (dirSep == std::string::npos)
91-
{
92-
dirSep = sourceLocation.find_last_of(ALTER_PATH_SEP);
93-
}
94-
if (dirSep != std::string::npos)
133+
fileList.push_back(dirPath + fdd.cFileName);
134+
}
135+
else
136+
{
137+
if (includeSubfolder &&
138+
(std::string(fdd.cFileName) != ".") &&
139+
(std::string(fdd.cFileName) != ".."))
95140
{
96-
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
97-
LARGE_INTEGER fileSize;
98-
fileSize.LowPart = fdd.nFileSizeLow;
99-
fileSize.HighPart = fdd.nFileSizeHigh;
100-
initUploadFileMetadata(dirPath, (char *)fdd.cFileName, (size_t)fileSize.QuadPart, putThreshold);
141+
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
101142
}
102143
}
103144
} while (FindNextFile(hFind, &fdd) != 0);
104145

105146
DWORD dwError = GetLastError();
147+
FindClose(hFind);
106148
if (dwError != ERROR_NO_MORE_FILES)
107149
{
108150
CXX_LOG_ERROR("Failed on FindNextFile. Error: %d", dwError);
109151
throw SnowflakeTransferException(TransferError::DIR_OPEN_ERROR,
110152
srcLocationPlatform.c_str(), dwError);
111153
}
112-
FindClose(hFind);
113154

114155
#else
115-
unsigned long dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
116-
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
117-
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
118-
119156
DIR * dir = nullptr;
120157
struct dirent * dir_entry;
121158
if ((dir = opendir(dirPath.c_str())) != NULL)
@@ -130,8 +167,14 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
130167
if (!ret)
131168
{
132169
if (S_ISREG(fileStatus.st_mode)) {
133-
initUploadFileMetadata(dirPath, dir_entry->d_name,
134-
(size_t) fileStatus.st_size, putThreshold);
170+
fileList.push_back(dirPath + dir_entry->d_name);
171+
}
172+
else if (includeSubfolder &&
173+
(S_ISDIR(fileStatus.st_mode)) &&
174+
(std::string(dir_entry->d_name) != ".") &&
175+
(std::string(dir_entry->d_name) != ".."))
176+
{
177+
includeSubfolderFilesRecursive(dirPath + dir_entry->d_name, fileList);
135178
}
136179
}
137180
else
@@ -153,6 +196,7 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
153196
dirPath.c_str(), errno);
154197
}
155198
#endif
199+
return true;
156200
}
157201

158202
void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata(
@@ -168,8 +212,10 @@ void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata(
168212
{
169213
// guess
170214
CXX_LOG_INFO("Auto detect on compression type");
171-
fileMetadata.sourceCompression = FileCompressionType::guessCompressionType(
215+
std::string srcFileNamePlatform = m_stmtPutGet->UTF8ToPlatformString(
172216
fileMetadata.srcFileName);
217+
fileMetadata.sourceCompression = FileCompressionType::guessCompressionType(
218+
srcFileNamePlatform);
173219
}
174220
else if (!sf_strncasecmp(m_sourceCompression, COMPRESSION_NONE,
175221
sizeof(COMPRESSION_NONE)))
@@ -253,8 +299,9 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
253299
size_t getThreshold)
254300
{
255301
std::string fullPath = *remoteLocation + sourceLocation;
256-
size_t dirSep = fullPath.find_last_of('/');
257-
std::string dstFileName = fullPath.substr(dirSep + 1);
302+
size_t dirSep = sourceLocation.find_last_of('/');
303+
std::string dstFileName = sourceLocation.substr(dirSep + 1);
304+
std::string dstPath = sourceLocation.substr(0, dirSep + 1);
258305

259306
FileMetadata fileMetadata;
260307
fileMetadata.presignedUrl = presignedUrl;
@@ -271,6 +318,7 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
271318
metaListToPush.push_back(fileMetadata);
272319
metaListToPush.back().srcFileName = fullPath;
273320
metaListToPush.back().destFileName = dstFileName;
321+
metaListToPush.back().destPath = dstPath;
274322
if (encMat)
275323
{
276324
EncryptionProvider::decryptFileKey(&(metaListToPush.back()), encMat, getRandomDev());
@@ -284,4 +332,29 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
284332
return outcome;
285333
}
286334

335+
/**
 * Replace, in place, every occurrence of one substring with another.
 *
 * The search resumes just past each inserted replacement, so text introduced
 * by a replacement is never matched again.
 *
 * @param stringToReplace String that is modified in place.
 * @param oldValue        Substring to look for; when empty the string is left
 *                        unchanged.
 * @param newValue        Text that takes the place of each occurrence.
 */
void Snowflake::Client::FileMetadataInitializer::
replaceStrAll(std::string& stringToReplace,
              std::string const& oldValue,
              std::string const& newValue)
{
  const size_t patternLen = oldValue.length();
  const size_t substituteLen = newValue.length();

  // Nothing to search for.
  if (patternLen == 0)
  {
    return;
  }

  for (size_t pos = stringToReplace.find(oldValue, 0);
       pos != std::string::npos;
       pos = stringToReplace.find(oldValue, pos + substituteLen))
  {
    // Swap this occurrence; the next search starts after the inserted text.
    stringToReplace.replace(pos, patternLen, newValue);
  }
}
287360

cpp/FileMetadataInitializer.hpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,38 @@ class FileMetadataInitializer
3030
IStatementPutGet *stmtPutGet);
3131

3232
/**
33-
* Given a source locations, find all files that match the location pattern,
33+
* Given a source location, find all files that match the location pattern,
3434
* init file metadata, and divide them into different vector according to size
3535
*/
3636
void populateSrcLocUploadMetadata(std::string &sourceLocation, size_t putThreshold);
3737

38+
39+
/**
40+
* Utility function to replace all matching instances in a string.
41+
*/
42+
static void replaceStrAll(std::string& stringToReplace, std::string const& oldValue,
43+
std::string const& newValue);
44+
/**
45+
* Given a source location, find all files matching the pattern, recursively including
46+
* all subfolders if the pattern is **
47+
* Utility function called from populateSrcLocUploadMetadata.
48+
*
49+
* @param sourceLocation The source location could have pattern at the end.
50+
* @param fileList Output the files with the full path.
51+
*
52+
* @return True when succeeded, false when no file matches with the source location.
53+
* @throw SnowflakeTransferException on unexpected error.
54+
*/
55+
bool listFiles(const std::string &sourceLocation, std::vector<std::string> & fileList);
56+
57+
/**
58+
* Given a full path of a folder, add all files in the folder recursively including subfolders.
59+
*
60+
* @param folderPath The full path of a folder.
61+
* @param fileList Output the files in the folder recursively including subfolders.
62+
*/
63+
void includeSubfolderFilesRecursive(const std::string &folderPath, std::vector<std::string> & fileList);
64+
3865
/**
3966
* Given a source location, find out file size to determine use parallel
4067
* download or not.
@@ -79,7 +106,8 @@ class FileMetadataInitializer
79106
* Given file name, populate metadata
80107
* @param fileName
81108
*/
82-
void initUploadFileMetadata(const std::string &fileDir, const char *fileName, size_t fileSize, size_t threshold);
109+
void initUploadFileMetadata(const std::string &fileNameFull, const std::string &destPath,
110+
const std::string &fileName, size_t fileSize, size_t threshold);
83111

84112
/**
85113
* init compression metadata

0 commit comments

Comments
 (0)