diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 87d43181..e6a614e6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -93,7 +93,9 @@ add_library(mapping_core_base_lib
         util/parameters.cpp
         datatypes/raster/raster.cpp
         raster/opencl.cpp
-        util/ogr_source_datasets.cpp)
+        util/ogr_source_datasets.cpp
+        util/uploader_util.cpp
+        util/uploader_util.h)
 
 target_include_directories(mapping_core_base_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 target_link_libraries_internal(mapping_cgi mapping_core_base_lib)
@@ -118,7 +120,8 @@ add_library(mapping_core_services_lib
         pointvisualization/FindResult.cpp
         pointvisualization/QuadTreeNode.cpp
         pointvisualization/CircleClusteringQuadTree.cpp
-        services/featurecollectiondb.cpp)
+        services/featurecollectiondb.cpp
+        services/ogrimporter.cpp)
 target_link_libraries_internal(mapping_core_services_lib mapping_core_base_lib)
 target_link_libraries_internal(mapping_cgi mapping_core_services_lib)
 target_link_libraries_internal(mapping_uploader mapping_core_services_lib)
diff --git a/src/operators/source/ogr_source.cpp b/src/operators/source/ogr_source.cpp
index 1a85f518..7665ba66 100644
--- a/src/operators/source/ogr_source.cpp
+++ b/src/operators/source/ogr_source.cpp
@@ -95,7 +95,7 @@ Json::Value OGRSourceOperator::constructParameters(Json::Value &params){
 
     // create new json object from the queries parameters, the dataset definition, and the layer definition:
     Json::Value constructed_params(Json::ValueType::objectValue);
-    constructed_params["filename"] = dataset_json["filename"];
+    constructed_params["filename"] = OGRSourceDatasets::getJsonParameter(layer_json, dataset_json, "filename");
     constructed_params["layer_name"] = params["layer_name"];
     constructed_params["time"] = OGRSourceDatasets::getJsonParameterDefault(layer_json, dataset_json, "time", "none");
@@ -139,7 +139,7 @@ Json::Value OGRSourceOperator::constructParameters(Json::Value &params){
 
     // provenance information
     Json::Value dataset_provenance = dataset_json["provenance"];
-    Json::Value layer_provenance = dataset_json["provenance"];
+    Json::Value layer_provenance = layer_json["provenance"];
     Json::Value provenanceInfo;
    if(OGRSourceDatasets::hasJsonParameter(layer_provenance, dataset_provenance, "citation"))
        provenanceInfo["citation"] = OGRSourceDatasets::getJsonParameter(layer_provenance, dataset_provenance, "citation");
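Both fixes in this hunk follow the same pattern: a value defined on the layer overrides the dataset-wide default. The diff does not show `OGRSourceDatasets::getJsonParameter` itself; the following is only a minimal sketch of the fallback semantics it presumably implements, not the actual implementation:

```cpp
#include <json/json.h>
#include <stdexcept>
#include <string>

// Sketch of the assumed layer-over-dataset lookup used above; the real helper
// lives in util/ogr_source_datasets.h and may differ in details.
Json::Value getJsonParameterSketch(const Json::Value &layer_json,
                                   const Json::Value &dataset_json,
                                   const std::string &key) {
    if (layer_json.isMember(key))       // layer definition wins
        return layer_json[key];
    if (dataset_json.isMember(key))     // fall back to the dataset definition
        return dataset_json[key];
    throw std::runtime_error("parameter '" + key + "' missing in layer and dataset definition");
}
```

`getJsonParameterDefault` would then presumably be the variant that returns a caller-supplied default instead of throwing, and `hasJsonParameter` the boolean check over both levels.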
diff --git a/src/services/ogrimporter.cpp b/src/services/ogrimporter.cpp
new file mode 100644
index 00000000..68d38ee0
--- /dev/null
+++ b/src/services/ogrimporter.cpp
@@ -0,0 +1,296 @@
+
+#include "util/ogr_source_datasets.h"
+#include "util/uploader_util.h"
+#include "util/ogr_source_util.h"
+#include "services/httpservice.h"
+#include <fstream>
+
+/**
+ * Import service for the OGR source. It imports a whole upload directory of the user. At the moment it
+ * does not delete the upload after a successful import.
+ *
+ * If the dataset does not exist, it is created. Otherwise the new layer can be appended, but that has to be
+ * requested explicitly with the append_dataset parameter. One importer call imports exactly one layer, so multiple
+ * layers in a dataset require multiple importer calls.
+ *
+ * WFS links can also be imported: the WFS server link has to be provided in the main_file parameter, beginning with "WFS:".
+ * The upload_name has to be empty or not provided, because no upload is used.
+ *
+ * Parameters:
+ *  - sessiontoken
+ *  - upload_name: name of the upload that is to be imported
+ *  - main_file: name of the main file (to be opened by OGR Source)
+ *  - dataset_name: name for the dataset that is created
+ *  - layer_name: name of the layer that the parameters below are provided for
+ *  - append_dataset: bool, whether an existing dataset should be appended. Defaults to false.
+ *
+ * The following layer information also has to be provided as parameters:
+ *  - time: the type of the time column(s): "none", "start", "start+end", "start+duration"
+ *  - duration: the duration of the time validity for all features in the file [if time == "start"]
+ *  - time1_format: "custom", "seconds", "dmyhm", "iso" [if time != "none"]
+ *  - time1_custom_format: format string for parsing the time1 attribute [if time1_format == "custom"]
+ *  - time2_format: "custom", "seconds", "dmyhm", "iso" [if time == "start+end" || time == "start+duration"]
+ *  - time2_custom_format: format string for parsing the time2 attribute [if time2_format == "custom"]
+ *  - x: the name of the column containing the x coordinate (or the WKT string) [if CSV file]
+ *  - y: the name of the column containing the y coordinate [if CSV file with y column]
+ *  - time1: the name of the first time column [if time != "none"]
+ *  - time2: the name of the second time column [if time == "start+end" || time == "start+duration"]
+ *  - on_error: the type of error handling: "skip", "abort", "keep"
+ *  - citation
+ *  - license
+ *  - uri
+ *  - default: WKT definition of the default point/line/polygon as a string [optional]
+ *  - description: description of the layer [optional]
+ *  - force_ogr_time_filter: bool [optional]
+ *  - geometry_type: needed when OGR can not infer the geometry type, especially for CSV files [optional]
+ */
+
+namespace bf = boost::filesystem;
+
+class OGRImporter : public HTTPService {
+public:
+    using HTTPService::HTTPService;
+    virtual ~OGRImporter() = default;
+    virtual void run();
+private:
+    Json::Value createLayerJson(const bf::path &target_dir, const bool isWFS);
+    void testFileValidity(const std::string &user_id, const std::string &upload_name, const std::string &main_file,
+                          const std::string &layer_name, const bool isWFS);
+};
+
+REGISTER_HTTP_SERVICE(OGRImporter, "IMPORTER_OGR");
+
+void OGRImporter::run() {
+
+    // check user session and permissions
+    auto session = UserDB::loadSession(params.get("sessiontoken"));
+
+    if(session->isExpired()){
+        throw ImporterException("Not a valid user session.");
+    }
+
+    auto user = session->getUser();
+    if(!user.hasPermission("upload") || !user.hasPermission("import_ogr")) {
+        throw ImporterException("User does not have permission.");
+    }
+
+    const std::string user_id      = user.getUserIDString();
+    const std::string ogr_dir      = Configuration::get("ogrsource.files.path");
+    const std::string upload_name  = params.get("upload_name", "");
+    const std::string main_file    = params.get("main_file");
+    const std::string dataset_name = params.get("dataset_name");
+    const std::string layer_name   = params.get("layer_name");
+    const bool append_dataset      = params.getBool("append_dataset", false);
+
+    const bool isWFS = main_file.find("WFS:") == 0;
+    if(isWFS && !upload_name.empty()){
+        throw ImporterException("WFS link detected in main_file (starts with 'WFS:'). In that case the "
+                                "upload_name is expected to be empty, because it is not needed.");
+    }
+
+    bf::path import_target_dir(ogr_dir);
+    import_target_dir /= dataset_name;
+    bf::path dataset_json_path(ogr_dir);
+    dataset_json_path /= dataset_name + ".json";
+
+    if(!isWFS){
+        // check that the upload and the main file exist
+        const bool upload_exists = UploaderUtil::exists(user_id, upload_name)
+                                   && UploaderUtil::uploadHasFile(user_id, upload_name, main_file);
+        if(!upload_exists){
+            throw ImporterException("Requested upload or main file does not exist.");
+        }
+    }
+
+    // check if a dataset with the same name already exists
+    const bool dataset_exists = bf::exists(dataset_json_path);
+    if(dataset_exists && !append_dataset){
+        throw ImporterException("An OGRSource dataset with the same name already exists. Appending was not requested.");
+    }
+    if(append_dataset && !dataset_exists){
+        throw ImporterException("Dataset should be appended, but it does not exist yet.");
+    }
+
+    // if the dataset json already exists, load it and add the new layer to it. Else create a new dataset json.
+    Json::Value dataset_json(Json::ValueType::nullValue);
+    if(dataset_exists){
+        dataset_json = OGRSourceDatasets::getDatasetDescription(dataset_name);
+    } else {
+        dataset_json = Json::Value(Json::ValueType::objectValue);
+    }
+    if(!dataset_json.isMember("layers"))
+        dataset_json["layers"] = Json::Value(Json::ValueType::objectValue);
+
+    if(dataset_exists && dataset_json["layers"].isMember(layer_name)) {
+        throw ImporterException("Layer can not be added to the OGR dataset because a layer with the same name already exists: " + layer_name);
+    }
+
+    // test whether the dataset & layer can be opened with OGR
+    testFileValidity(user_id, upload_name, main_file, layer_name, isWFS);
+
+    // create the layer json and add it to dataset_json
+    auto layer_json = createLayerJson(import_target_dir, isWFS);
+    dataset_json["layers"][layer_name] = layer_json;
+
+    if(!isWFS){
+        // move the uploaded files to the OGR location; if an error occurs, delete the copied files
+        std::vector<std::string> copied_files;
+        copied_files.reserve(8); // avoid resizing for most cases
+        try {
+            UploaderUtil::moveUpload(user_id, upload_name, import_target_dir, copied_files);
+        } catch(std::exception &e){
+            // could not move the upload: delete the already copied files
+            for(auto &filename : copied_files){
+                bf::path file_path = import_target_dir;
+                file_path /= filename;
+                bf::remove(file_path);
+            }
+            response.sendFailureJSON("Could not copy files");
+            return;
+        }
+    }
+
+    // write the dataset json to file only after moving the files, because that could go wrong
+    Json::StyledWriter writer;
+    std::ofstream file;
+    file.open(dataset_json_path.string());
+    file << writer.write(dataset_json);
+    file.close();
+
+    user.addPermission("data.ogr_source." + dataset_name);
+    response.sendSuccessJSON();
+}
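To make the parameter list above concrete, here is a hypothetical request to `IMPORTER_OGR` for a CSV upload with a single start-time column. Every name and value below is invented for illustration (the duration unit is assumed, not specified by this diff), and `sessiontoken` is omitted:

```cpp
#include <map>
#include <string>

// Hypothetical IMPORTER_OGR request parameters (illustrative only):
const std::map<std::string, std::string> example_request = {
    {"upload_name",   "bird_sightings"},          // assumed upload directory name
    {"main_file",     "sightings.csv"},
    {"dataset_name",  "bird_sightings"},
    {"layer_name",    "sightings"},
    {"time",          "start"},
    {"duration",      "86400"},                   // validity per feature; unit assumed
    {"time1",         "observed_at"},
    {"time1_format",  "iso"},
    {"x",             "lon"},                     // CSV coordinate columns
    {"y",             "lat"},
    {"on_error",      "skip"},
    {"citation",      "Example et al. 2020"},
    {"license",       "CC-BY-4.0"},
    {"uri",           "https://example.org/birds"},
    {"geometry_type", "Point"}
};
```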
+
+/**
+ * Writes the needed parameters into the layer json. Tests some conditions and throws exceptions when they are not met.
+ */
+Json::Value OGRImporter::createLayerJson(const bf::path &target_dir, const bool isWFS) {
+    Json::Value layer_json(Json::ValueType::objectValue);
+
+    if(ErrorHandlingConverter.is_value(params.get("on_error")))
+        layer_json["on_error"] = params.get("on_error");
+    else
+        throw ImporterException("Invalid 'on_error' parameter.");
+
+    if(TimeSpecificationConverter.is_value(params.get("time")))
+        layer_json["time"] = params.get("time");
+    else
+        throw ImporterException("Invalid 'time' parameter.");
+
+    if(params.hasParam("duration"))
+        layer_json["duration"] = params.get("duration");
+    else if(params.get("time") == "start")
+        throw ImporterException("For TimeSpecification 'start' a duration has to be provided.");
+
+    if(params.hasParam("time1_format")){
+        Json::Value time1(Json::ValueType::objectValue);
+        std::string format = params.get("time1_format");
+        time1["format"] = format;
+        if(format == "custom"){
+            time1["custom_format"] = params.get("time1_custom_format");
+        }
+        layer_json["time1_format"] = time1;
+    }
+    if(params.hasParam("time2_format")){
+        Json::Value time2(Json::ValueType::objectValue);
+        std::string format = params.get("time2_format");
+        time2["format"] = format;
+        if(format == "custom"){
+            time2["custom_format"] = params.get("time2_custom_format");
+        }
+        layer_json["time2_format"] = time2;
+    }
+
+    Json::Value columns(Json::ValueType::objectValue);
+
+    // for CSV files, testFileValidity (via openGDALDataset) has already checked that these
+    // parameters work, or that they are absent and the standard parameters work.
+    if(params.hasParam("x"))
+        columns["x"] = params.get("x");
+    if(params.hasParam("y"))
+        columns["y"] = params.get("y");
+
+    if(params.hasParam("time1"))
+        columns["time1"] = params.get("time1");
+    else if(layer_json["time"].asString() != "none"){
+        std::string msg("Selected time specification '");
+        msg += layer_json["time"].asString();
+        msg += "' requires a time1 attribute.";
+        throw ImporterException(msg);
+    }
+
+    if(params.hasParam("time2"))
+        columns["time2"] = params.get("time2");
+    else if(layer_json["time"].asString() == "start+duration" || layer_json["time"].asString() == "start+end"){
+        std::string msg("Selected time specification '");
+        msg += layer_json["time"].asString();
+        msg += "' requires a time2 attribute.";
+        throw ImporterException(msg);
+    }
+
+    layer_json["columns"] = columns;
+
+    if(params.hasParam("default"))
+        layer_json["default"] = params.get("default");
+
+    if(params.hasParam("description"))
+        layer_json["description"] = params.get("description");
+
+    if(params.hasParam("force_ogr_time_filter"))
+        layer_json["force_ogr_time_filter"] = params.get("force_ogr_time_filter");
+
+    if(params.hasParam("geometry_type"))
+        layer_json["geometry_type"] = params.get("geometry_type");
+
+    Json::Value provenance(Json::ValueType::objectValue);
+    provenance["citation"] = params.get("citation");
+    provenance["license"] = params.get("license");
+    provenance["uri"] = params.get("uri");
+    layer_json["provenance"] = provenance;
+
+    if(isWFS){
+        layer_json["filename"] = params.get("main_file");
+    } else {
+        bf::path file_path = target_dir;
+        file_path /= params.get("main_file");
+        layer_json["filename"] = file_path.string();
+    }
+
+    return layer_json;
+}
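Assembled by the function above, a layer entry in `<dataset_name>.json` looks roughly like this. The snippet is hand-written to match the code, not actual importer output; all values are hypothetical, and the filename depends on `ogrsource.files.path`:

```cpp
#include <json/json.h>
#include <cassert>
#include <string>

// Illustrative shape of one layer entry as written by createLayerJson:
const std::string example_layer = R"({
    "on_error": "skip",
    "time": "start",
    "duration": "86400",
    "time1_format": { "format": "iso" },
    "columns": { "x": "lon", "y": "lat", "time1": "observed_at" },
    "geometry_type": "Point",
    "provenance": {
        "citation": "Example et al. 2020",
        "license": "CC-BY-4.0",
        "uri": "https://example.org/birds"
    },
    "filename": "/path/to/ogrsource/files/bird_sightings/sightings.csv"
})";

int main() {
    Json::Reader reader;
    Json::Value layer;
    assert(reader.parse(example_layer, layer));
    assert(layer["columns"].isMember("time1")); // required since time != "none"
}
```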
+ */ +Json::Value OGRImporter::createLayerJson(const bf::path &target_dir, const bool isWFS) { + Json::Value layer_json(Json::ValueType::objectValue); + + if(ErrorHandlingConverter.is_value(params.get("on_error"))) + layer_json["on_error"] = params.get("on_error"); + else + throw ImporterException("Invalid 'on_error' parameter."); + + if(TimeSpecificationConverter.is_value(params.get("time"))) + layer_json["time"] = params.get("time"); + else + throw ImporterException("Invalid time parameter."); + + if(params.hasParam("duration")) + layer_json["duration"] = params.get("duration"); + else if(params.get("time") == "start") + throw ImporterException("For TimeSpecification 'start' a duration has to be provided."); + + if(params.hasParam("time1_format")){ + Json::Value time1(Json::ValueType::objectValue); + std::string format = params.get("time1_format"); + time1["format"] = format; + if(format == "custom"){ + time1["custom_format"] = params.get("time1_custom_format"); + } + layer_json["time1_format"] = time1; + } + if(params.hasParam("time2_format")){ + Json::Value time2(Json::ValueType::objectValue); + std::string format = params.get("time2_format"); + time2["format"] = format; + if(format == "custom"){ + time2["custom_format"] = params.get("time2_custom_format"); + } + layer_json["time2_format"] = time2; + } + + Json::Value columns(Json::ValueType::objectValue); + + // if it is a csv file, it is already tested in testFileValidity (in openGDALDataset) that these + // parameters are working or not provided and standard parameters work. + if(params.hasParam("x")) + columns["x"] = params.get("x"); + if(params.hasParam("y")) + columns["y"] = params.get("y"); + + if(params.hasParam("time1")) + columns["time1"] = params.get("time1"); + else if(layer_json["time"].asString() != "none"){ + std::string msg("Selected time specification '"); + msg += layer_json["time"].asString(); + msg += "' requires a time1 attribute."; + throw ImporterException(msg); + } + + if(params.hasParam("time2")) + columns["time2"] = params.get("time2"); + else if(layer_json["time"].asString() == "start+duration" || layer_json["time"].asString() == "start+end"){ + std::string msg("Selected time specification '"); + msg += layer_json["time"].asString(); + msg += "' requires a time2 attribute."; + throw ImporterException(msg); + } + + layer_json["columns"] = columns; + + if(params.hasParam("default")) + layer_json["default"] = params.get("default"); + + if(params.hasParam("description")) + layer_json["description"] = params.get("description"); + + if(params.hasParam("force_ogr_time_filter")) + layer_json["force_ogr_time_filter"] = params.get("force_ogr_time_filter"); + + if(params.hasParam("geometry_type")) + layer_json["geometry_type"] = params.get("geometry_type"); + + Json::Value provenance(Json::ValueType::objectValue); + provenance["citation"] = params.get("citation"); + provenance["license"] = params.get("license"); + provenance["uri"] = params.get("uri"); + layer_json["provenance"] = provenance; + + if(isWFS){ + layer_json["filename"] = params.get("main_file"); + } else { + bf::path file_path = target_dir; + file_path /= params.get("main_file"); + layer_json["filename"] = file_path.string(); + } + + return layer_json; +} + +/** + * Tests if the given main file of the upload can be opened with OGR and has valid data + */ +void OGRImporter::testFileValidity(const std::string &user_id, const std::string &upload_name, const std::string &main_file, const std::string &layer_name, const bool isWFS) { + bf::path upload_path; + 
diff --git a/src/util/uploader_util.cpp b/src/util/uploader_util.cpp
new file mode 100644
index 00000000..5d48e6e1
--- /dev/null
+++ b/src/util/uploader_util.cpp
@@ -0,0 +1,46 @@
+
+#include <boost/filesystem.hpp>
+#include "util/uploader_util.h"
+#include "util/exceptions.h"
+#include "util/configuration.h"
+
+namespace bf = boost::filesystem;
+
+void UploaderUtil::moveUpload(const std::string &user_id, const std::string &upload_name, bf::path &target_dir,
+                              std::vector<std::string> &copied_files)
+{
+    bf::path upload_path = getUploadPath(user_id, upload_name);
+
+    if(!bf::exists(upload_path) || !bf::is_directory(upload_path))
+        throw UploaderException(concat("Requested upload '", upload_name, "' does not exist"));
+
+    if(!bf::exists(target_dir))
+        bf::create_directory(target_dir);
+
+    for(auto it = bf::directory_iterator(upload_path); it != bf::directory_iterator{}; ++it){
+        std::string filename = it->path().filename().string();
+        bf::path file_target(target_dir);
+        file_target /= filename;
+        if(!bf::exists(file_target)){
+            bf::copy_file(it->path(), file_target, bf::copy_option::fail_if_exists);
+            copied_files.emplace_back(filename);
+        }
+    }
+}
+
+bool UploaderUtil::exists(const std::string &user_id, const std::string &upload_name) {
+    bf::path path = getUploadPath(user_id, upload_name);
+    return bf::exists(path) && bf::is_directory(path);
+}
+
+bool UploaderUtil::uploadHasFile(const std::string &user_id, const std::string &upload_name, const std::string &file_name) {
+    bf::path path = getUploadPath(user_id, upload_name);
+    path /= file_name;
+    return bf::exists(path) && bf::is_regular_file(path);
+}
+
+bf::path UploaderUtil::getUploadPath(const std::string &user_id, const std::string &upload_name) {
+    bf::path upload_path(Configuration::get("uploader.directory"));
+    upload_path /= user_id;
+    upload_path /= upload_name;
+    return upload_path;
+}
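The `copied_files` out-parameter makes partial copies recoverable: `moveUpload` appends each file name right after it is copied, so on failure the caller can remove exactly the files copied so far. This is what the importer's `run()` does; a condensed sketch of that contract (the function name here is invented):

```cpp
#include <boost/filesystem.hpp>
#include <string>
#include <vector>
#include "util/uploader_util.h"

namespace bf = boost::filesystem;

// Rollback pattern around UploaderUtil::moveUpload, mirroring OGRImporter::run():
void import_with_rollback(const std::string &user_id, const std::string &upload_name,
                          bf::path target_dir) {
    std::vector<std::string> copied_files;
    try {
        UploaderUtil::moveUpload(user_id, upload_name, target_dir, copied_files);
    } catch (const std::exception &) {
        // remove exactly the files that were copied before the failure
        for (const auto &filename : copied_files)
            bf::remove(target_dir / filename);
        throw; // let the caller report the failure
    }
}
```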
diff --git a/src/util/uploader_util.h b/src/util/uploader_util.h
new file mode 100644
index 00000000..3581cbae
--- /dev/null
+++ b/src/util/uploader_util.h
@@ -0,0 +1,22 @@
+
+#ifndef MAPPING_CORE_UPLOADER_UTIL_H
+#define MAPPING_CORE_UPLOADER_UTIL_H
+
+#include <boost/filesystem.hpp>
+#include "userdb/userdb.h"
+#include <vector>
+
+/**
+ * Functionality for the import services to interact with uploads.
+ * The caller has to check that the user has permission for these interactions; the uploader permission is named "upload".
+ */
+class UploaderUtil {
+public:
+    static void moveUpload(const std::string &user_id, const std::string &upload_name, boost::filesystem::path &target_dir,
+                           std::vector<std::string> &copied_files);
+    static bool exists(const std::string &user_id, const std::string &upload_name);
+    static bool uploadHasFile(const std::string &user_id, const std::string &upload_name, const std::string &file_name);
+    static boost::filesystem::path getUploadPath(const std::string &user_id, const std::string &upload_name);
+};
+
+#endif //MAPPING_CORE_UPLOADER_UTIL_H
diff --git a/test/unittests/uploader.cpp b/test/unittests/uploader.cpp
index 39e5fec2..d3741db2 100644
--- a/test/unittests/uploader.cpp
+++ b/test/unittests/uploader.cpp
@@ -23,6 +23,7 @@ class UploaderTest : public ::testing::Test {
     virtual void TearDown();
     std::string sessiontoken;
     std::string upl_dir;
+    std::string upl_dir_orig;
     std::string userID;
 };
 
@@ -35,6 +36,7 @@ void UploaderTest::SetUp() {
     time_t now = time(nullptr);
     UserDB::init("sqlite", ":memory:", make_unique(&now), 0);
+    upl_dir_orig = Configuration::get("uploader.directory");
     //change upload dir parameter to a test directory
     Configuration::loadFromString("[uploader]\ndirectory=\"" + temp_dir_name + "\"");
     upl_dir = Configuration::get("uploader.directory");
@@ -59,6 +61,7 @@ void UploaderTest::TearDown() {
     //delete test upload dir and just in case change back the parameter.
     boost::filesystem::path test_path(upl_dir);
     boost::filesystem::remove_all(test_path);
+    Configuration::loadFromString("[uploader]\ndirectory=\"" + upl_dir_orig + "\"");
     UserDB::shutdown();
 }