diff --git a/README.md b/README.md index be0a2366..e4dcf22a 100644 --- a/README.md +++ b/README.md @@ -265,10 +265,9 @@ kernel <- "UnivariateMaternNuggetsStationary" initial_theta <- c(1,0.1,0.5,0.1) lower_bound <- c(0.05,0.005,0.05,0.005) upper_bound <- c(5,5,5,5) -acc <- 1e-9 p <- 1 q <- 1 -opt_itrs <- 100 +opt_itrs <- 300 # Initialize hardware configuration hardware <- new(Hardware, computation, ncores, ngpus, p, q) @@ -291,20 +290,88 @@ estimated_theta <- model_data( dimension=dimension, lb=lower_bound, ub=upper_bound, - mle_itr=opt_itrs) + mle_itr=opt_itrs, + tol=7) # Perform spatial prediction using the estimated parameters test_x <- c(0.2, 0.330) test_y <- c(0.104, 0.14) -predict_data( - train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m), +test_z <- c(-0.10838, -0.10838) + +result <- predict_data( + kernel=kernel, + estimated_theta=estimated_theta, + dts=dts, + train_data=list(exageostat_data$x, exageostat_data$y, exageostat_data$m), + test_data=list(test_x, test_y), + test_measurements=test_z + ) + +cat("Predicted values:", result, "\n") +cat("Actual values:", test_z, "\n") +cat("Difference:", result - test_z, "\n") +``` + +``` +## R Example +Here is another example demonstrating how to use **ExaGeoStatCPP** with nugget for prediction in R: + +```r +library(ExaGeoStatCPP) + +ncores <- 4 +ngpus <- 0 +dts <- 8 +computation <- "exact" +dimension <- "2D" +kernel <- "UnivariateMaternNuggetsStationary" +p <- 1 +q <- 1 + +hardware <- new(Hardware, computation, ncores, ngpus, p, q) + +# Use small example data (14 training points) +z_value <- c(-1.272336140360187606, -2.590699695867695773, 0.512142584178685967, + -0.163880452049749520, 0.313503633252489700, -1.474410682226017677, + 0.161705025505231914, 0.623389205185149065, -1.341858445399783495, + -1.054282062428600009, -1.669383221392507943, 0.219170645803740793, + 0.971213790000161170, 0.538973474182433021) + +locations_x <- c(0.092042420080872822, 0.193041886015106440, 0.330556191348134576, + 0.181612878614480805, 0.370473792629892440, 0.652140077821011688, + 0.553322652018005678, 0.800961318379491916, 0.207324330510414295, + 0.465445944914930965, 0.528267338063630132, 0.974792095826657490, + 0.552452887769893985, 0.877592126344701295) + +locations_y <- c(0.928648813611047563, 0.103883421072709245, 0.135790035858701447, 0.434683756771190977, + 0.400778210116731537, 0.168459601739528508, 0.105195696955825133, + 0.396398870832379624, 0.296757457846952011, 0.564507515068284116, + 0.627679865720607300, 0.958236057068741931, + 0.573571374074921758, 0.568657969024185528) + +test_x <- c(0.347951, 0.62768) +test_y <- c(0.806332, 0.105196) +test_z <- c(-1.05428, -1.47441) # Actual test measurements for MSPE +estimated_theta <- c(1, 0.1, 0.5, 0.1) + +cat("Testing with SMALL dataset (14 train + 2 test)\n") +cat("With test_measurements for MSPE calculation\n") +result <- predict_data( + train_data=list(locations_x, locations_y, z_value), test_data=list(test_x, test_y), kernel=kernel, dts=dts, - estimated_theta=estimated_theta) + estimated_theta=estimated_theta, + #test_measurements=test_z +) +cat("Predicted values:", result, "\n") +cat("Actual values:", test_z, "\n") +cat("Difference:", result - test_z, "\n") ``` -These two examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R. + + +These three R examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R. > **Note:** Please take a look at the end-to-end examples in the `examples/` directory as a reference for using all the operations. diff --git a/inst/include/Rcpp-adapters/FunctionsAdapter.hpp b/inst/include/Rcpp-adapters/FunctionsAdapter.hpp index 7d65974f..a1fedaec 100644 --- a/inst/include/Rcpp-adapters/FunctionsAdapter.hpp +++ b/inst/include/Rcpp-adapters/FunctionsAdapter.hpp @@ -98,6 +98,7 @@ namespace exageostat::adapters { * @param[in] aDimension Dimensionality of the spatial data ("2D" or "3D"). * @param[in] aTrainData Training data set used for predictions. * @param[in] aTestData Test data set for which predictions are made. + * @param[in] aTestMeasurementsValues Vector of actual measured values at the test locations, used for MSPE calculation. * @return Vector of predicted values based on the test data. * */ @@ -105,7 +106,9 @@ namespace exageostat::adapters { const std::vector &aEstimatedTheta, const int &aDenseTileSize, const int &aLowTileSize, const std::string &aDimension, std::vector> &aTrainData, - std::vector> &aTestData); + std::vector> &aTestData, + std::vector &aTestMeasurementsValues, + const std::string &aComputation); /** * @brief Calculates the Mean Logarithmic Error (MLOE) and the Mean Measure of Model Output (MMOM) for ExaGeoStat predictions. @@ -236,7 +239,8 @@ namespace exageostat::adapters { const std::string &aDimension, std::vector> &aTrainData, std::vector> &aTestData, const std::vector &aEstimatedTheta, - const std::vector &aTestMeasurementsValues); + const std::vector &aTestMeasurementsValues, + const std::string &aComputation); } #endif //EXAGEOSTATCPP_FUNCTIONSADAPTER_HPP diff --git a/inst/include/configurations/Configurations.hpp b/inst/include/configurations/Configurations.hpp index 963cac42..d982626f 100644 --- a/inst/include/configurations/Configurations.hpp +++ b/inst/include/configurations/Configurations.hpp @@ -321,6 +321,9 @@ namespace exageostat::configurations { CREATE_SETTER_FUNCTION(IsMSPE, bool, aIsMSPE, "mspe") CREATE_GETTER_FUNCTION(IsMSPE, bool, "mspe") + CREATE_SETTER_FUNCTION(HasTestMeasurements, bool, aHasTestMeasurements, "hastestmeasurements") + CREATE_GETTER_FUNCTION(HasTestMeasurements, bool, "hastestmeasurements") + CREATE_SETTER_FUNCTION(IsIDW, bool, aIsIDW, "idw") CREATE_GETTER_FUNCTION(IsIDW, bool, "idw") diff --git a/src/Rcpp-adapters/FunctionsAdapter.cpp b/src/Rcpp-adapters/FunctionsAdapter.cpp index 02d9f0af..65a55cc7 100644 --- a/src/Rcpp-adapters/FunctionsAdapter.cpp +++ b/src/Rcpp-adapters/FunctionsAdapter.cpp @@ -157,14 +157,14 @@ namespace exageostat::adapters { vector R_ExaGeoStatPredictData(const string &aKernelName, const string &aDistanceMatrix, const vector &aEstimatedTheta, const int &aDenseTileSize, const int &aLowTileSize, const string &aDimension, - vector > &aTrainData, vector > &aTestData) { + vector > &aTrainData, vector > &aTestData, + vector &aTestMeasurementsValues, const string &aComputation) { Configurations configurations; configurations.SetIsMSPE(TRUE); configurations.SetEstimatedTheta(aEstimatedTheta); - vector empty_vector; PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension, - aTrainData, aTestData, aEstimatedTheta, empty_vector); + aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues, aComputation); return Results::GetInstance()->GetPredictedMissedValues(); } @@ -180,7 +180,7 @@ namespace exageostat::adapters { vector empty_vector; PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension, - aTrainData, aTestData, aEstimatedTheta, empty_vector); + aTrainData, aTestData, aEstimatedTheta, empty_vector, "exact"); vector mloe_mmom_values; mloe_mmom_values.push_back(Results::GetInstance()->GetMLOE()); @@ -198,7 +198,7 @@ namespace exageostat::adapters { vector empty_vector; PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension, - aTrainData, aTestData, aEstimatedTheta, empty_vector); + aTrainData, aTestData, aEstimatedTheta, empty_vector, "exact"); return Results::GetInstance()->GetFisherMatrix(); } @@ -213,7 +213,7 @@ namespace exageostat::adapters { configurations.SetIsIDW(TRUE); PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension, - aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues); + aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues, "exact"); return Results::GetInstance()->GetIDWError(); } @@ -304,9 +304,10 @@ namespace exageostat::adapters { PredictionSetupHelper(Configurations &aConfigurations, const string &aKernelName, const string &aDistanceMatrix, const int &aDenseTileSize, const int &aLowTileSize, const string &aDimension, vector > &aTrainData, vector > &aTestData, - const vector &aEstimatedTheta, const vector &aTestMeasurementsValues) { + const vector &aEstimatedTheta, const vector &aTestMeasurementsValues, + const string &aComputation) { - aConfigurations.SetComputation(EXACT_DENSE); + aConfigurations.SetComputation(validator::Validator::CheckComputationValue(aComputation)); ValidateDataDimensions(aTrainData, "train"); ValidateDataDimensions(aTestData, "test"); @@ -327,6 +328,10 @@ namespace exageostat::adapters { dataunits::Locations train_locations(train_data_size, aConfigurations.GetDimension()); dataunits::Locations test_locations(test_data_size, aConfigurations.GetDimension()); + // Track whether test measurements were provided for MSPE logging + bool has_test_measurements = !aTestMeasurementsValues.empty(); + aConfigurations.SetHasTestMeasurements(has_test_measurements); + // Allocate memory for z_values to hold elements from both sources auto *z_values = new double[aTrainData.back().size() + aTestMeasurementsValues.size()]; @@ -341,10 +346,11 @@ namespace exageostat::adapters { } } memcpy(z_values, aTrainData.back().data(), aTrainData.back().size() * sizeof(double)); - // Calculate the starting position for the next part of the data in z_values - auto *destination = z_values + aTrainData.back().size(); - // Copy data from aTestMeasurementsValues to the next part of z_values, after the previously copied data - memcpy(destination, aTestMeasurementsValues.data(), aTestMeasurementsValues.size() * sizeof(double)); + // Copy test measurements if provided + if (has_test_measurements) { + auto *destination = z_values + aTrainData.back().size(); + memcpy(destination, aTestMeasurementsValues.data(), aTestMeasurementsValues.size() * sizeof(double)); + } for (int i = 0; i < test_data_size; i++) { test_locations.SetLocationX(*aTestData[0].data(), test_data_size); diff --git a/src/Rcpp-adapters/RcppModules.cpp b/src/Rcpp-adapters/RcppModules.cpp index 776c34e5..5107aad7 100644 --- a/src/Rcpp-adapters/RcppModules.cpp +++ b/src/Rcpp-adapters/RcppModules.cpp @@ -80,14 +80,16 @@ RCPP_MODULE(ExaGeoStatCPP) { function("predict_data", &exageostat::adapters::R_ExaGeoStatPredictData, List::create( - _["kernel"], - _["distance_matrix"] = "euclidean", - _["estimated_theta"], - _["dts"], - _["lts"] = 0, - _["dimension"] = "2D", - _["train_data"], - _["test_data"] + _["kernel"], + _["distance_matrix"] = "euclidean", + _["estimated_theta"], + _["dts"], + _["lts"] = 0, + _["dimension"] = "2D", + _["train_data"], + _["test_data"], + _["test_measurements"] = std::vector(), + _["computation"] = "exact" )); function("mloe_mmom", &exageostat::adapters::R_ExaGeoStatMLOE_MMOM, diff --git a/src/configurations/Configurations.cpp b/src/configurations/Configurations.cpp index e67a8ad7..a044eecd 100644 --- a/src/configurations/Configurations.cpp +++ b/src/configurations/Configurations.cpp @@ -49,6 +49,7 @@ Configurations::Configurations() { SetRecoveryFile(""); SetPrecision(DOUBLE); SetIsMSPE(false); + SetHasTestMeasurements(true); SetIsFisher(false); SetIsIDW(false); SetIsMLOEMMOM(false); diff --git a/src/prediction/Prediction.cpp b/src/prediction/Prediction.cpp index f0e6a7df..324fb3a5 100644 --- a/src/prediction/Prediction.cpp +++ b/src/prediction/Prediction.cpp @@ -63,14 +63,15 @@ void Prediction::PredictMissingData(unique_ptr> &aData, Con } else { z_miss_number = apTestLocations->GetSize(); n_z_obs = apTrainLocations->GetSize(); - z_actual = nullptr; + z_actual = new T[z_miss_number * p]; } aConfigurations.SetObservationNumber(n_z_obs); - auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(common::EXACT_DENSE); + auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(aConfigurations.GetComputation()); VERBOSE("\t- Total number of Z: " << aConfigurations.GetProblemSize()) LOGGER("\t- Number of Z Miss: " << z_miss_number) LOGGER("\t- Number of Z observations: " << n_z_obs) + LOGGER("\t- Computation mode: " << aConfigurations.GetComputation()) // FISHER Prediction Function Call if (aConfigurations.GetIsFisher()) { @@ -125,7 +126,7 @@ void Prediction::PredictMissingData(unique_ptr> &aData, Con // Prediction is only supported with 2D. auto obs_locations = new Locations(n_z_obs, aConfigurations.GetDimension()); - // We Predict date with only Exact computation. This is a pre-request. + // Initialize prediction arguments using the configured computation mode InitializePredictionArguments(aConfigurations, aData, linear_algebra_solver, z_obs, z_actual, *miss_locations, *obs_locations, apMeasurementsMatrix, p, apTrainLocations, apTestLocations); @@ -190,8 +191,9 @@ void Prediction::PredictMissingData(unique_ptr> &aData, Con z_miss_vector.push_back(z_miss[idx]); } Results::GetInstance()->SetPredictedMissedValues(z_miss_vector); - if (z_actual) { - LOGGER("\t\t- Prediction value: " << avg_pred_value[0]) + // Only log MSPE if test measurements were actually provided + if (z_actual && aConfigurations.GetHasTestMeasurements()) { + LOGGER("\t\t- MSPE value: " << avg_pred_value[0]) } delete[] prediction_error_mspe; } @@ -235,7 +237,12 @@ void Prediction::InitializePredictionArguments(Configurations &aConfiguration aMissLocation.GetLocationX()[i] = apTestLocations->GetLocationX()[i]; aMissLocation.GetLocationY()[i] = apTestLocations->GetLocationY()[i]; } - memcpy(apZObs, apMeasurementsMatrix, aObsLocation.GetSize() * sizeof(T)); + // Copy train measurements (n_z_obs * p elements for multivariate) + memcpy(apZObs, apMeasurementsMatrix, aObsLocation.GetSize() * aP * sizeof(T)); + // Extract test measurements only if they were actually provided + if (apZActual && apMeasurementsMatrix && aConfigurations.GetHasTestMeasurements()) { + memcpy(apZActual, apMeasurementsMatrix + aObsLocation.GetSize() * aP, aMissLocation.GetSize() * aP * sizeof(T)); + } } delete[] z; #endif diff --git a/test.R b/test.R new file mode 100644 index 00000000..c41660b2 --- /dev/null +++ b/test.R @@ -0,0 +1,57 @@ +.libPaths("~/R/x86_64-pc-linux-gnu-library/4.1") +library(ExaGeoStatCPP) + +ncores <- 20 +ngpus <- 0 +problem_size <- 1600 +dts <- 40 +lts <- 0 +computation <- "exact" +dimension <- "2D" +kernel <- "UnivariateMaternNuggetsStationary" +initial_theta <- c(1,0.1,0.5,0.1) +lower_bound <- c(0.01,0.02,0.01,0.01) +upper_bound <- c(5,5,5,5) +p <- 1 +q <- 1 +opt_itrs <- 500 + +hardware <- new(Hardware, computation, ncores, ngpus, p, q) + +exageostat_data <- simulate_data( + kernel = kernel, + initial_theta = initial_theta, + problem_size = problem_size, + dts = dts, + dimension = dimension +) + +estimated_theta <- model_data( + matrix=exageostat_data$m, + x=exageostat_data$x, + y=exageostat_data$y, + kernel=kernel, dts=dts, + dimension=dimension, + lb=lower_bound, + ub=upper_bound, + mle_itr=opt_itrs, + tol=7) + +test_x <- c(0.2, 0.330) +test_y <- c(0.104, 0.14) +test_z <- c(-0.10838, -0.10838) + +cat("Using computation mode:", computation, "\n") +result <- predict_data( + kernel=kernel, + estimated_theta=estimated_theta, + dts=dts, + train_data=list(exageostat_data$x, exageostat_data$y, exageostat_data$m), + test_data=list(test_x, test_y), + test_measurements=test_z, + computation=computation + ) + +cat("Predicted values:", result, "\n") +cat("Actual values:", test_z, "\n") +cat("Difference:", result - test_z, "\n") diff --git a/test_small.R b/test_small.R new file mode 100644 index 00000000..59591855 --- /dev/null +++ b/test_small.R @@ -0,0 +1,54 @@ +.libPaths("~/R/x86_64-pc-linux-gnu-library/4.1") +library(ExaGeoStatCPP) + +ncores <- 4 +ngpus <- 0 +dts <- 8 +computation <- "tlr" +dimension <- "2D" +kernel <- "UnivariateMaternNuggetsStationary" +p <- 1 +q <- 1 + +hardware <- new(Hardware, computation, ncores, ngpus, p, q) + +# Use small example data (14 training points) +z_value <- c(-1.272336140360187606, -2.590699695867695773, 0.512142584178685967, + -0.163880452049749520, 0.313503633252489700, -1.474410682226017677, + 0.161705025505231914, 0.623389205185149065, -1.341858445399783495, + -1.054282062428600009, -1.669383221392507943, 0.219170645803740793, + 0.971213790000161170, 0.538973474182433021) + +locations_x <- c(0.092042420080872822, 0.193041886015106440, 0.330556191348134576, + 0.181612878614480805, 0.370473792629892440, 0.652140077821011688, + 0.553322652018005678, 0.800961318379491916, 0.207324330510414295, + 0.465445944914930965, 0.528267338063630132, 0.974792095826657490, + 0.552452887769893985, 0.877592126344701295) + +locations_y <- c(0.928648813611047563, 0.103883421072709245, 0.135790035858701447, 0.434683756771190977, + 0.400778210116731537, 0.168459601739528508, 0.105195696955825133, + 0.396398870832379624, 0.296757457846952011, 0.564507515068284116, + 0.627679865720607300, 0.958236057068741931, + 0.573571374074921758, 0.568657969024185528) + +test_x <- c(0.347951, 0.62768) +test_y <- c(0.806332, 0.105196) +test_z <- c(-1.05428, -1.47441) # Actual test measurements for MSPE +estimated_theta <- c(1, 0.1, 0.5, 0.1) + +cat("Testing with SMALL dataset (14 train + 2 test)\n") +cat("With test_measurements for MSPE calculation\n") +cat("Using computation mode:", computation, "\n") +result <- predict_data( + train_data=list(locations_x, locations_y, z_value), + test_data=list(test_x, test_y), + kernel=kernel, + dts=dts, + estimated_theta=estimated_theta, + test_measurements=test_z, + computation=computation +) +cat("Predicted values:", result, "\n") +cat("Actual values:", test_z, "\n") +cat("Difference:", result - test_z, "\n") +