Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 74 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,10 +265,9 @@ kernel <- "UnivariateMaternNuggetsStationary"
initial_theta <- c(1,0.1,0.5,0.1)
lower_bound <- c(0.05,0.005,0.05,0.005)
upper_bound <- c(5,5,5,5)
acc <- 1e-9
p <- 1
q <- 1
opt_itrs <- 100
opt_itrs <- 300

# Initialize hardware configuration
hardware <- new(Hardware, computation, ncores, ngpus, p, q)
Expand All @@ -291,20 +290,88 @@ estimated_theta <- model_data(
dimension=dimension,
lb=lower_bound,
ub=upper_bound,
mle_itr=opt_itrs)
mle_itr=opt_itrs,
tol=7)

# Perform spatial prediction using the estimated parameters
test_x <- c(0.2, 0.330)
test_y <- c(0.104, 0.14)
predict_data(
train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m),
test_z <- c(-0.10838, -0.10838)

result <- predict_data(
kernel=kernel,
estimated_theta=estimated_theta,
dts=dts,
train_data=list(exageostat_data$x, exageostat_data$y, exageostat_data$m),
test_data=list(test_x, test_y),
test_measurements=test_z
)

cat("Predicted values:", result, "\n")
cat("Actual values:", test_z, "\n")
cat("Difference:", result - test_z, "\n")
```

```
## R Example
Here is another example demonstrating how to use **ExaGeoStatCPP** with nugget for prediction in R:

```r
library(ExaGeoStatCPP)

ncores <- 4
ngpus <- 0
dts <- 8
computation <- "exact"
dimension <- "2D"
kernel <- "UnivariateMaternNuggetsStationary"
p <- 1
q <- 1

hardware <- new(Hardware, computation, ncores, ngpus, p, q)

# Use small example data (14 training points)
z_value <- c(-1.272336140360187606, -2.590699695867695773, 0.512142584178685967,
-0.163880452049749520, 0.313503633252489700, -1.474410682226017677,
0.161705025505231914, 0.623389205185149065, -1.341858445399783495,
-1.054282062428600009, -1.669383221392507943, 0.219170645803740793,
0.971213790000161170, 0.538973474182433021)

locations_x <- c(0.092042420080872822, 0.193041886015106440, 0.330556191348134576,
0.181612878614480805, 0.370473792629892440, 0.652140077821011688,
0.553322652018005678, 0.800961318379491916, 0.207324330510414295,
0.465445944914930965, 0.528267338063630132, 0.974792095826657490,
0.552452887769893985, 0.877592126344701295)

locations_y <- c(0.928648813611047563, 0.103883421072709245, 0.135790035858701447, 0.434683756771190977,
0.400778210116731537, 0.168459601739528508, 0.105195696955825133,
0.396398870832379624, 0.296757457846952011, 0.564507515068284116,
0.627679865720607300, 0.958236057068741931,
0.573571374074921758, 0.568657969024185528)

test_x <- c(0.347951, 0.62768)
test_y <- c(0.806332, 0.105196)
test_z <- c(-1.05428, -1.47441) # Actual test measurements for MSPE
estimated_theta <- c(1, 0.1, 0.5, 0.1)

cat("Testing with SMALL dataset (14 train + 2 test)\n")
cat("With test_measurements for MSPE calculation\n")
result <- predict_data(
train_data=list(locations_x, locations_y, z_value),
test_data=list(test_x, test_y),
kernel=kernel,
dts=dts,
estimated_theta=estimated_theta)
estimated_theta=estimated_theta,
#test_measurements=test_z
)
cat("Predicted values:", result, "\n")
cat("Actual values:", test_z, "\n")
cat("Difference:", result - test_z, "\n")
```

These two examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.


These three R examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.

> **Note:** Please take a look at the end-to-end examples in the `examples/` directory as a reference for using all the operations.

Expand Down
8 changes: 6 additions & 2 deletions inst/include/Rcpp-adapters/FunctionsAdapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,17 @@ namespace exageostat::adapters {
* @param[in] aDimension Dimensionality of the spatial data ("2D" or "3D").
* @param[in] aTrainData Training data set used for predictions.
* @param[in] aTestData Test data set for which predictions are made.
* @param[in] aTestMeasurementsValues Vector of actual measured values at the test locations, used for MSPE calculation.
* @return Vector of predicted values based on the test data.
*
*/
std::vector<double> R_ExaGeoStatPredictData(const std::string &aKernelName, const std::string &aDistanceMatrix,
const std::vector<double> &aEstimatedTheta, const int &aDenseTileSize,
const int &aLowTileSize, const std::string &aDimension,
std::vector<std::vector<double>> &aTrainData,
std::vector<std::vector<double>> &aTestData);
std::vector<std::vector<double>> &aTestData,
std::vector<double> &aTestMeasurementsValues,
const std::string &aComputation);

/**
* @brief Calculates the Mean Logarithmic Error (MLOE) and the Mean Measure of Model Output (MMOM) for ExaGeoStat predictions.
Expand Down Expand Up @@ -236,7 +239,8 @@ namespace exageostat::adapters {
const std::string &aDimension, std::vector<std::vector<double>> &aTrainData,
std::vector<std::vector<double>> &aTestData,
const std::vector<double> &aEstimatedTheta,
const std::vector<double> &aTestMeasurementsValues);
const std::vector<double> &aTestMeasurementsValues,
const std::string &aComputation);

}
#endif //EXAGEOSTATCPP_FUNCTIONSADAPTER_HPP
3 changes: 3 additions & 0 deletions inst/include/configurations/Configurations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ namespace exageostat::configurations {
CREATE_SETTER_FUNCTION(IsMSPE, bool, aIsMSPE, "mspe")
CREATE_GETTER_FUNCTION(IsMSPE, bool, "mspe")

CREATE_SETTER_FUNCTION(HasTestMeasurements, bool, aHasTestMeasurements, "hastestmeasurements")
CREATE_GETTER_FUNCTION(HasTestMeasurements, bool, "hastestmeasurements")

CREATE_SETTER_FUNCTION(IsIDW, bool, aIsIDW, "idw")
CREATE_GETTER_FUNCTION(IsIDW, bool, "idw")

Expand Down
30 changes: 18 additions & 12 deletions src/Rcpp-adapters/FunctionsAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,14 @@ namespace exageostat::adapters {
vector<double> R_ExaGeoStatPredictData(const string &aKernelName, const string &aDistanceMatrix,
const vector<double> &aEstimatedTheta, const int &aDenseTileSize,
const int &aLowTileSize, const string &aDimension,
vector <vector<double>> &aTrainData, vector <vector<double>> &aTestData) {
vector <vector<double>> &aTrainData, vector <vector<double>> &aTestData,
vector<double> &aTestMeasurementsValues, const string &aComputation) {

Configurations configurations;
configurations.SetIsMSPE(TRUE);
configurations.SetEstimatedTheta(aEstimatedTheta);
vector<double> empty_vector;
PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension,
aTrainData, aTestData, aEstimatedTheta, empty_vector);
aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues, aComputation);
return Results::GetInstance()->GetPredictedMissedValues();
}

Expand All @@ -180,7 +180,7 @@ namespace exageostat::adapters {

vector<double> empty_vector;
PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension,
aTrainData, aTestData, aEstimatedTheta, empty_vector);
aTrainData, aTestData, aEstimatedTheta, empty_vector, "exact");

vector<double> mloe_mmom_values;
mloe_mmom_values.push_back(Results::GetInstance()->GetMLOE());
Expand All @@ -198,7 +198,7 @@ namespace exageostat::adapters {

vector<double> empty_vector;
PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension,
aTrainData, aTestData, aEstimatedTheta, empty_vector);
aTrainData, aTestData, aEstimatedTheta, empty_vector, "exact");

return Results::GetInstance()->GetFisherMatrix();
}
Expand All @@ -213,7 +213,7 @@ namespace exageostat::adapters {
configurations.SetIsIDW(TRUE);

PredictionSetupHelper(configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension,
aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues);
aTrainData, aTestData, aEstimatedTheta, aTestMeasurementsValues, "exact");

return Results::GetInstance()->GetIDWError();
}
Expand Down Expand Up @@ -304,9 +304,10 @@ namespace exageostat::adapters {
PredictionSetupHelper(Configurations &aConfigurations, const string &aKernelName, const string &aDistanceMatrix,
const int &aDenseTileSize, const int &aLowTileSize, const string &aDimension,
vector <vector<double>> &aTrainData, vector <vector<double>> &aTestData,
const vector<double> &aEstimatedTheta, const vector<double> &aTestMeasurementsValues) {
const vector<double> &aEstimatedTheta, const vector<double> &aTestMeasurementsValues,
const string &aComputation) {

aConfigurations.SetComputation(EXACT_DENSE);
aConfigurations.SetComputation(validator::Validator::CheckComputationValue(aComputation));

ValidateDataDimensions(aTrainData, "train");
ValidateDataDimensions(aTestData, "test");
Expand All @@ -327,6 +328,10 @@ namespace exageostat::adapters {
dataunits::Locations<double> train_locations(train_data_size, aConfigurations.GetDimension());
dataunits::Locations<double> test_locations(test_data_size, aConfigurations.GetDimension());

// Track whether test measurements were provided for MSPE logging
bool has_test_measurements = !aTestMeasurementsValues.empty();
aConfigurations.SetHasTestMeasurements(has_test_measurements);

// Allocate memory for z_values to hold elements from both sources
auto *z_values = new double[aTrainData.back().size() + aTestMeasurementsValues.size()];

Expand All @@ -341,10 +346,11 @@ namespace exageostat::adapters {
}
}
memcpy(z_values, aTrainData.back().data(), aTrainData.back().size() * sizeof(double));
// Calculate the starting position for the next part of the data in z_values
auto *destination = z_values + aTrainData.back().size();
// Copy data from aTestMeasurementsValues to the next part of z_values, after the previously copied data
memcpy(destination, aTestMeasurementsValues.data(), aTestMeasurementsValues.size() * sizeof(double));
// Copy test measurements if provided
if (has_test_measurements) {
auto *destination = z_values + aTrainData.back().size();
memcpy(destination, aTestMeasurementsValues.data(), aTestMeasurementsValues.size() * sizeof(double));
}

for (int i = 0; i < test_data_size; i++) {
test_locations.SetLocationX(*aTestData[0].data(), test_data_size);
Expand Down
18 changes: 10 additions & 8 deletions src/Rcpp-adapters/RcppModules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,16 @@ RCPP_MODULE(ExaGeoStatCPP) {

function("predict_data", &exageostat::adapters::R_ExaGeoStatPredictData,
List::create(
_["kernel"],
_["distance_matrix"] = "euclidean",
_["estimated_theta"],
_["dts"],
_["lts"] = 0,
_["dimension"] = "2D",
_["train_data"],
_["test_data"]
_["kernel"],
_["distance_matrix"] = "euclidean",
_["estimated_theta"],
_["dts"],
_["lts"] = 0,
_["dimension"] = "2D",
_["train_data"],
_["test_data"],
_["test_measurements"] = std::vector<double>(),
_["computation"] = "exact"
));

function("mloe_mmom", &exageostat::adapters::R_ExaGeoStatMLOE_MMOM,
Expand Down
1 change: 1 addition & 0 deletions src/configurations/Configurations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Configurations::Configurations() {
SetRecoveryFile("");
SetPrecision(DOUBLE);
SetIsMSPE(false);
SetHasTestMeasurements(true);
SetIsFisher(false);
SetIsIDW(false);
SetIsMLOEMMOM(false);
Expand Down
19 changes: 13 additions & 6 deletions src/prediction/Prediction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,15 @@ void Prediction<T>::PredictMissingData(unique_ptr<ExaGeoStatData<T>> &aData, Con
} else {
z_miss_number = apTestLocations->GetSize();
n_z_obs = apTrainLocations->GetSize();
z_actual = nullptr;
z_actual = new T[z_miss_number * p];
}
aConfigurations.SetObservationNumber(n_z_obs);
auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory<T>::CreateLinearAlgebraSolver(common::EXACT_DENSE);
auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory<T>::CreateLinearAlgebraSolver(aConfigurations.GetComputation());

VERBOSE("\t- Total number of Z: " << aConfigurations.GetProblemSize())
LOGGER("\t- Number of Z Miss: " << z_miss_number)
LOGGER("\t- Number of Z observations: " << n_z_obs)
LOGGER("\t- Computation mode: " << aConfigurations.GetComputation())

// FISHER Prediction Function Call
if (aConfigurations.GetIsFisher()) {
Expand Down Expand Up @@ -125,7 +126,7 @@ void Prediction<T>::PredictMissingData(unique_ptr<ExaGeoStatData<T>> &aData, Con
// Prediction is only supported with 2D.
auto obs_locations = new Locations<T>(n_z_obs, aConfigurations.GetDimension());

// We Predict date with only Exact computation. This is a pre-request.
// Initialize prediction arguments using the configured computation mode
InitializePredictionArguments(aConfigurations, aData, linear_algebra_solver, z_obs, z_actual, *miss_locations,
*obs_locations, apMeasurementsMatrix, p, apTrainLocations, apTestLocations);

Expand Down Expand Up @@ -190,8 +191,9 @@ void Prediction<T>::PredictMissingData(unique_ptr<ExaGeoStatData<T>> &aData, Con
z_miss_vector.push_back(z_miss[idx]);
}
Results::GetInstance()->SetPredictedMissedValues(z_miss_vector);
if (z_actual) {
LOGGER("\t\t- Prediction value: " << avg_pred_value[0])
// Only log MSPE if test measurements were actually provided
if (z_actual && aConfigurations.GetHasTestMeasurements()) {
LOGGER("\t\t- MSPE value: " << avg_pred_value[0])
}
delete[] prediction_error_mspe;
}
Expand Down Expand Up @@ -235,7 +237,12 @@ void Prediction<T>::InitializePredictionArguments(Configurations &aConfiguration
aMissLocation.GetLocationX()[i] = apTestLocations->GetLocationX()[i];
aMissLocation.GetLocationY()[i] = apTestLocations->GetLocationY()[i];
}
memcpy(apZObs, apMeasurementsMatrix, aObsLocation.GetSize() * sizeof(T));
// Copy train measurements (n_z_obs * p elements for multivariate)
memcpy(apZObs, apMeasurementsMatrix, aObsLocation.GetSize() * aP * sizeof(T));
// Extract test measurements only if they were actually provided
if (apZActual && apMeasurementsMatrix && aConfigurations.GetHasTestMeasurements()) {
memcpy(apZActual, apMeasurementsMatrix + aObsLocation.GetSize() * aP, aMissLocation.GetSize() * aP * sizeof(T));
}
}
delete[] z;
#endif
Expand Down
57 changes: 57 additions & 0 deletions test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
.libPaths("~/R/x86_64-pc-linux-gnu-library/4.1")
library(ExaGeoStatCPP)

ncores <- 20
ngpus <- 0
problem_size <- 1600
dts <- 40
lts <- 0
computation <- "exact"
dimension <- "2D"
kernel <- "UnivariateMaternNuggetsStationary"
initial_theta <- c(1,0.1,0.5,0.1)
lower_bound <- c(0.01,0.02,0.01,0.01)
upper_bound <- c(5,5,5,5)
p <- 1
q <- 1
opt_itrs <- 500

hardware <- new(Hardware, computation, ncores, ngpus, p, q)

exageostat_data <- simulate_data(
kernel = kernel,
initial_theta = initial_theta,
problem_size = problem_size,
dts = dts,
dimension = dimension
)

estimated_theta <- model_data(
matrix=exageostat_data$m,
x=exageostat_data$x,
y=exageostat_data$y,
kernel=kernel, dts=dts,
dimension=dimension,
lb=lower_bound,
ub=upper_bound,
mle_itr=opt_itrs,
tol=7)

test_x <- c(0.2, 0.330)
test_y <- c(0.104, 0.14)
test_z <- c(-0.10838, -0.10838)

cat("Using computation mode:", computation, "\n")
result <- predict_data(
kernel=kernel,
estimated_theta=estimated_theta,
dts=dts,
train_data=list(exageostat_data$x, exageostat_data$y, exageostat_data$m),
test_data=list(test_x, test_y),
test_measurements=test_z,
computation=computation
)

cat("Predicted values:", result, "\n")
cat("Actual values:", test_z, "\n")
cat("Difference:", result - test_z, "\n")
Loading