diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 5ef8f0eb3f4..1934d21e243 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -18,6 +18,10 @@ PCL_ADD_BENCHMARK(features_normal_3d FILES features/normal_3d.cpp ARGUMENTS "${PCL_SOURCE_DIR}/test/table_scene_mug_stereo_textured.pcd" "${PCL_SOURCE_DIR}/test/milk_cartoon_all_small_clorox.pcd") +PCL_ADD_BENCHMARK(features_shot FILES features/shot.cpp + LINK_WITH pcl_io pcl_search pcl_features + ARGUMENTS "${PCL_SOURCE_DIR}/test/milk_cartoon_all_small_clorox.pcd") + PCL_ADD_BENCHMARK(filters_voxel_grid FILES filters/voxel_grid.cpp LINK_WITH pcl_io pcl_filters ARGUMENTS "${PCL_SOURCE_DIR}/test/table_scene_mug_stereo_textured.pcd" diff --git a/benchmarks/features/shot.cpp b/benchmarks/features/shot.cpp new file mode 100644 index 00000000000..cb7a01e9207 --- /dev/null +++ b/benchmarks/features/shot.cpp @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include + +#include + +constexpr float lrf_radius = 0.04f; +constexpr float shot_search_radius = 0.04f; +constexpr float uniform_sampling_radius = 0.03f; + +static void +BM_SHOT352(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints(new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + + pcl::SHOTEstimation shot; + shot.setInputCloud(keypoints); + shot.setInputNormals(normals); + shot.setSearchSurface(cloud); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT352_OMP(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints(new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + + pcl::SHOTEstimationOMP shot( + 0, state.range(0) // + ); + shot.setInputCloud(keypoints); + shot.setInputNormals(normals); + shot.setSearchSurface(cloud); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT1344(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud_rgba); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_rgba); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud_rgba); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints( + new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + + pcl::SHOTColorEstimation shot(true, + true); + shot.setInputCloud(keypoints); + shot.setInputNormals(normals); + shot.setSearchSurface(cloud_rgba); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT1344_OMP(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud_rgba); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_rgba); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud_rgba); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints( + new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + + pcl::SHOTColorEstimationOMP shot( + true, true, 0, state.range(0)); + shot.setInputCloud(keypoints); + shot.setInputNormals(normals); + shot.setSearchSurface(cloud_rgba); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + std::cerr << "No test files given. Please provide a PCD file path to use for " + "both SHOT352 and SHOT1344 benchmarks." + << std::endl; + return (-1); + } + + constexpr int runs = 100; + + benchmark::RegisterBenchmark("BM_SHOT352", &BM_SHOT352, argv[1]) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + benchmark::RegisterBenchmark("BM_SHOT352_OMP", &BM_SHOT352_OMP, argv[1]) + ->Arg(64) + ->Arg(128) + ->Arg(256) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + + benchmark::RegisterBenchmark("BM_SHOT1344", &BM_SHOT1344, argv[1]) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + benchmark::RegisterBenchmark("BM_SHOT1344_OMP", &BM_SHOT1344_OMP, argv[1]) + ->Arg(64) + ->Arg(128) + ->Arg(256) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/features/include/pcl/features/impl/shot_omp.hpp b/features/include/pcl/features/impl/shot_omp.hpp index 4a0a01a1988..e5137a75bd8 100644 --- a/features/include/pcl/features/impl/shot_omp.hpp +++ b/features/include/pcl/features/impl/shot_omp.hpp @@ -153,7 +153,8 @@ pcl::SHOTEstimationOMP::computeFeature ( #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(threads_) \ + schedule(dynamic, chunk_size_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { @@ -240,7 +241,8 @@ pcl::SHOTColorEstimationOMP::computeFeat #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(threads_) \ + schedule(dynamic, chunk_size_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { Eigen::VectorXf shot; diff --git a/features/include/pcl/features/shot_omp.h b/features/include/pcl/features/shot_omp.h index 59d48eaac17..50fefdf9a2c 100644 --- a/features/include/pcl/features/shot_omp.h +++ b/features/include/pcl/features/shot_omp.h @@ -94,8 +94,12 @@ namespace pcl using PointCloudOut = typename Feature::PointCloudOut; using PointCloudIn = typename Feature::PointCloudIn; - /** \brief Empty constructor. */ - SHOTEstimationOMP (unsigned int nr_threads = 0) : SHOTEstimation () + /** \brief Empty constructor. + * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. + */ + SHOTEstimationOMP (unsigned int nr_threads = 0, int chunk_size = 256) + : SHOTEstimation (), chunk_size_(chunk_size) { setNumberOfThreads(nr_threads); }; @@ -122,6 +126,9 @@ namespace pcl /** \brief The number of threads the scheduler should use. */ unsigned int threads_; + + /** \brief Chunk size for (dynamic) scheduling. */ + int chunk_size_; }; /** \brief SHOTColorEstimationOMP estimates the Signature of Histograms of OrienTations (SHOT) descriptor for a given point cloud dataset @@ -176,11 +183,16 @@ namespace pcl using PointCloudOut = typename Feature::PointCloudOut; using PointCloudIn = typename Feature::PointCloudIn; - /** \brief Empty constructor. */ + /** \brief Empty constructor. + * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. + */ SHOTColorEstimationOMP (bool describe_shape = true, bool describe_color = true, - unsigned int nr_threads = 0) - : SHOTColorEstimation (describe_shape, describe_color) + unsigned int nr_threads = 0, + int chunk_size = 64) + : SHOTColorEstimation (describe_shape, describe_color), + chunk_size_(chunk_size) { setNumberOfThreads(nr_threads); } @@ -207,6 +219,9 @@ namespace pcl /** \brief The number of threads the scheduler should use. */ unsigned int threads_; + + /** \brief Chunk size for (dynamic) scheduling. */ + int chunk_size_; }; }