diff --git a/experimental/builder/include/ck_tile/builder/testing/conv/args.hpp b/experimental/builder/include/ck_tile/builder/testing/conv/args.hpp index eba6771964b..7568ce9fc20 100644 --- a/experimental/builder/include/ck_tile/builder/testing/conv/args.hpp +++ b/experimental/builder/include/ck_tile/builder/testing/conv/args.hpp @@ -3,12 +3,16 @@ #pragma once +#include +#include +#include + #include "ck_tile/builder/conv_signature_concepts.hpp" -#include "ck_tile/builder/factory/helpers/ck/conv_tensor_layout.hpp" #include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp" #include "ck_tile/builder/testing/testing.hpp" #include "ck_tile/builder/testing/filter_extent.hpp" #include "ck_tile/builder/testing/tensor_descriptor.hpp" +#include "ck_tile/builder/testing/validation.hpp" #include "ck_tile/host/convolution_parameter.hpp" #include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" @@ -49,6 +53,99 @@ struct ConvTensorLengths FilterExtent filter = {}; // X, Y, Z }; +namespace detail { + +/// @brief Calculate memory strides for a tensor with custom dimension ordering. +/// +/// Given tensor dimensions and a memory layout order, compute the stride +/// (memory jump size) needed to move by 1 in each dimension. +/// +/// @param lengths Tensor dimensions (e.g., {3, 4} for 3 rows × 4 columns) +/// @param outer_to_inner Dimension ordering from outermost to innermost in memory +/// @return Strides for each dimension (e.g., {4, 1} for row-major 3×4 tensor) +/// +/// Example: For a 3×4 tensor stored row-major (outer_to_inner = {0, 1}): +/// - Moving 1 row down requires jumping 4 positions → stride[0] = 4 +/// - Moving 1 column right requires jumping 1 position → stride[1] = 1 +template +Extent make_packed_strides_for_order(const Extent& lengths, + const std::array& outer_to_inner) +{ + Extent strides = {}; + + size_t stride = 1; // Innermost dimension always has stride 1 + for(size_t i = RANK; i > 0; --i) + { + const auto dim = outer_to_inner[i - 1]; // Get dimension at this position + strides[dim] = stride; // Assign current stride + stride *= lengths[dim]; // Update stride for next (outer) dimension + } + + return strides; +} + +template +std::array to_spatial_array(const FilterExtent& extent) +{ + if constexpr(SPATIAL_DIM == 1) + { + return {static_cast(extent.width)}; + } + else if constexpr(SPATIAL_DIM == 2) + { + // CK Builder uses spatial ordering {H, W} for 2D. + return {static_cast(extent.height), static_cast(extent.width)}; + } + else + { + // CK Builder uses spatial ordering {D, H, W} for 3D. + return {static_cast(extent.depth), + static_cast(extent.height), + static_cast(extent.width)}; + } +} + +template +std::array +compute_output_spatial(const std::array& input_spatial, + const std::array& filter_spatial, + const std::array& conv_strides, + const std::array& conv_dilations, + const std::array& left_pads, + const std::array& right_pads) +{ + std::array output_spatial = {}; + + for(int i = 0; i < SPATIAL_DIM; ++i) + { + const auto in = input_spatial[i]; + const auto fil = filter_spatial[i]; + const auto s = conv_strides[i]; + const auto d = conv_dilations[i]; + const auto pl = left_pads[i]; + const auto pr = right_pads[i]; + + // effective_filter = dilation*(filter-1) + 1 + const auto effective_filter = d * (fil - 1) + 1; + const auto numerator = in + pl + pr - effective_filter; + + if(s <= 0) + { + throw std::runtime_error("invalid convolution stride (must be > 0)"); + } + if(numerator < 0) + { + throw std::runtime_error("invalid convolution parameters (negative output spatial)"); + } + + output_spatial[i] = numerator / s + 1; + } + + return output_spatial; +} + +} // namespace detail + /// @brief `Args` specialization for forward convolution. /// /// @tparam SIGNATURE Forward convolution signature. @@ -74,15 +171,20 @@ struct Args // TODO: We shouldn't need to call into an internal namespace here. using Ops = factory::internal::ConvElementwiseOps; + // NOTE: ConvTensorLayouts removed - not used in this file and causes compilation error // TODO: We shouldn't need to call into an internal namespace here. - using Layouts = factory::internal::ConvTensorLayouts; + // using Layouts = factory::internal::ConvTensorLayouts; ConvTensorLengths lengths; - // TODO: Tensor strides. This needs a new structure as well as some - // reworking of the make_*_descriptor() functions, as the current - // implementation (based on ConvParam in old CK / CK Tile) does not - // support strides at all. + // Optional explicit tensor-memory strides (in elements), for custom/non-packed tensors. + // When not set, packed strides are derived automatically from the selected TensorLayout. + // NOTE: These have explicit default initializers to avoid + // -Wmissing-designated-field-initializers when `Args` is aggregate-initialized + // using designated initializers in tests. + std::optional input_strides = std::nullopt; + std::optional weight_strides = std::nullopt; + std::optional output_strides = std::nullopt; FilterExtent filter_strides; FilterExtent filter_dilation; @@ -95,23 +197,112 @@ struct Args int k_batch = 1; + /// @brief Compute output spatial dimensions from convolution parameters. + /// + /// @returns FilterExtent with computed output height, width, (and depth for 3D) + FilterExtent compute_output_spatial() const + { + const auto input_spatial_arr = detail::to_spatial_array(this->lengths.image); + const auto filter_spatial_arr = detail::to_spatial_array(this->lengths.filter); + const auto conv_strides_arr = detail::to_spatial_array(this->filter_strides); + const auto conv_dilations_arr = + detail::to_spatial_array(this->filter_dilation); + const auto left_pads_arr = detail::to_spatial_array(this->input_left_pad); + const auto right_pads_arr = detail::to_spatial_array(this->input_right_pad); + + const auto output_spatial_arr = + detail::compute_output_spatial(input_spatial_arr, + filter_spatial_arr, + conv_strides_arr, + conv_dilations_arr, + left_pads_arr, + right_pads_arr); + + return filter_extent_from_vector( + std::vector(output_spatial_arr.begin(), output_spatial_arr.end())); + } + /// This function returns the `TensorDescriptor` corresponding to /// the input-tensor of the convolution problem. This can then /// be used to, for example, allocate memory. InputDescriptor make_input_descriptor() const { - // TODO: We're using old CK functionality to compute the right - // values here, mainly because CK tile does not support the - // right tensor layouts here. We should probably change that - // because CK currently prints an annoying message about it, - // plus that would let us get rid of the `to_ck_conv_param()` - // function. - const auto param = to_ck_conv_param(); - const auto desc = ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed< - typename Layouts::InLayout>(param); using Extent = typename InputDescriptor::Extent; - return InputDescriptor(Extent::from_vector(desc.GetLengths()), - Extent::from_vector(desc.GetStrides())); + Extent lens = {}; + + lens[0] = this->lengths.groups; + lens[1] = this->lengths.batch_size; + lens[2] = this->lengths.input_channels; + if constexpr(SPATIAL_DIM == 1) + { + lens[3] = this->lengths.image.width; + } + else if constexpr(SPATIAL_DIM == 2) + { + lens[3] = this->lengths.image.height; + lens[4] = this->lengths.image.width; + } + else + { + lens[3] = this->lengths.image.depth; + lens[4] = this->lengths.image.height; + lens[5] = this->lengths.image.width; + } + + const auto make_default_strides = [&] { + constexpr auto layout = SIGNATURE.input.config.layout; + + if constexpr(SPATIAL_DIM == 1) + { + if constexpr(layout == TensorLayout::GNCW) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 2, 3}); + else if constexpr(layout == TensorLayout::GNWC || + layout == TensorLayout::G_NW_C_strided) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 3, 2}); + else if constexpr(layout == TensorLayout::NWGC) + return detail::make_packed_strides_for_order<4>(lens, {1, 3, 0, 2}); + else if constexpr(layout == TensorLayout::NGCW) + return detail::make_packed_strides_for_order<4>(lens, {1, 0, 2, 3}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 1D input layout for descriptor initialization."); + } + else if constexpr(SPATIAL_DIM == 2) + { + if constexpr(layout == TensorLayout::GNCHW) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 2, 3, 4}); + else if constexpr(layout == TensorLayout::GNHWC || + layout == TensorLayout::G_NHW_C_strided) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 3, 4, 2}); + else if constexpr(layout == TensorLayout::NHWGC) + return detail::make_packed_strides_for_order<5>(lens, {1, 3, 4, 0, 2}); + else if constexpr(layout == TensorLayout::NGCHW) + return detail::make_packed_strides_for_order<5>(lens, {1, 0, 2, 3, 4}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 2D input layout for descriptor initialization."); + } + else + { + if constexpr(layout == TensorLayout::GNCDHW) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 2, 3, 4, 5}); + else if constexpr(layout == TensorLayout::GNDHWC || + layout == TensorLayout::G_NDHW_C_strided) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 3, 4, 5, 2}); + else if constexpr(layout == TensorLayout::NDHWGC) + return detail::make_packed_strides_for_order<6>(lens, {1, 3, 4, 5, 0, 2}); + else if constexpr(layout == TensorLayout::NGCDHW) + return detail::make_packed_strides_for_order<6>(lens, {1, 0, 2, 3, 4, 5}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 3D input layout for descriptor initialization."); + } + }; + + const Extent strides = + this->input_strides.has_value() ? *this->input_strides : make_default_strides(); + + return InputDescriptor(lens, strides); } /// This function returns the `TensorDescriptor` corresponding to @@ -119,13 +310,76 @@ struct Args /// be used to, for example, allocate memory. WeightDescriptor make_weight_descriptor() const { - // See note in implementation of `make_input_descriptor`. - const auto param = to_ck_conv_param(); - const auto desc = ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed< - typename Layouts::WeiLayout>(param); using Extent = typename WeightDescriptor::Extent; - return WeightDescriptor(Extent::from_vector(desc.GetLengths()), - Extent::from_vector(desc.GetStrides())); + Extent lens = {}; + + lens[0] = this->lengths.groups; + lens[1] = this->lengths.output_channels; + lens[2] = this->lengths.input_channels; + if constexpr(SPATIAL_DIM == 1) + { + lens[3] = this->lengths.filter.width; + } + else if constexpr(SPATIAL_DIM == 2) + { + lens[3] = this->lengths.filter.height; + lens[4] = this->lengths.filter.width; + } + else + { + lens[3] = this->lengths.filter.depth; + lens[4] = this->lengths.filter.height; + lens[5] = this->lengths.filter.width; + } + + const auto make_default_strides = [&] { + constexpr auto layout = SIGNATURE.weight.config.layout; + + if constexpr(SPATIAL_DIM == 1) + { + if constexpr(layout == TensorLayout::GKCX) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 2, 3}); + else if constexpr(layout == TensorLayout::GKXC || + layout == TensorLayout::G_K_X_C_strided) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 3, 2}); + else if constexpr(layout == TensorLayout::KXGC) + return detail::make_packed_strides_for_order<4>(lens, {1, 3, 0, 2}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 1D weight layout for descriptor initialization."); + } + else if constexpr(SPATIAL_DIM == 2) + { + if constexpr(layout == TensorLayout::GKCYX) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 2, 3, 4}); + else if constexpr(layout == TensorLayout::GKYXC || + layout == TensorLayout::G_K_YX_C_strided) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 3, 4, 2}); + else if constexpr(layout == TensorLayout::KYXGC) + return detail::make_packed_strides_for_order<5>(lens, {1, 3, 4, 0, 2}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 2D weight layout for descriptor initialization."); + } + else + { + if constexpr(layout == TensorLayout::GKCZYX) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 2, 3, 4, 5}); + else if constexpr(layout == TensorLayout::GKZYXC || + layout == TensorLayout::G_K_ZYX_C_strided) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 3, 4, 5, 2}); + else if constexpr(layout == TensorLayout::KZYXGC) + return detail::make_packed_strides_for_order<6>(lens, {1, 3, 4, 5, 0, 2}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 3D weight layout for descriptor initialization."); + } + }; + + const Extent strides = + this->weight_strides.has_value() ? *this->weight_strides : make_default_strides(); + + return WeightDescriptor(lens, strides); } /// This function returns the `TensorDescriptor` corresponding to @@ -133,43 +387,84 @@ struct Args /// be used to, for example, allocate memory. OutputDescriptor make_output_descriptor() const { - // See note in implementation of `make_input_descriptor`. - const auto param = to_ck_conv_param(); - const auto desc = ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed< - typename Layouts::OutLayout>(param); using Extent = typename OutputDescriptor::Extent; - return OutputDescriptor(Extent::from_vector(desc.GetLengths()), - Extent::from_vector(desc.GetStrides())); - } + Extent lens = {}; + + const auto output_spatial = compute_output_spatial(); + + lens[0] = this->lengths.groups; + lens[1] = this->lengths.batch_size; + lens[2] = this->lengths.output_channels; + if constexpr(SPATIAL_DIM == 1) + { + lens[3] = output_spatial.width; + } + else if constexpr(SPATIAL_DIM == 2) + { + lens[3] = output_spatial.height; + lens[4] = output_spatial.width; + } + else + { + lens[3] = output_spatial.depth; + lens[4] = output_spatial.height; + lens[5] = output_spatial.width; + } + + const auto make_default_strides = [&] { + constexpr auto layout = SIGNATURE.output.config.layout; - /// Convert the Args structure into a CK conv_param structure. This - /// function is mainly used to be able to use the existing - /// CK-functionality to obtain tensor descriptors. - ck::utils::conv::ConvParam to_ck_conv_param() const - { - const auto to_vector = [](const auto& extent) { if constexpr(SPATIAL_DIM == 1) - return std::vector{ck::index_t(extent.width)}; + { + if constexpr(layout == TensorLayout::GNKW) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 2, 3}); + else if constexpr(layout == TensorLayout::GNWK || + layout == TensorLayout::G_NW_K_strided) + return detail::make_packed_strides_for_order<4>(lens, {0, 1, 3, 2}); + else if constexpr(layout == TensorLayout::NWGK) + return detail::make_packed_strides_for_order<4>(lens, {1, 3, 0, 2}); + else if constexpr(layout == TensorLayout::NGKW) + return detail::make_packed_strides_for_order<4>(lens, {1, 0, 2, 3}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 1D output layout for descriptor initialization."); + } else if constexpr(SPATIAL_DIM == 2) - return std::vector{ck::index_t(extent.height), - ck::index_t(extent.width)}; + { + if constexpr(layout == TensorLayout::GNKHW) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 2, 3, 4}); + else if constexpr(layout == TensorLayout::GNHWK || + layout == TensorLayout::G_NHW_K_strided) + return detail::make_packed_strides_for_order<5>(lens, {0, 1, 3, 4, 2}); + else if constexpr(layout == TensorLayout::NHWGK) + return detail::make_packed_strides_for_order<5>(lens, {1, 3, 4, 0, 2}); + else if constexpr(layout == TensorLayout::NGKHW) + return detail::make_packed_strides_for_order<5>(lens, {1, 0, 2, 3, 4}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 2D output layout for descriptor initialization."); + } else - return std::vector{ck::index_t(extent.depth), - ck::index_t(extent.height), - ck::index_t(extent.width)}; + { + if constexpr(layout == TensorLayout::GNKDHW) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 2, 3, 4, 5}); + else if constexpr(layout == TensorLayout::GNDHWK || + layout == TensorLayout::G_NDHW_K_strided) + return detail::make_packed_strides_for_order<6>(lens, {0, 1, 3, 4, 5, 2}); + else if constexpr(layout == TensorLayout::NDHWGK) + return detail::make_packed_strides_for_order<6>(lens, {1, 3, 4, 5, 0, 2}); + else if constexpr(layout == TensorLayout::NGKDHW) + return detail::make_packed_strides_for_order<6>(lens, {1, 0, 2, 3, 4, 5}); + else + static_assert(sizeof(UnsupportedEnumValue) == 0, + "Unsupported 3D output layout for descriptor initialization."); + } }; - return ck::utils::conv::ConvParam(SPATIAL_DIM, - this->lengths.groups, - this->lengths.batch_size, - this->lengths.output_channels, - this->lengths.input_channels, - to_vector(this->lengths.filter), - to_vector(this->lengths.image), - to_vector(this->filter_strides), - to_vector(this->filter_dilation), - to_vector(this->input_left_pad), - to_vector(this->input_right_pad)); + const Extent strides = + this->output_strides.has_value() ? *this->output_strides : make_default_strides(); + + return OutputDescriptor(lens, strides); } /// Convert the Args structure into a CK Tile conv_param structure. diff --git a/experimental/builder/include/ck_tile/builder/testing/conv/fwd_ck.hpp b/experimental/builder/include/ck_tile/builder/testing/conv/fwd_ck.hpp index 5eca79508c1..05302d9e3d3 100644 --- a/experimental/builder/include/ck_tile/builder/testing/conv/fwd_ck.hpp +++ b/experimental/builder/include/ck_tile/builder/testing/conv/fwd_ck.hpp @@ -103,24 +103,12 @@ template { constexpr auto spatial_dim = SIGNATURE.spatial_dim; - const auto copy = [](const auto& src, auto& dst) { - std::copy(src.begin(), src.end(), dst.begin()); - }; - const auto to_ck_lengths = [&](const auto& src) { std::array result; - copy(src, result); - return result; - }; - - const auto to_ck_extent = [&](const auto& extent) { - std::array result; - copy(extent, result); + std::copy(src.begin(), src.end(), result.begin()); return result; }; - const auto param = args.to_ck_conv_param(); - const auto input_desc = args.make_input_descriptor(); const auto weight_desc = args.make_weight_descriptor(); const auto output_desc = args.make_output_descriptor(); @@ -140,10 +128,10 @@ template {}, to_ck_lengths(output_desc.get_lengths()), to_ck_lengths(output_desc.get_strides()), - to_ck_extent(param.conv_filter_strides_), - to_ck_extent(param.conv_filter_dilations_), - to_ck_extent(param.input_left_pads_), - to_ck_extent(param.input_right_pads_), + args.filter_strides.to_array(), + args.filter_dilation.to_array(), + args.input_left_pad.to_array(), + args.input_right_pad.to_array(), args.a_elementwise_op, args.b_elementwise_op, args.cde_elementwise_op); diff --git a/experimental/builder/include/ck_tile/builder/testing/filter_extent.hpp b/experimental/builder/include/ck_tile/builder/testing/filter_extent.hpp index 2fc1f390127..89eea6142f4 100644 --- a/experimental/builder/include/ck_tile/builder/testing/filter_extent.hpp +++ b/experimental/builder/include/ck_tile/builder/testing/filter_extent.hpp @@ -17,6 +17,12 @@ template <> struct FilterExtent<1> { size_t width = 1; + + template + std::array to_array() const + { + return {static_cast(width)}; + } }; template <> @@ -24,6 +30,12 @@ struct FilterExtent<2> { size_t width = 1; size_t height = 1; + + template + std::array to_array() const + { + return {static_cast(height), static_cast(width)}; + } }; template <> @@ -32,6 +44,12 @@ struct FilterExtent<3> size_t width = 1; size_t height = 1; size_t depth = 1; + + template + std::array to_array() const + { + return {static_cast(depth), static_cast(height), static_cast(width)}; + } }; template diff --git a/experimental/builder/test/conv/ck_tile/test_ckb_conv_fwd_2d_fp16_v3.cpp b/experimental/builder/test/conv/ck_tile/test_ckb_conv_fwd_2d_fp16_v3.cpp index 2c35fb50761..038aa8e5a6d 100644 --- a/experimental/builder/test/conv/ck_tile/test_ckb_conv_fwd_2d_fp16_v3.cpp +++ b/experimental/builder/test/conv/ck_tile/test_ckb_conv_fwd_2d_fp16_v3.cpp @@ -3,33 +3,46 @@ #include "utils/ckb_conv_tile_test_configs.hpp" #include "utils/ckb_conv_test_utils.hpp" +#include "ck_tile/builder/testing/conv/ck_tile.hpp" +#include "ck_tile/builder/testing/conv/reference.hpp" +#include "ck_tile/host/device_prop.hpp" +#include "testing_utils.hpp" -namespace { +namespace ckb = ck_tile::builder; +namespace ckt = ck_tile::builder::test; +namespace cku = ck_tile::builder::test_utils; -using namespace ck_tile::builder::test_utils; +using ck_tile::test::MatchesReference; -TEST(FwdConvInstances, Create_ConvAlgorithm_Tile_GroupedConvolutionKernel_2D_FP16_NHWGC) +constexpr auto SIGNATURE = + ckt::ConvSignature{.spatial_dim = 2, + .direction = ckb::ConvDirection::FORWARD, + .data_type = ckb::DataType::FP16, + .accumulation_data_type = ckb::DataType::FP32, + .input = {.config = {.layout = ckb::TensorLayout::NHWGC}}, + .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}}, + .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}}; + +constexpr auto TILE_ALGORITHM = cku::ConvAlgorithm_Tile_GroupedConvolutionKernel{} + .with_tile_specializations(ckb::TileConvSpecialization::DEFAULT) + .with_tile_thread_block(cku::TileThreadBlock_64x64x64) + .with_tile_block_gemm(cku::TileBlockGemmDesc_16x16_v3_intrawave) + .with_tile_transfer(cku::TileTransfer_4x4x4) + .with_tile_optimizations(ckt::TileOptimizations{ + .num_groups_to_merge = 1, + .split_image = false, + .explicit_gemm = false, + }); + +using Builder = ckb::ConvBuilder; +using TileConv = Builder::Instance; +using Reference = ckb::ConvBuilder::Instance; + +TEST(Fwd2DFp16_TileV3_NHWGC, Create) { - constexpr ConvSignature FwdConvSignature{.spatial_dim = 2, - .direction = ConvDirection::FORWARD, - .data_type = DataType::FP16, - .accumulation_data_type = DataType::FP32, - .input = {.config = {.layout = TensorLayout::NHWGC}}, - .weight = {.config = {.layout = TensorLayout::GKYXC}}, - .output = {.config = {.layout = TensorLayout::NHWGK}}}; - - constexpr auto FwdConvAlgorithm = - ConvAlgorithm_Tile_GroupedConvolutionKernel{} - .with_tile_specializations(TileConvSpecialization::DEFAULT) - .with_tile_thread_block(TileThreadBlock_64x64x64) - .with_tile_block_gemm(TileBlockGemmDesc_16x16_v3_intrawave) - .with_tile_transfer(TileTransfer_4x4x4) - .with_tile_optimizations(TileOptimizations{ - .num_groups_to_merge = 1, .split_image = false, .explicit_gemm = false}); - - using Builder = ConvBuilder; - run_ck_tile_test({ - "grouped_convolution_forward", + const auto expected_type_string = "grouped_convolution_forward"; + cku::run_ck_tile_test({ + expected_type_string, "fp16", "NHWGC_GKYXC_NHWGK", "64x64x64", @@ -48,4 +61,51 @@ TEST(FwdConvInstances, Create_ConvAlgorithm_Tile_GroupedConvolutionKernel_2D_FP1 }); } -} // namespace +TEST(Fwd2DFp16_TileV3_NHWGC, EndToEnd) +{ + if(!ck_tile::get_device_name().starts_with("gfx9")) + { + GTEST_SKIP() << "unsupported architecture"; + } + + ckt::Args args = { + .lengths = + { + .batch_size = 16, + .groups = 1, + .input_channels = 32, + .output_channels = 48, + .image = + { + .width = 56, + .height = 64, + }, + .filter = + { + .width = 3, + .height = 5, + }, + }, + .filter_strides = {.width = 1, .height = 1}, + .filter_dilation = {.width = 1, .height = 1}, + .input_left_pad = {.width = 0, .height = 0}, + .input_right_pad = {.width = 0, .height = 0}, + .a_elementwise_op = {}, + .b_elementwise_op = {}, + .cde_elementwise_op = {}, + }; + + auto inputs = ckt::alloc_inputs(args); + auto outputs = ckt::alloc_outputs(args); + auto reference = ckt::alloc_outputs(args); + + ckt::init_inputs(args, inputs.get()); + + auto tile_conv = TileConv{}; + ckt::run(tile_conv, args, inputs.get(), outputs.get()); + + auto ref_conv = Reference{}; + ckt::run(ref_conv, args, inputs.get(), reference.get()); + + EXPECT_THAT(outputs.get(), MatchesReference(args, reference.get())); +}