From 7026b303d636e7639f8877ae8d5eff54f39c1121 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 26 Jun 2019 17:18:11 +0100 Subject: COMPMID-1979: Fuse Activation Function in CLGEMM - part 1 Implementing a new struct to contains the information for the OpenCL GEMM kernels Change-Id: I6c641c312f9c3b025a7c69dd0df3b730d2d2c2cb Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/1434 Tested-by: Arm Jenkins Reviewed-by: Giuseppe Rossini --- .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 6 ++- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 6 ++- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 6 ++- arm_compute/core/KernelDescriptors.h | 11 ++++ arm_compute/core/utils/misc/ShapeCalculator.h | 40 ++++++++++++++ .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 56 +++++++++---------- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 42 +++++++-------- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 52 +++++++++--------- src/runtime/CL/functions/CLGEMM.cpp | 62 +++++++++++++++------- tests/validation/CL/GEMMMatrixMultiplyNative.cpp | 12 +++-- tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp | 13 +++-- .../CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp | 13 +++-- tests/validation/fixtures/GEMMFixture.h | 55 ++++++++++++++++--- 13 files changed, 258 insertions(+), 116 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h index 79689a2894..96f412c6a5 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/KernelDescriptors.h" + namespace arm_compute { class ICLTensor; @@ -62,7 +64,7 @@ public: */ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4. @@ -83,7 +85,7 @@ public: */ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h index 68ab94a31d..47916b3019 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/KernelDescriptors.h" + namespace arm_compute { class ICLTensor; @@ -69,7 +71,7 @@ public: */ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel * * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4 @@ -94,7 +96,7 @@ public: */ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h index e3b3880a37..3315331e87 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/KernelDescriptors.h" + namespace arm_compute { class ICLTensor; @@ -65,7 +67,7 @@ public: */ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4. @@ -86,7 +88,7 @@ public: */ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); + const GEMMKernelInfo &gemm_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index 83131f4296..fe59365d06 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -48,5 +48,16 @@ struct FFTRadixStageKernelInfo unsigned int Nx{ 0 }; /**< Nx coefficient. */ bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */ }; + +/** Descriptor used by the GEMM kernels */ +struct GEMMKernelInfo +{ + unsigned int m{ 0 }; + unsigned int n{ 0 }; + unsigned int k{ 0 }; + unsigned int depth_output_gemm3d{ 0 }; + bool reinterpret_input_as_3d{ false }; + bool broadcast_bias{ false }; +}; } // namespace arm_compute #endif /* __ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 7eab17ba11..010501454f 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" @@ -850,6 +851,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo } /** Calculate the matrix multiplication output shape of two tensors + * + * @note Deprecated. Remove when GEMMReshapeInfo is not used anymore by any other kernels * * @param[in] input0 First input tensor info * @param[in] input1 Second input tensor info @@ -886,6 +889,43 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo return output_shape; } +/** Calculate the matrix multiplication output shape of two tensors + * + * @param[in] input0 First input tensor info + * @param[in] input1 Second input tensor info + * @param[in] gemm_info GEMM kernel info used to retrieve the original dimensions of the input matrices + * + * @return the calculated shape + */ +inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) +{ + ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); + + const bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; + const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1; + + TensorShape output_shape{ input0.tensor_shape() }; + + if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + { + output_shape.set(0, gemm_info.n); + output_shape.set(1, gemm_info.m); + } + else + { + // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third + // dimension of the output tensor + const unsigned int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; + output_shape.set(0, gemm_info.n); + output_shape.set(1, gemm_info.m / depth_output_gemm3d); + output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size); + output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1); + } + + return output_shape; +} + /** Calculate the matrix multiplication output shape of two tensors * * @param[in] input Input tensor info diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index 0b9359e610..e5d199d4ee 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -53,7 +53,7 @@ using ElementsProcessed = Steps; Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info) + const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); @@ -66,33 +66,33 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 1 || lhs_info.m0 > 8); ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3), "Only 2,3,4,8,16 are supported for n0"); - const int m = gemm_info.m(); - const int n = gemm_info.n(); - const int k = gemm_info.k(); + const unsigned int m = gemm_info.m; + const unsigned int n = gemm_info.n; + const unsigned int k = gemm_info.k; ARM_COMPUTE_UNUSED(m); ARM_COMPUTE_UNUSED(n); ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast(k)); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast(n)); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast(k)); - if(gemm_info.reinterpret_input_as_3d()) + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != k); + ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != n); + ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != k); + if(gemm_info.reinterpret_input_as_3d) { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) != static_cast(m)); + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) != m); } else { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast(m)); + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != m); } if(input2 != nullptr && !(helpers::float_ops::is_zero(beta))) { - const int input2_dim0 = static_cast(input2->dimension(0)); - const int input2_dim1 = static_cast(input2->dimension(1)); + const unsigned int input2_dim0 = input2->dimension(0); + const unsigned int input2_dim1 = input2->dimension(1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input2, input1); - if(gemm_info.broadcast_bias()) + if(gemm_info.broadcast_bias) { ARM_COMPUTE_RETURN_ERROR_ON_MSG((input2_dim1 != 1 || input2_dim0 != n), "Incorrect dimension of bias matrix which is to be broadcasted"); } @@ -114,12 +114,12 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info, ElementsProcessed &num_elements_processed) + const GEMMKernelInfo &gemm_info, ElementsProcessed &num_elements_processed) { unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; Window win{}; Window win_out{}; @@ -152,8 +152,8 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor // The only way to set properly the paddings, it is to set those explicitly through the AccessWindowStatic - const int m = reinterpret_output_as_3d ? gemm_info.m() : input0->dimension(1); - const int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; + const unsigned int m = reinterpret_output_as_3d ? gemm_info.m : input0->dimension(1); + const unsigned int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); win_out = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); @@ -172,7 +172,7 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe { const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; - const int bias_processed_per_iteration_y = gemm_info.broadcast_bias() ? 1 : num_elems_processed_per_iteration_y; + const int bias_processed_per_iteration_y = gemm_info.broadcast_bias ? 1 : num_elems_processed_per_iteration_y; AccessWindowStatic input2_access(input2, 0, 0, ceil_to_multiple(input2->dimension(0), bias_processed_per_iteration_x), @@ -208,7 +208,7 @@ CLGEMMMatrixMultiplyNativeKernel::CLGEMMMatrixMultiplyNativeKernel() void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -218,11 +218,11 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const _input1 = input1; _input2 = helpers::float_ops::is_zero(beta) ? nullptr : input2; _output = output; - _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); + _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + _reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; _use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device()); _add_bias = _input2 != nullptr; - _broadcast_bias = gemm_info.broadcast_bias(); + _broadcast_bias = gemm_info.broadcast_bias; // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. @@ -249,7 +249,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const build_opts.add_option_if(!(helpers::float_ops::is_one(alpha)), "-DALPHA=" + float_to_string_with_full_precision(alpha)); build_opts.add_option_if(_input2 != nullptr, "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_option_if(helpers::float_ops::is_one(beta), "-DUNIT_BETA"); - build_opts.add_option_if(gemm_info.broadcast_bias(), "-DBROADCAST_BIAS"); + build_opts.add_option_if(gemm_info.broadcast_bias, "-DBROADCAST_BIAS"); build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D"); build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D"); build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, "-DHEIGHT_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(1))); @@ -257,8 +257,8 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const build_opts.add_option_if(!_slide_matrix_b, "-DMATRIX_B_DEPTH=" + support::cpp11::to_string(input1->info()->dimension(2))); build_opts.add_option_if(_use_dummy_work_items, "-DDUMMY_WORK_ITEMS"); build_opts.add_option("-DM=" + support::cpp11::to_string(input0->info()->dimension(1))); - build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n())); - build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k())); + build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n)); + build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k)); build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0)); build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0)); build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0)); @@ -281,7 +281,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(gemm_info.k()); + _config_id += support::cpp11::to_string(gemm_info.k); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(2)); _config_id += "_"; @@ -294,7 +294,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const Status CLGEMMMatrixMultiplyNativeKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ElementsProcessed num_elements_processed{}; ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info)); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index 4436726852..fecb8b55f8 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -58,7 +58,7 @@ using ElementsProcessed = Steps; Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info) + const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); @@ -75,9 +75,9 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 2 || lhs_info.m0 > 8); ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3), "Only 2,3,4,8,16 are supported for n0"); - const int m = gemm_info.m(); - const int n = gemm_info.n(); - const int k = gemm_info.k(); + const unsigned int m = gemm_info.m; + const unsigned int n = gemm_info.n; + const unsigned int k = gemm_info.k; TensorShape tensor_shape0{ input0->tensor_shape() }; tensor_shape0.set(0, k); @@ -89,11 +89,11 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, if(input2 != nullptr && !(helpers::float_ops::is_zero(beta))) { - const int input2_dim0 = static_cast(input2->dimension(0)); - const int input2_dim1 = static_cast(input2->dimension(1)); + const unsigned int input2_dim0 = input2->dimension(0); + const unsigned int input2_dim1 = input2->dimension(1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input2, input1); - if(gemm_info.broadcast_bias()) + if(gemm_info.broadcast_bias) { ARM_COMPUTE_RETURN_ERROR_ON_MSG((input2_dim1 != 1 || input2_dim0 != n), "Incorrect dimension of bias matrix which is to be broadcasted"); } @@ -124,11 +124,11 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info, ElementsProcessed &num_elements_processed) + const GEMMKernelInfo &gemm_info, ElementsProcessed &num_elements_processed) { unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); + bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; Window win{}; Window win_out{}; @@ -154,8 +154,8 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor // The only way to set properly the paddings, it is to set those explicitly through the AccessWindowStatic - const int m = gemm_info.m(); - const int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; + const unsigned int m = gemm_info.m; + const unsigned int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); win_out = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); @@ -174,7 +174,7 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe { const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; - const int bias_processed_per_iteration_y = gemm_info.broadcast_bias() ? 1 : num_elems_processed_per_iteration_y; + const int bias_processed_per_iteration_y = gemm_info.broadcast_bias ? 1 : num_elems_processed_per_iteration_y; AccessWindowStatic input2_access(input2, 0, 0, ceil_to_multiple(input2->dimension(0), bias_processed_per_iteration_x), @@ -210,7 +210,7 @@ CLGEMMMatrixMultiplyReshapedKernel::CLGEMMMatrixMultiplyReshapedKernel() void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -220,11 +220,11 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, cons _input1 = input1; _input2 = helpers::float_ops::is_zero(beta) ? nullptr : input2; _output = output; - _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); - _k = gemm_info.k(); + _reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; + _k = gemm_info.k; _use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device()); _add_bias = _input2 != nullptr; - _broadcast_bias = gemm_info.broadcast_bias(); + _broadcast_bias = gemm_info.broadcast_bias; // Check if we need to slide the matrix B const unsigned int num_dimensions_input0 = _input0->info()->num_dimensions(); @@ -246,13 +246,13 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, cons build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D"); build_opts.add_option_if(_reinterpret_output_as_3d, "-DHEIGHT_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(1))); build_opts.add_option_if(_reinterpret_output_as_3d, "-DDEPTH_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.add_option_if(gemm_info.broadcast_bias(), "-DBROADCAST_BIAS"); + build_opts.add_option_if(gemm_info.broadcast_bias, "-DBROADCAST_BIAS"); build_opts.add_option_if(!_slide_matrix_b, "-DMATRIX_B_DEPTH=" + support::cpp11::to_string(input1->info()->dimension(2))); build_opts.add_option_if(lhs_info.interleave, "-DLHS_INTERLEAVE"); build_opts.add_option_if(rhs_info.interleave, "-DRHS_INTERLEAVE"); build_opts.add_option_if(_use_dummy_work_items, "-DDUMMY_WORK_ITEMS"); - build_opts.add_option("-DM=" + support::cpp11::to_string(gemm_info.m())); - build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n())); + build_opts.add_option("-DM=" + support::cpp11::to_string(gemm_info.m)); + build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n)); build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0)); build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0)); build_opts.add_option("-DK0=" + support::cpp11::to_string(lhs_info.k0)); @@ -278,7 +278,7 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, cons _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(gemm_info.k()); + _config_id += support::cpp11::to_string(gemm_info.k); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(2)); _config_id += "_"; @@ -299,7 +299,7 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, cons Status CLGEMMMatrixMultiplyReshapedKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ElementsProcessed num_elements_processed{}; ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info)); diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index d952de8232..7258b7fb12 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -53,7 +53,7 @@ using ElementsProcessed = Steps; Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info) + const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); @@ -66,9 +66,9 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 1 || lhs_info.m0 > 8); ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3), "Only 2,3,4,8,16 are supported for n0"); - const int m = gemm_info.m(); - const int n = gemm_info.n(); - const int k = gemm_info.k(); + const unsigned int m = gemm_info.m; + const unsigned int n = gemm_info.n; + const unsigned int k = gemm_info.k; TensorShape tensor_shape1{ input1->tensor_shape() }; tensor_shape1.set(0, n); @@ -76,11 +76,11 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, if(input2 != nullptr && !(helpers::float_ops::is_zero(beta))) { - const int input2_dim0 = static_cast(input2->dimension(0)); - const int input2_dim1 = static_cast(input2->dimension(1)); + const unsigned int input2_dim0 = input2->dimension(0); + const unsigned int input2_dim1 = input2->dimension(1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input2, input1); - if(gemm_info.broadcast_bias()) + if(gemm_info.broadcast_bias) { ARM_COMPUTE_RETURN_ERROR_ON_MSG((input2_dim1 != 1 || input2_dim0 != n), "Incorrect dimension of bias matrix which is to be broadcasted"); } @@ -94,14 +94,14 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(compute_rhs_reshaped_shape(tensor_info1, rhs_info)); - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast(k)); - if(gemm_info.reinterpret_input_as_3d()) + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != k); + if(gemm_info.reinterpret_input_as_3d) { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) != static_cast(m)); + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) != m); } else { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast(m)); + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != m); } ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1); @@ -117,12 +117,12 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info, ElementsProcessed &num_elements_processed) + const GEMMKernelInfo &gemm_info, ElementsProcessed &num_elements_processed) { unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; Window win{}; Window win_out{}; @@ -155,8 +155,8 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor // The only way to set properly the paddings, it is to set those explicitly through the AccessWindowStatic - const int m = reinterpret_output_as_3d ? gemm_info.m() : input0->dimension(1); - const int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; + const unsigned int m = reinterpret_output_as_3d ? gemm_info.m : input0->dimension(1); + const unsigned int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y; win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); win_out = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); @@ -175,7 +175,7 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe { const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; - const int bias_processed_per_iteration_y = gemm_info.broadcast_bias() ? 1 : num_elems_processed_per_iteration_y; + const int bias_processed_per_iteration_y = gemm_info.broadcast_bias ? 1 : num_elems_processed_per_iteration_y; AccessWindowStatic input2_access(input2, 0, 0, ceil_to_multiple(input2->dimension(0), bias_processed_per_iteration_x), @@ -211,7 +211,7 @@ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::CLGEMMMatrixMultiplyReshapedOnlyRHSKe void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -221,11 +221,11 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input _input1 = input1; _input2 = helpers::float_ops::is_zero(beta) ? nullptr : input2; _output = output; - _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); + _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + _reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; _use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device()); _add_bias = _input2 != nullptr; - _broadcast_bias = gemm_info.broadcast_bias(); + _broadcast_bias = gemm_info.broadcast_bias; // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. @@ -254,15 +254,15 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input build_opts.add_option_if(helpers::float_ops::is_one(beta), "-DUNIT_BETA"); build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D"); build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D"); - build_opts.add_option_if(gemm_info.broadcast_bias(), "-DBROADCAST_BIAS"); + build_opts.add_option_if(gemm_info.broadcast_bias, "-DBROADCAST_BIAS"); build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, "-DHEIGHT_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(1))); build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, "-DDEPTH_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option_if(!_slide_matrix_b, "-DMATRIX_B_DEPTH=" + support::cpp11::to_string(input1->info()->dimension(2))); build_opts.add_option_if(rhs_info.interleave, "-DRHS_INTERLEAVE"); build_opts.add_option_if(_use_dummy_work_items, "-DDUMMY_WORK_ITEMS"); build_opts.add_option("-DM=" + support::cpp11::to_string(input0->info()->dimension(1))); - build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n())); - build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k())); + build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n)); + build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k)); build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0)); build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0)); build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0)); @@ -287,7 +287,7 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(gemm_info.k()); + _config_id += support::cpp11::to_string(gemm_info.k); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(2)); _config_id += "_"; @@ -304,7 +304,7 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input Status CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ElementsProcessed num_elements_processed{}; ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info)); diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index 94b318c93e..c0ccd0f451 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" @@ -199,13 +200,18 @@ void CLGEMM::configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); const GPUTarget gpu_target = CLScheduler::get().target(); bool broadcast_bias = gemm_info.broadcast_bias(); + GEMMKernelInfo kernel_info; + kernel_info.m = m; + kernel_info.n = n; + kernel_info.k = k; + kernel_info.depth_output_gemm3d = depth_output_gemm3d; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; // Set the target for the kernels _reshape_lhs_kernel.set_target(gpu_target); _mm_kernel.set_target(gpu_target); - GEMMReshapeInfo reshape_info(m, n, k, 1, 1, depth_output_gemm3d, false, broadcast_bias); - // Manage intermediate buffers _memory_group.manage(&_tmp_a); if(!_reshape_b_only_on_first_run) @@ -228,7 +234,7 @@ void CLGEMM::configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info); // Configure and tune matrix multiply kernel - _mm_reshaped_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, reshape_info); + _mm_reshaped_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); // Allocate intermediate tensors _tmp_a.allocator()->allocate(); @@ -249,12 +255,17 @@ void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); const GPUTarget gpu_target = CLScheduler::get().target(); bool broadcast_bias = gemm_info.broadcast_bias(); + GEMMKernelInfo kernel_info; + kernel_info.m = m; + kernel_info.n = n; + kernel_info.k = k; + kernel_info.depth_output_gemm3d = depth_output_gemm3d; + kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; + kernel_info.broadcast_bias = broadcast_bias; // Set the target for the kernels _mm_kernel.set_target(gpu_target); - GEMMReshapeInfo reshape_info(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, broadcast_bias); - // Manage intermediate buffers if(!_reshape_b_only_on_first_run) { @@ -274,7 +285,7 @@ void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info); // Configure and tune matrix multiply kernel - _mm_reshaped_only_rhs_kernel.configure(a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, reshape_info); + _mm_reshaped_only_rhs_kernel.configure(a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); if(!_reshape_b_only_on_first_run) { @@ -394,8 +405,13 @@ Status CLGEMM::validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); const bool broadcast_bias = gemm_info.broadcast_bias(); - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, false, broadcast_bias); + GEMMKernelInfo kernel_info; + kernel_info.m = m; + kernel_info.n = n; + kernel_info.k = k; + kernel_info.depth_output_gemm3d = depth_output_gemm3d; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -414,7 +430,7 @@ Status CLGEMM::validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)); // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, reshape_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); return Status{}; } @@ -427,16 +443,22 @@ Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf TensorInfo tmp_b_info{}; // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - const DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const bool broadcast_bias = gemm_info.broadcast_bias(); - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, broadcast_bias); + const GPUTarget gpu_target = CLScheduler::get().target(); + const DataType data_type = a->data_type(); + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); + const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); + const unsigned int n = b->dimension(0); + const unsigned int k = a->dimension(0); + const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); + const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); + const bool broadcast_bias = gemm_info.broadcast_bias(); + GEMMKernelInfo kernel_info; + kernel_info.m = m; + kernel_info.n = n; + kernel_info.k = k; + kernel_info.depth_output_gemm3d = depth_output_gemm3d; + kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; + kernel_info.broadcast_bias = broadcast_bias; GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -452,7 +474,7 @@ Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)); // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, reshape_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); return Status{}; } diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp index b0d1fd2ad1..031b807ef8 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -128,7 +129,11 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned rhs_info.n0 = n0_value; rhs_info.k0 = k0_value; - GEMMReshapeInfo gemm_info(M, N, K, false, false, 0, false, broadcast_bias); + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.broadcast_bias = broadcast_bias; const TensorShape lhs_shape(K, M, b_value); const TensorShape rhs_shape(N, K, b_value); @@ -137,7 +142,8 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned broadcast_bias? 1 : b_value); const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, data_type), TensorInfo(rhs_shape, 1, data_type), - gemm_info); + kernel_info); + // Create tensors CLTensor lhs = create_tensor(lhs_shape, data_type); CLTensor rhs = create_tensor(rhs_shape, data_type); @@ -151,7 +157,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned // Create and configure function CLGEMMMatrixMultiplyNative gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, gemm_info); + gemm.configure(&lhs, &rhs, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, kernel_info); } } // namespace diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp index 69e58303f3..bd70ddbe8c 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -157,7 +158,13 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned rhs_info.interleave = i_value_rhs; rhs_info.transpose = true; - GEMMReshapeInfo gemm_info(M, N, K, false, false, 0, false, broadcast_bias); + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; const TensorShape lhs_shape(K, M, b_value); const TensorShape lhs_shape_reshaped = compute_lhs_reshaped_shape(TensorInfo(lhs_shape, 1, data_type), @@ -170,7 +177,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape_reshaped, 1, data_type), TensorInfo(rhs_shape_reshaped, 1, data_type), - gemm_info); + kernel_info); const TensorShape bias_shape(N, broadcast_bias? 1 : M, @@ -189,7 +196,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned // Create and configure function CLGEMMMatrixMultiplyReshaped gemm; - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, gemm_info); + gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, kernel_info); } } // namespace diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp index 133170e2d3..6c34fc870a 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -144,7 +145,13 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned rhs_info.interleave = i_value_rhs; rhs_info.transpose = t_value_rhs; - GEMMReshapeInfo gemm_info(M, N, K, false, false, 0, false, broadcast_bias); + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; const TensorShape lhs_shape(K, M, b_value); const TensorShape rhs_shape(N, K, b_value); @@ -153,7 +160,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, data_type), TensorInfo(rhs_shape_reshaped, 1, data_type), - gemm_info); + kernel_info); const TensorShape bias_shape(N, broadcast_bias? 1 : M, @@ -172,7 +179,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned // Create and configure function CLGEMMMatrixMultiplyReshapedOnlyRHS gemm; - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, gemm_info); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, kernel_info); } } // namespace diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index b721d841f7..a225c71ab7 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE #define ARM_COMPUTE_TEST_GEMM_FIXTURE +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "tests/AssetsLibrary.h" @@ -210,6 +211,13 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; // The output tensor will be auto-initialized within the function @@ -219,7 +227,7 @@ protected: GEMMFunctionType gemm; reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, 0, false, broadcast_bias)); + gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -346,6 +354,13 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = m_h; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = true; // The output tensor will be auto-initialized within the function @@ -355,7 +370,7 @@ protected: GEMMFunctionType gemm; reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h, false, true)); + gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -479,6 +494,13 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; // The output tensor will be auto-initialized within the function @@ -486,7 +508,7 @@ protected: ReshapeRHSFunctionType reshape_rhs; GEMMFunctionType gemm; reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, 0, false, broadcast_bias)); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -606,6 +628,13 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = m_h; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = true; // The output tensor will be auto-initialized within the function @@ -613,7 +642,7 @@ protected: ReshapeRHSFunctionType reshape_rhs; GEMMFunctionType gemm; reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h, false, true)); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -729,10 +758,17 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; // Create and configure function GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, 0, false, broadcast_bias)); + gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -843,12 +879,19 @@ protected: const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = m_h; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = true; // The output tensor will be auto-initialized within the function // Create and configure function GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h, false, true)); + gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); -- cgit v1.2.1