aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels
diff options
context:
space:
mode:
authorJakub Sujak <jakub.sujak@arm.com>2023-08-24 14:01:20 +0100
committerJakub Sujak <jakub.sujak@arm.com>2023-09-04 14:41:16 +0000
commit0d27b2ee8d811d66693555ac1e7be44d93e662e2 (patch)
tree8b62a464a8bb9cd46702c8b5a60f3a97e3821b41 /src/gpu/cl/kernels
parent7ff03b67ba7ce669223f4d807e18fa3efa2f729b (diff)
downloadComputeLibrary-0d27b2ee8d811d66693555ac1e7be44d93e662e2.tar.gz
Remove legacy PostOps code
PostOps was the experimental interface for Dynamic Fusion. It is now replaced by the new Dynamic Fusion interface with code generation using the Compute Kernel Writer. Resolves: COMPMID-6190 Change-Id: I813b48facef2fd6f3aee332588886b4f9b3d33d8 Signed-off-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10219 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels')
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp63
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h25
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp55
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h25
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp56
-rw-r--r--src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h29
6 files changed, 59 insertions, 194 deletions
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
index 5fea097ae3..b8997dfc7f 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -31,11 +30,11 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLUtils.h"
-#include "src/core/experimental/PostOpUtils.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
@@ -52,25 +51,6 @@ namespace
{
using ElementsProcessed = Steps;
-const auto post_op_utils = experimental::PostOpCLKernelUtils(
-{
- // PostOp sequence -> {Kernel Postfix, PostOp Slots}
- { {}, { "", {} } },
- { { experimental::PostOpType::Activation }, { "", { 1 } } },
-
- { { experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 2 } } },
- { { experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 2 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 1, 2 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 1, 2 } } },
-
- { { experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
- { { experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } }
-});
-
Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info,
const GEMMKernelInfo &gemm_info)
@@ -90,7 +70,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
"Bias addition only supported with broadcast mode in case the input or dst has to be reinterpreted as 3D");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.fp_mixed_precision, "Mixed precision not supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported for GEMM native");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.is_post_op_sequence_supported(gemm_info.post_ops), "The sequence of Post Ops is not supported");
const unsigned int m = gemm_info.m;
const unsigned int n = gemm_info.n;
@@ -133,7 +112,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
const TensorInfo tensor_info_dst = dst->clone()->set_tensor_shape(misc::shape_calculator::compute_mm_shape(*src0, *src1, gemm_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &tensor_info_dst);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.are_post_op_shapes_compliant(dst, gemm_info.post_ops), "The Post Op shapes are not compliant");
}
return Status{};
@@ -240,7 +218,6 @@ void ClGemmMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
_reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
_use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device());
_add_bias = src2 != nullptr;
- _num_post_op_args = gemm_info.post_ops.total_num_arguments();
// In case both input and dst have to be reinterpreted as 3D tensors,
// force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
@@ -298,20 +275,11 @@ void ClGemmMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0));
- // If post_ops are used, then we disable the use of gemm_info.activation_info
- if(gemm_info.post_ops.size() > 0)
- {
- post_op_utils.set_post_ops_cl_build_options(build_opts, gemm_info.post_ops);
- }
- else
- {
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
- }
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
std::string kernel_name("gemm_mm_native");
- post_op_utils.set_post_ops_cl_kernel_name(kernel_name, gemm_info.post_ops);
// A macro guard to compile ONLY the kernel of interest
build_opts.add_option("-D" + upper_string(kernel_name));
@@ -396,11 +364,11 @@ void ClGemmMatrixMultiplyNativeKernel::run_op(ITensorPack &tensors, const Window
unsigned int idx0;
if(_add_bias)
{
- idx0 = (4 + _num_post_op_args) * num_arguments_per_2D_tensor() + (7 + _num_post_op_args);
+ idx0 = 4 * num_arguments_per_2D_tensor() + 7;
}
else
{
- idx0 = (3 + _num_post_op_args) * num_arguments_per_2D_tensor() + (6 + _num_post_op_args);
+ idx0 = 3 * num_arguments_per_2D_tensor() + 6;
}
const unsigned int total_cross_plane_pad = src0->info()->padding().top + src0->info()->padding().bottom;
_kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
@@ -412,11 +380,11 @@ void ClGemmMatrixMultiplyNativeKernel::run_op(ITensorPack &tensors, const Window
unsigned int idx0;
if(_add_bias)
{
- idx0 = (4 + _num_post_op_args) * num_arguments_per_2D_tensor() + 7 + (_reinterpret_input_as_3d ? 1 : 0) + _num_post_op_args;
+ idx0 = 4 * num_arguments_per_2D_tensor() + 7 + (_reinterpret_input_as_3d ? 1 : 0);
}
else
{
- idx0 = (3 + _num_post_op_args) * num_arguments_per_2D_tensor() + 6 + (_reinterpret_input_as_3d ? 1 : 0) + _num_post_op_args;
+ idx0 = 3 * num_arguments_per_2D_tensor() + 6 + (_reinterpret_input_as_3d ? 1 : 0);
}
const unsigned int total_cross_plane_pad = dst->info()->padding().top + dst->info()->padding().bottom;
_kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
@@ -440,12 +408,7 @@ void ClGemmMatrixMultiplyNativeKernel::run_op(ITensorPack &tensors, const Window
add_2D_tensor_argument(idx, src2, slice);
}
add_2D_tensor_argument(idx, dst, slice);
- // post op argument buffers
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- add_2D_tensor_argument(idx, post_op_arg, slice);
- }
+
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(src0->info()->strides_in_bytes()[2]));
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(src1->info()->strides_in_bytes()[2]));
if(_add_bias)
@@ -453,12 +416,6 @@ void ClGemmMatrixMultiplyNativeKernel::run_op(ITensorPack &tensors, const Window
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(src2->info()->strides_in_bytes()[2]));
}
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(dst->info()->strides_in_bytes()[2]));
- // post op argument stride_z
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(post_op_arg->info()->strides_in_bytes()[2]));
- }
// Pass m, n and k at runtime
_kernel.setArg<cl_int>(idx++, _m);
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
index e478df727a..80f8355932 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_NATIVE_KERNEL_H
-#define ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_NATIVE_KERNEL_H
+#ifndef ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
+#define ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
#include "arm_compute/core/KernelDescriptors.h"
#include "src/core/common/Macros.h"
@@ -76,17 +76,16 @@ public:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
- bool _slide_matrix_b{ true };
- bool _reinterpret_input_as_3d{ false };
- bool _reinterpret_output_as_3d{ false };
- bool _use_dummy_work_items{ false };
- bool _add_bias{ false };
- signed int _m{ 1 };
- signed int _n{ 1 };
- signed int _k{ 1 };
- unsigned int _num_post_op_args{ 0 }; // (EXPERIMENTAL_POST_OPS) total number of post op arguments
+ bool _slide_matrix_b{ true };
+ bool _reinterpret_input_as_3d{ false };
+ bool _reinterpret_output_as_3d{ false };
+ bool _use_dummy_work_items{ false };
+ bool _add_bias{ false };
+ signed int _m{ 1 };
+ signed int _n{ 1 };
+ signed int _k{ 1 };
};
} // namespace kernels
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_NATIVE_KERNEL_H */
+#endif // ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
index f14a6f1900..d72d29ea1e 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -31,11 +30,11 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLUtils.h"
#include "src/core/CL/CLValidate.h"
-#include "src/core/experimental/PostOpUtils.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
@@ -53,25 +52,6 @@ namespace
{
using ElementsProcessed = Steps;
-const auto post_op_utils = experimental::PostOpCLKernelUtils(
-{
- // PostOp sequence -> {Kernel Postfix, PostOp Slots}
- { {}, { "", {} } },
- { { experimental::PostOpType::Activation }, { "", { 1 } } },
-
- { { experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 2 } } },
- { { experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 2 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 1, 2 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 1, 2 } } },
-
- { { experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
- { { experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } }
-});
-
Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info,
const GEMMKernelInfo &gemm_info)
@@ -95,7 +75,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
"Bias addition only supported with broadcast mode in case the input or dst has to be reinterpreted as 3D");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.fp_mixed_precision && (src0->data_type() == DataType::F32), "Mixed precision only supported for F16 data type");
ARM_COMPUTE_RETURN_ON_ERROR(gemm::validate_image2d_support_on_rhs(*src1, rhs_info));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.is_post_op_sequence_supported(gemm_info.post_ops), "The sequence of Post Ops is not supported");
const unsigned int m = gemm_info.m;
const unsigned int n = gemm_info.n;
@@ -139,7 +118,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
const TensorInfo tensor_info_dst = dst->clone()->set_tensor_shape(misc::shape_calculator::compute_mm_shape(*src0, *src1, gemm_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &tensor_info_dst);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.are_post_op_shapes_compliant(dst, gemm_info.post_ops), "The Post Op shapes are not compliant");
}
return Status{};
@@ -202,7 +180,6 @@ void ClGemmMatrixMultiplyReshapedKernel::configure(const CLCompileContext &compi
_use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device());
_add_bias = src2 != nullptr;
_export_to_cl_image = rhs_info.export_to_cl_image;
- _num_post_op_args = gemm_info.post_ops.total_num_arguments();
// Check if we need to slide the matrix B
const unsigned int num_dimensions_src0 = src0->num_dimensions();
@@ -260,23 +237,14 @@ void ClGemmMatrixMultiplyReshapedKernel::configure(const CLCompileContext &compi
build_opts.add_option("-DH0=" + support::cpp11::to_string(rhs_info.h0));
build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0));
- // If post_ops are used, then we disable the use of gemm_info.activation_info
- if(gemm_info.post_ops.size() > 0)
- {
- post_op_utils.set_post_ops_cl_build_options(build_opts, gemm_info.post_ops);
- }
- else
- {
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
- }
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
std::string kernel_name("gemm_mm_reshaped_");
kernel_name += lhs_info.transpose ? "lhs_t_" : "lhs_nt_";
kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt";
kernel_name += rhs_info.export_to_cl_image ? "_texture" : "";
- post_op_utils.set_post_ops_cl_kernel_name(kernel_name, gemm_info.post_ops);
// A macro guard to compile ONLY the kernel of interest
build_opts.add_option("-D" + upper_string(kernel_name));
@@ -395,13 +363,6 @@ void ClGemmMatrixMultiplyReshapedKernel::run_op(ITensorPack &tensors, const Wind
// dst buffer
add_2D_tensor_argument(idx, dst, slice);
- // post op argument buffers
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- add_2D_tensor_argument(idx, post_op_arg, slice);
- }
-
// LHS stride_z
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(src0->info()->strides_in_bytes()[2]));
@@ -417,12 +378,6 @@ void ClGemmMatrixMultiplyReshapedKernel::run_op(ITensorPack &tensors, const Wind
// dst stride_z
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(dst->info()->strides_in_bytes()[2]));
- // post op argument stride_z
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(post_op_arg->info()->strides_in_bytes()[2]));
- }
// Cross-plan padding (if _reinterpret_output_as_3d = true)
if(_reinterpret_output_as_3d)
{
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
index 2d668b91a3..8d25412a40 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_KERNEL_H
-#define ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_KERNEL_H
+#ifndef ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
+#define ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
#include "src/core/common/Macros.h"
#include "src/gpu/cl/ClCompileContext.h"
@@ -100,17 +100,16 @@ public:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
- bool _slide_matrix_b{ true };
- bool _reinterpret_output_as_3d{ false };
- bool _use_dummy_work_items{ false };
- bool _add_bias{ false };
- bool _export_to_cl_image{ false };
- signed int _m{ 1 };
- signed int _n{ 1 };
- signed int _k{ 1 };
- unsigned int _num_post_op_args{ 0 }; // (EXPERIMENTAL_POST_OPS) total number of post op arguments
+ bool _slide_matrix_b{ true };
+ bool _reinterpret_output_as_3d{ false };
+ bool _use_dummy_work_items{ false };
+ bool _add_bias{ false };
+ bool _export_to_cl_image{ false };
+ signed int _m{ 1 };
+ signed int _n{ 1 };
+ signed int _k{ 1 };
};
} // namespace kernels
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_KERNEL_H */ \ No newline at end of file
+#endif // ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
index f780538f53..b34c17cda8 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
@@ -23,13 +23,12 @@
*/
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLUtils.h"
#include "src/core/CL/CLValidate.h"
-#include "src/core/experimental/PostOpUtils.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
@@ -47,25 +46,6 @@ namespace
{
using ElementsProcessed = Steps;
-const auto post_op_utils = experimental::PostOpCLKernelUtils(
-{
- // PostOp sequence -> {Kernel Postfix, PostOp Slots}
- { {}, { "", {} } },
- { { experimental::PostOpType::Activation }, { "", { 1 } } },
-
- { { experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 2 } } },
- { { experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 2 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add }, { "_post_act_eltwise_op_act", { 1, 2 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu }, { "_post_act_eltwise_op_act", { 1, 2 } } },
-
- { { experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
- { { experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 2, 3 } } },
-
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_Add, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } },
- { { experimental::PostOpType::Activation, experimental::PostOpType::Eltwise_PRelu, experimental::PostOpType::Activation }, { "_post_act_eltwise_op_act", { 1, 2, 3 } } }
-});
-
Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
{
@@ -86,7 +66,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
"Bias addition only supported with broadcast mode in case the input or dst has to be reinterpreted as 3D");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.fp_mixed_precision, "Mixed precision not supported");
ARM_COMPUTE_RETURN_ON_ERROR(gemm::validate_image2d_support_on_rhs(*src1, rhs_info));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.is_post_op_sequence_supported(gemm_info.post_ops), "The sequence of Post Ops is not supported");
const unsigned int m = gemm_info.m;
const unsigned int n = gemm_info.n;
@@ -132,7 +111,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
const TensorInfo tensor_info_dst = dst->clone()->set_tensor_shape(misc::shape_calculator::compute_mm_shape(*src0, *src1, gemm_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &tensor_info_dst);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!post_op_utils.are_post_op_shapes_compliant(dst, gemm_info.post_ops), "The Post Op shapes are not compliant");
}
return Status{};
@@ -203,7 +181,6 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::configure(const CLCompileContext
_add_bias = src2 != nullptr;
_export_to_cl_image = rhs_info.export_to_cl_image;
_has_pad_y = gemm_info.has_pad_y;
- _num_post_op_args = gemm_info.post_ops.total_num_arguments();
auto padding_info = get_padding_info({ src0, src1, src2, dst });
@@ -270,22 +247,14 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::configure(const CLCompileContext
build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, "-DHEIGHT_GEMM3D=" + support::cpp11::to_string(h_gemm_3d));
build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, "-DDEPTH_GEMM3D=" + support::cpp11::to_string(d_gemm_3d));
}
- // If post_ops are used, then we disable the use of gemm_info.activation_info
- if(gemm_info.post_ops.size() > 0)
- {
- post_op_utils.set_post_ops_cl_build_options(build_opts, gemm_info.post_ops);
- }
- else
- {
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
- build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
- }
+
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation())));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a()));
+ build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b()));
std::string kernel_name("gemm_mm_reshaped_only_rhs_");
kernel_name += rhs_info.transpose ? "t" : "nt";
kernel_name += rhs_info.export_to_cl_image ? "_texture" : "";
- post_op_utils.set_post_ops_cl_kernel_name(kernel_name, gemm_info.post_ops);
// A macro guard to compile ONLY the kernel of interest
build_opts.add_option("-D" + upper_string(kernel_name));
@@ -411,13 +380,6 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::run_op(ITensorPack &tensors, con
// dst buffer
add_2D_tensor_argument(idx, dst, slice);
- // post op argument buffers
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- add_2D_tensor_argument(idx, post_op_arg, slice);
- }
-
// LHS stride_z
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(src0->info()->strides_in_bytes()[lhs_idx_batch_size]));
@@ -432,12 +394,6 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::run_op(ITensorPack &tensors, con
// dst stride_z
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(dst->info()->strides_in_bytes()[out_idx_batch_size]));
- // post op argument stride_z
- for(size_t i = 0; i < _num_post_op_args; ++i)
- {
- const auto post_op_arg = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(experimental::get_post_op_arg_type(i)));
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(post_op_arg->info()->strides_in_bytes()[2]));
- }
// Cross-plan padding (if _reinterpret_input_as_3d = true)
if(_reinterpret_input_as_3d && _has_pad_y)
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
index 00cdb299ce..471160c94b 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_ONLY_RHS_KERNEL_H
-#define ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_ONLY_RHS_KERNEL_H
+#ifndef ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
+#define ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
#include "src/core/common/Macros.h"
#include "src/gpu/cl/ClCompileContext.h"
@@ -90,19 +90,18 @@ public:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
- bool _slide_matrix_b{ true };
- bool _reinterpret_input_as_3d{ false };
- bool _reinterpret_output_as_3d{ false };
- bool _use_dummy_work_items{ false };
- bool _add_bias{ false };
- bool _export_to_cl_image{ false };
- bool _has_pad_y{ false };
- signed int _m{ 1 };
- signed int _n{ 1 };
- signed int _k{ 1 };
- unsigned int _num_post_op_args{ 0 }; // (EXPERIMENTAL_POST_OPS) total number of post op arguments
+ bool _slide_matrix_b{ true };
+ bool _reinterpret_input_as_3d{ false };
+ bool _reinterpret_output_as_3d{ false };
+ bool _use_dummy_work_items{ false };
+ bool _add_bias{ false };
+ bool _export_to_cl_image{ false };
+ bool _has_pad_y{ false };
+ signed int _m{ 1 };
+ signed int _n{ 1 };
+ signed int _k{ 1 };
};
} // namespace kernels
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_GEMM_MATRIXMULTIPLY_RESHAPED_ONLY_RHS_KERNEL_H */
+#endif // ACL_SRC_GPU_CL_KERNELS_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H