path: root/src/gpu/cl/kernels/ClMatMulNativeKernel.cpp
author     Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>    2023-05-25 16:48:43 +0100
committer  Mohmun02 <MohammedSuhail.Munshi@arm.com>                  2023-06-16 15:38:39 +0000
commit     94abde4f4e98f6f1adb5c46b194527f34a8ea07d (patch)
tree       d6d717031788850d970fb44ff3f41de311cc5fc0 /src/gpu/cl/kernels/ClMatMulNativeKernel.cpp
parent     dd8d7f4102653ef55d872c71ae5d5f2ca2ead0c1 (diff)
download   ComputeLibrary-94abde4f4e98f6f1adb5c46b194527f34a8ea07d.tar.gz
Add Fused Activation to OpenCL MatMul
- Added fused activation to MatMul function interface
- Added fused activation to CL backend
- Includes tests for supported Activation Functions in MatMul

Resolves: [COMPMID-6192]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ie103212b600b60699eaf6a6394d609e6e1f5aba6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/522465
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9714
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/ClMatMulNativeKernel.cpp')
-rw-r--r--   src/gpu/cl/kernels/ClMatMulNativeKernel.cpp   17
1 file changed, 12 insertions, 5 deletions
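
For reference, the patch threads an ActivationLayerInfo through the native MatMul kernel so the activation can be fused into the GEMM kernel instead of running as a separate pass. The snippet below is a minimal host-side sketch of exercising the new act_info parameter via the static validate() entry point changed in this diff; the header paths, namespace, tensor shapes, MatMulKernelInfo values and the helper function name are illustrative assumptions, not part of this commit.

// Sketch only: checks whether a native MatMul with a fused bounded ReLU would be accepted.
// Header locations and the MatMulKernelInfo fields are assumptions based on the
// library layout around this release.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"   // MatMulKernelInfo (assumed location)
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"               // ActivationLayerInfo (assumed location)
#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"

using namespace arm_compute;

bool matmul_with_fused_relu_is_supported()
{
    // TensorShape lists the innermost dimension first: lhs is MxK, rhs is KxN, dst is MxN.
    const TensorInfo lhs(TensorShape(64U, 32U), 1, DataType::F32); // K = 64, M = 32
    const TensorInfo rhs(TensorShape(16U, 64U), 1, DataType::F32); // N = 16, K = 64
    const TensorInfo dst(TensorShape(16U, 32U), 1, DataType::F32); // N = 16, M = 32

    MatMulKernelInfo matmul_info{};
    matmul_info.adj_lhs = false;
    matmul_info.adj_rhs = false;
    matmul_info.m0      = 4;
    matmul_info.n0      = 4;
    matmul_info.k0      = 4;

    // The argument added by this patch: the activation to fuse into the kernel.
    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

    const Status status = opencl::kernels::ClMatMulNativeKernel::validate(&lhs, &rhs, &dst, matmul_info, act_info);
    return status.error_code() == ErrorCode::OK;
}
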
diff --git a/src/gpu/cl/kernels/ClMatMulNativeKernel.cpp b/src/gpu/cl/kernels/ClMatMulNativeKernel.cpp
index 8f53c1998f..205396a639 100644
--- a/src/gpu/cl/kernels/ClMatMulNativeKernel.cpp
+++ b/src/gpu/cl/kernels/ClMatMulNativeKernel.cpp
@@ -112,7 +112,7 @@ Status validate_export_to_cl_image(const ITensorInfo *rhs, const MatMulKernelInf
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!export_to_cl_image(rhs), "Export to CLImage is not supported for this device/configuration");
}
- return Status {};
+ return Status{};
}
}
ClMatMulNativeKernel::ClMatMulNativeKernel()
@@ -120,8 +120,9 @@ ClMatMulNativeKernel::ClMatMulNativeKernel()
_type = CLKernelType::GEMM;
}
-Status ClMatMulNativeKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulKernelInfo &matmul_kernel_info)
+Status ClMatMulNativeKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulKernelInfo &matmul_kernel_info, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_UNUSED(act_info);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
@@ -138,7 +139,8 @@ Status ClMatMulNativeKernel::validate(const ITensorInfo *lhs, const ITensorInfo
return Status{};
}
-void ClMatMulNativeKernel::configure(const ClCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, const MatMulKernelInfo &matmul_kernel_info)
+void ClMatMulNativeKernel::configure(const ClCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, const MatMulKernelInfo &matmul_kernel_info,
+ const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst, &compile_context, &matmul_kernel_info);
ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst, matmul_kernel_info);
@@ -176,6 +178,11 @@ void ClMatMulNativeKernel::configure(const ClCompileContext &compile_context, IT
build_opts.add_option("-DK=" + support::cpp11::to_string(k));
build_opts.add_option_if_else(_export_rhs_to_cl_image, "-DRHS_TENSOR_TYPE=IMAGE", "-DRHS_TENSOR_TYPE=BUFFER");
+ // Define values for activation function
+ build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(act_info.a())));
+ build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(act_info.b())));
+ build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
+
std::string kernel_name("mat_mul_native");
kernel_name += matmul_kernel_info.adj_lhs ? "_t" : "_nt";
kernel_name += matmul_kernel_info.adj_rhs ? "_t" : "_nt";
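
The three build options added here pass the activation parameters into the OpenCL program at compile time, so the kernel can apply the activation on the accumulated result before storing it. The snippet below is a self-contained sketch of that option-assembly step using plain C++ stand-ins for the library helpers (float_to_string_with_full_precision, lower_string, string_from_activation_func); the exact define strings the real helpers produce are an assumption.

// Sketch only: rough stand-in for how the -DA_VAL/-DB_VAL/-DACTIVATION_TYPE
// options above might be assembled for a bounded ReLU activation.
#include <iostream>
#include <limits>
#include <sstream>
#include <string>

enum class ActivationFunction { IDENTITY, RELU, BOUNDED_RELU, LU_BOUNDED_RELU };

// Stand-in for float_to_string_with_full_precision().
static std::string to_full_precision(float v)
{
    std::stringstream ss;
    ss.precision(std::numeric_limits<float>::max_digits10);
    ss << v;
    return ss.str();
}

// Stand-in for lower_string(string_from_activation_func(...)); the names are assumed.
static std::string activation_name(ActivationFunction f)
{
    switch(f)
    {
        case ActivationFunction::RELU:            return "relu";
        case ActivationFunction::BOUNDED_RELU:    return "brelu";
        case ActivationFunction::LU_BOUNDED_RELU: return "lu_brelu";
        default:                                  return "identity";
    }
}

int main()
{
    const ActivationFunction act   = ActivationFunction::BOUNDED_RELU;
    const float              a_val = 6.0f; // upper bound for the bounded ReLU
    const float              b_val = 0.0f; // unused by this activation

    // Mirrors the three add_option() calls introduced by the patch.
    std::cout << "-DA_VAL=" << to_full_precision(a_val) << "\n"
              << "-DB_VAL=" << to_full_precision(b_val) << "\n"
              << "-DACTIVATION_TYPE=" << activation_name(act) << "\n";
    return 0;
}

For this example the MatMul kernel would be compiled with roughly -DA_VAL=6 -DB_VAL=0 -DACTIVATION_TYPE=brelu, which the OpenCL source can then use to select and parameterise the fused activation.
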
@@ -218,8 +225,8 @@ void ClMatMulNativeKernel::run_op(ITensorPack &tensors, const Window &window, cl
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- const ICLTensor *lhs = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
- const ICLTensor *rhs = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
+ const ICLTensor *lhs = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
+ const ICLTensor *rhs = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
ICLTensor *dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst);