From ada6cbc057ff725e57d301a99a1816ce602485b9 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Fri, 16 Apr 2021 17:03:39 +0100 Subject: Remove OpenCL padding: CLPixelWiseMultiplicationKernel - Change kernel's vec_size to 16 / sizeof(output) - Change ICLKernel.cpp to handle broadcast without padding Resolve COMPMID-3913 Signed-off-by: Giorgio Arena Change-Id: I03e884b250ef5784dc109bff8cf2c96b345d119f Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5450 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice --- .../gpu/cl/operators/ClPixelWiseMultiplication.cpp | 65 ---------------------- .../gpu/cl/operators/ClPixelWiseMultiplication.h | 12 ---- 2 files changed, 77 deletions(-) (limited to 'src/runtime/gpu/cl/operators') diff --git a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp index c4f11a4e29..137a0de6a7 100644 --- a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp +++ b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp @@ -24,7 +24,6 @@ #include "src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/gpu/cl/ClCompileContext.h" #include "src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.h" @@ -32,44 +31,12 @@ namespace arm_compute { namespace opencl { -namespace -{ -ITensorPack select_border_input(ITensorPack &tensors) -{ - ITensorPack pack; - if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) - { - if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) - { - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); - } - else - { - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); - } - } - return pack; -} -} // namespace - void 
ClPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { auto k = std::make_unique<kernels::ClPixelWiseMultiplicationKernel>(); k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); _kernel = std::move(k); - - if(dst->dimension(0) > 1) - { - ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? src1 : src2; - - if(broadcasted_info->dimension(0) == 1) - { - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - _border_handler = std::move(b); - } - } } Status ClPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, @@ -78,48 +45,16 @@ Status ClPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITenso return kernels::ClPixelWiseMultiplicationKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); } -void ClPixelWiseMultiplication::run(ITensorPack &tensors) -{ - if(_border_handler) - { - auto border_pack = select_border_input(tensors); - CLScheduler::get().enqueue_op(*_border_handler, border_pack); - } - ICLOperator::run(tensors); -} - void ClComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) { auto k = std::make_unique<kernels::ClComplexPixelWiseMultiplicationKernel>(); k->configure(compile_context, src1, src2, dst, act_info); _kernel = std::move(k); - - if(dst->dimension(0) > 1) - { - ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? 
src1 : src2; - - if(broadcasted_info->dimension(0) == 1) - { - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - _border_handler = std::move(b); - } - } } Status ClComplexPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) { return kernels::ClComplexPixelWiseMultiplicationKernel::validate(src1, src2, dst, act_info); } - -void ClComplexPixelWiseMultiplication::run(ITensorPack &tensors) -{ - if(_border_handler) - { - auto border_pack = select_border_input(tensors); - CLScheduler::get().enqueue_op(*_border_handler, border_pack); - } - ICLOperator::run(tensors); -} } // namespace opencl } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h index e9b3e4a5ef..e1598cb870 100644 --- a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h +++ b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h @@ -99,12 +99,6 @@ public: */ static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<CLFillBorderKernel> _border_handler{ nullptr }; }; /** Basic function to run @ref opencl::ClComplexPixelWiseMultiplication. */ @@ -132,12 +126,6 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
*/ static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<CLFillBorderKernel> _border_handler{ nullptr }; }; } // namespace opencl } // namespace arm_compute -- cgit v1.2.1