about | summary | refs | log | tree | commit | diff
path: root/src/runtime
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2021-04-16 17:03:39 +0100
committer Giorgio Arena <giorgio.arena@arm.com> 2021-04-20 09:26:59 +0000
commit ada6cbc057ff725e57d301a99a1816ce602485b9 (patch)
tree f869994cb2b061de0bc4731d720336413b81d32a /src/runtime
parent 031d6a97de79fc3ca3eb6fca1611f03aa9b5893b (diff)
download ComputeLibrary-ada6cbc057ff725e57d301a99a1816ce602485b9.tar.gz
Remove OpenCL padding: CLPixelWiseMultiplicationKernel
- Change kernel's vec_size to 16 / sizeof(output)
- Change ICLKernel.cpp to handle broadcast without padding

Resolve COMPMID-3913

Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: I03e884b250ef5784dc109bff8cf2c96b345d119f
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5450
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/CL/functions/CLPixelWiseMultiplication.cpp 2
-rw-r--r-- src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp 65
-rw-r--r-- src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h 12
3 files changed, 1 insertion, 78 deletions
diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
index 5ebaf5d122..efebf2b84c 100644
--- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
+++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h"
#include <utility>
diff --git a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp
index c4f11a4e29..137a0de6a7 100644
--- a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp
+++ b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.cpp
@@ -24,7 +24,6 @@
#include "src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/gpu/cl/ClCompileContext.h"
#include "src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.h"
@@ -32,44 +31,12 @@ namespace arm_compute
{
namespace opencl
{
-namespace
-{
-ITensorPack select_border_input(ITensorPack &tensors)
-{
- ITensorPack pack;
- if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1)
- {
- if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1)
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1));
- }
- else
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0));
- }
- }
- return pack;
-}
-} // namespace
-
void ClPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
{
auto k = std::make_unique<kernels::ClPixelWiseMultiplicationKernel>();
k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
_kernel = std::move(k);
-
- if(dst->dimension(0) > 1)
- {
- ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? src1 : src2;
-
- if(broadcasted_info->dimension(0) == 1)
- {
- auto b = std::make_unique<CLFillBorderKernel>();
- b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- _border_handler = std::move(b);
- }
- }
}
Status ClPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
@@ -78,48 +45,16 @@ Status ClPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITenso
return kernels::ClPixelWiseMultiplicationKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
}
-void ClPixelWiseMultiplication::run(ITensorPack &tensors)
-{
- if(_border_handler)
- {
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(*_border_handler, border_pack);
- }
- ICLOperator::run(tensors);
-}
-
void ClComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
{
auto k = std::make_unique<kernels::ClComplexPixelWiseMultiplicationKernel>();
k->configure(compile_context, src1, src2, dst, act_info);
_kernel = std::move(k);
-
- if(dst->dimension(0) > 1)
- {
- ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? src1 : src2;
-
- if(broadcasted_info->dimension(0) == 1)
- {
- auto b = std::make_unique<CLFillBorderKernel>();
- b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- _border_handler = std::move(b);
- }
- }
}
Status ClComplexPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
{
return kernels::ClComplexPixelWiseMultiplicationKernel::validate(src1, src2, dst, act_info);
}
-
-void ClComplexPixelWiseMultiplication::run(ITensorPack &tensors)
-{
- if(_border_handler)
- {
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(*_border_handler, border_pack);
- }
- ICLOperator::run(tensors);
-}
} // namespace opencl
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h
index e9b3e4a5ef..e1598cb870 100644
--- a/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h
+++ b/src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h
@@ -99,12 +99,6 @@ public:
*/
static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::unique_ptr<ICLKernel> _border_handler{ nullptr };
};
/** Basic function to run @ref opencl::ClComplexPixelWiseMultiplication. */
@@ -132,12 +126,6 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::unique_ptr<ICLKernel> _border_handler{ nullptr };
};
} // namespace opencl
} // namespace arm_compute