From ad7515d231acb075a9585e52f257373b1a1b5d1f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 24 Jul 2020 00:02:23 +0100 Subject: COMPMID-3385: Async support to CLArithmetic* kernels/functions Pt.1 Signed-off-by: Michalis Spyrou Change-Id: I94007565e688f8a0aead4f14c9fc30bfd9f9f7eb Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3613 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/CL/functions/CLPReluLayer.cpp | 83 ++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 7 deletions(-) (limited to 'src/runtime/CL/functions/CLPReluLayer.cpp') diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp index b1b97381c8..fbb466acc8 100644 --- a/src/runtime/CL/functions/CLPReluLayer.cpp +++ b/src/runtime/CL/functions/CLPReluLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLPReluLayer.h" #include "support/MemorySupport.h" @@ -31,26 +32,42 @@ namespace arm_compute { namespace { -void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output) +void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ITensorInfo *input1, ITensorInfo *input2, const ITensorInfo *output) { - if(output->info()->dimension(0) > 1) + if(output->dimension(0) > 1) { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; + ITensorInfo *broadcasted_info = (input1->dimension(0) == 1) ? 
input1 : input2; - if(broadcasted_info->info()->dimension(0) == 1) + if(broadcasted_info->dimension(0) == 1) { border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE); } } } +void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +{ + if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + { + if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + { + tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + } + else + { + tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + } + } +} } // namespace -void CLPReluLayer::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) +namespace experimental +{ +CLPReluLayer::CLPReluLayer() + : _border_handler() { - configure(CLKernelLibrary::get().get_compile_context(), input, alpha, output); } -void CLPReluLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output) +void CLPReluLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>(); k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, output); @@ -62,4 +79,56 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha { return CLArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output); } + +void CLPReluLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +{ + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); +} +} // namespace experimental + +struct CLPReluLayer::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr<experimental::CLPReluLayer> op{ nullptr }; +}; + +CLPReluLayer::CLPReluLayer() 
+ : _impl(support::cpp14::make_unique<Impl>()) +{ +} +CLPReluLayer::CLPReluLayer(CLPReluLayer &&) = default; +CLPReluLayer &CLPReluLayer::operator=(CLPReluLayer &&) = default; +CLPReluLayer::~CLPReluLayer() = default; + +void CLPReluLayer::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, alpha, output); +} + +void CLPReluLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output) +{ + _impl->src_0 = input; + _impl->src_1 = alpha; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::CLPReluLayer>(); + _impl->op->configure(compile_context, input->info(), alpha->info(), output->info()); +} + +Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) +{ + return experimental::CLPReluLayer::validate(input, alpha, output); +} + +void CLPReluLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} } // namespace arm_compute -- cgit v1.2.1