From ad7515d231acb075a9585e52f257373b1a1b5d1f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 24 Jul 2020 00:02:23 +0100 Subject: COMPMID-3385: Async support to CLArithmetic* kernels/functions Pt.1 Signed-off-by: Michalis Spyrou Change-Id: I94007565e688f8a0aead4f14c9fc30bfd9f9f7eb Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3613 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../CL/functions/CLElementwiseOperations.cpp | 428 +++++++++++++++++++-- 1 file changed, 403 insertions(+), 25 deletions(-) (limited to 'src/runtime/CL/functions/CLElementwiseOperations.cpp') diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp index 670e605b4d..e66e4bf526 100644 --- a/src/runtime/CL/functions/CLElementwiseOperations.cpp +++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "arm_compute/runtime/CL/CLScheduler.h" #include "support/MemorySupport.h" #include @@ -33,26 +34,43 @@ namespace arm_compute { namespace { -void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output) +void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ITensorInfo *input1, ITensorInfo *input2, const ITensorInfo *output) { - if(output->info()->dimension(0) > 1) + if(output->dimension(0) > 1) { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; + ITensorInfo *broadcasted_info = (input1->dimension(0) == 1) ? input1 : input2; - if(broadcasted_info->info()->dimension(0) == 1) + if(broadcasted_info->dimension(0) == 1) { border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE); } } } + +void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +{ + if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + { + if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + { + tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + } + else + { + tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + } + } +} } // namespace -void CLArithmeticAddition::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +namespace experimental +{ +CLArithmeticAddition::CLArithmeticAddition() + : _border_handler() { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info); } -void CLArithmeticAddition::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +void CLArithmeticAddition::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::ADD, input1, input2, output, policy, act_info); @@ -65,12 +83,20 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, input1, input2, output, policy, act_info); } -void CLArithmeticSubtraction::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +void CLArithmeticAddition::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info); + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); } -void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +CLArithmeticSubtraction::CLArithmeticSubtraction() + : _border_handler() +{ +} +void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, + const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::SUB, input1, input2, output, policy, act_info); @@ -84,12 +110,20 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info); } -void CLArithmeticDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLArithmeticSubtraction::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +{ + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); +} + +CLArithmeticDivision::CLArithmeticDivision() + : _border_handler() { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); } -void CLArithmeticDivision::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLArithmeticDivision::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::DIV, input1, input2, output, act_info); @@ -102,12 +136,20 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn return CLArithmeticOperationKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info); } -void CLElementwiseMax::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLArithmeticDivision::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); } -void CLElementwiseMax::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +CLElementwiseMax::CLElementwiseMax() + : _border_handler() +{ +} + +void CLElementwiseMax::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::MAX, input1, input2, output, act_info); @@ -120,12 +162,20 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info); } -void CLElementwiseMin::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLElementwiseMax::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); } -void CLElementwiseMin::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +CLElementwiseMin::CLElementwiseMin() + : _border_handler() +{ +} + +void CLElementwiseMin::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::MIN, input1, input2, output, act_info); @@ -138,12 +188,20 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info); } -void CLElementwiseSquaredDiff::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLElementwiseMin::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); } -void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +CLElementwiseSquaredDiff::CLElementwiseSquaredDiff() + : _border_handler() +{ +} + +void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info); @@ -156,12 +214,20 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens return CLArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info); } -void CLElementwisePower::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +void CLElementwiseSquaredDiff::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); } -void CLElementwisePower::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +CLElementwisePower::CLElementwisePower() + : _border_handler() +{ +} + +void CLElementwisePower::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, ArithmeticOperation::POWER, input1, input2, output, act_info); @@ -174,4 +240,316 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return CLArithmeticOperationKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info); } +void CLElementwisePower::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +{ + InputTensorMap src; + select_border_input(src, inputs, outputs); + CLScheduler::get().enqueue_op(_border_handler, src, {}); + ICLOperator::run(inputs, outputs, workspace); +} +} // namespace experimental + +struct CLArithmeticAddition::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLArithmeticAddition::CLArithmeticAddition() + : _impl(support::cpp14::make_unique()) +{ +} +CLArithmeticAddition::CLArithmeticAddition(CLArithmeticAddition &&) = default; +CLArithmeticAddition &CLArithmeticAddition::operator=(CLArithmeticAddition &&) = default; +CLArithmeticAddition::~CLArithmeticAddition() = default; + +void CLArithmeticAddition::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info); +} + +void CLArithmeticAddition::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, + const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), policy, act_info); +} + +Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +{ + return experimental::CLArithmeticAddition::validate(input1, input2, output, policy, act_info); +} + +void CLArithmeticAddition::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLArithmeticSubtraction::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLArithmeticSubtraction::CLArithmeticSubtraction() + : _impl(support::cpp14::make_unique()) +{ +} +CLArithmeticSubtraction::CLArithmeticSubtraction(CLArithmeticSubtraction &&) = default; +CLArithmeticSubtraction &CLArithmeticSubtraction::operator=(CLArithmeticSubtraction &&) = default; +CLArithmeticSubtraction::~CLArithmeticSubtraction() = default; + +void CLArithmeticSubtraction::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info); +} + +void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, + const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), policy, act_info); +} + +Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) +{ + return experimental::CLArithmeticSubtraction::validate(input1, input2, output, policy, act_info); +} + +void CLArithmeticSubtraction::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLArithmeticDivision::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLArithmeticDivision::CLArithmeticDivision() + : _impl(support::cpp14::make_unique()) +{ +} +CLArithmeticDivision::CLArithmeticDivision(CLArithmeticDivision &&) = default; +CLArithmeticDivision &CLArithmeticDivision::operator=(CLArithmeticDivision &&) = default; +CLArithmeticDivision::~CLArithmeticDivision() = default; + +void CLArithmeticDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLArithmeticDivision::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info); +} + +Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return experimental::CLArithmeticDivision::validate(input1, input2, output, act_info); +} + +void CLArithmeticDivision::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLElementwiseMax::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLElementwiseMax::CLElementwiseMax() + : _impl(support::cpp14::make_unique()) +{ +} +CLElementwiseMax::CLElementwiseMax(CLElementwiseMax &&) = default; +CLElementwiseMax &CLElementwiseMax::operator=(CLElementwiseMax &&) = default; +CLElementwiseMax::~CLElementwiseMax() = default; + +void CLElementwiseMax::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLElementwiseMax::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info); +} + +Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return experimental::CLElementwiseMax::validate(input1, input2, output, act_info); +} + +void CLElementwiseMax::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLElementwiseMin::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLElementwiseMin::CLElementwiseMin() + : _impl(support::cpp14::make_unique()) +{ +} +CLElementwiseMin::CLElementwiseMin(CLElementwiseMin &&) = default; +CLElementwiseMin &CLElementwiseMin::operator=(CLElementwiseMin &&) = default; +CLElementwiseMin::~CLElementwiseMin() = default; + +void CLElementwiseMin::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLElementwiseMin::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info); +} + +Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return experimental::CLElementwiseMin::validate(input1, input2, output, act_info); +} + +void CLElementwiseMin::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLElementwiseSquaredDiff::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; + std::unique_ptr _border_handler{ nullptr }; +}; + +CLElementwiseSquaredDiff::CLElementwiseSquaredDiff() + : _impl(support::cpp14::make_unique()) +{ +} +CLElementwiseSquaredDiff::CLElementwiseSquaredDiff(CLElementwiseSquaredDiff &&) = default; +CLElementwiseSquaredDiff &CLElementwiseSquaredDiff::operator=(CLElementwiseSquaredDiff &&) = default; +CLElementwiseSquaredDiff::~CLElementwiseSquaredDiff() = default; + +void CLElementwiseSquaredDiff::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info); +} + +Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return experimental::CLElementwiseSquaredDiff::validate(input1, input2, output, act_info); +} + +void CLElementwiseSquaredDiff::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLElementwisePower::Impl +{ + const ICLTensor *src_0{ nullptr }; + const ICLTensor *src_1{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLElementwisePower::CLElementwisePower() + : _impl(support::cpp14::make_unique()) +{ +} +CLElementwisePower::CLElementwisePower(CLElementwisePower &&) = default; +CLElementwisePower &CLElementwisePower::operator=(CLElementwisePower &&) = default; +CLElementwisePower::~CLElementwisePower() = default; + +void CLElementwisePower::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLElementwisePower::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info); +} + +Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return experimental::CLElementwisePower::validate(input1, input2, output, act_info); +} + +void CLElementwisePower::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} } // namespace arm_compute -- cgit v1.2.1