From ce0c67559cf03965acc8f212263a9f53205a0a3f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 18 Jun 2020 10:14:57 +0100 Subject: COMPMID-3377: Async support to NEElementwiseUnaryLayerKernel kernels/functions Signed-off-by: Michalis Spyrou Change-Id: I208287b44ece051e95f891d43a691cb0ac6e56c5 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3419 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- .../NEON/functions/NEElementwiseOperators.cpp | 335 ++++++++++++++++++++- 1 file changed, 328 insertions(+), 7 deletions(-) (limited to 'src/runtime/NEON/functions/NEElementwiseOperators.cpp') diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index 926ae1fa21..63fd5654fd 100644 --- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -32,7 +32,9 @@ namespace arm_compute { -void NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +namespace experimental +{ +void NEElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique(); @@ -46,7 +48,12 @@ Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * return NEArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output); } -void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseMax::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique(); @@ -60,7 +67,12 @@ Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return NEArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output); } -void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseMin::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique(); @@ -74,7 +86,12 @@ Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens return NEArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output); } -void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseSquaredDiff::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique(); @@ -88,7 +105,12 @@ Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorI return NEDivisionOperationKernel::validate(input1, input2, output); } -void NEElementwisePower::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseDivision::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique(); @@ -102,8 +124,13 @@ Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return NEPowerOperationKernel::validate(input1, input2, output); } +MemoryRequirements NEElementwisePower::workspace() const +{ + return MemoryRequirements{}; +} + template -void NEElementwiseComparisonStatic::configure(ITensor *input1, ITensor *input2, ITensor *output) +void NEElementwiseComparisonStatic::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(COP, input1, input2, output); @@ -116,7 +143,13 @@ Status NEElementwiseComparisonStatic::validate(const ITensorInfo *input1, c return NEComparisonOperationKernel::validate(COP, input1, input2, output); } -void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op) +template +MemoryRequirements NEElementwiseComparisonStatic::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(op, input1, input2, output); @@ -128,6 +161,294 @@ Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITenso return NEComparisonOperationKernel::validate(op, input1, input2, output); } +MemoryRequirements NEElementwiseComparison::workspace() const +{ + return MemoryRequirements{}; +} + +// Supported Specializations +template class NEElementwiseComparisonStatic; +template class NEElementwiseComparisonStatic; +template class NEElementwiseComparisonStatic; +template class NEElementwiseComparisonStatic; +template class NEElementwiseComparisonStatic; +template class NEElementwiseComparisonStatic; +} // namespace experimental + +struct NEElementwiseMax::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwiseMax::NEElementwiseMax() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwiseMax::NEElementwiseMax(NEElementwiseMax &&) = default; +NEElementwiseMax &NEElementwiseMax::operator=(NEElementwiseMax &&) = default; +NEElementwiseMax::~NEElementwiseMax() = default; + +void NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseMax::validate(input1, input2, output, act_info); +} + +void NEElementwiseMax::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseMin::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwiseMin::NEElementwiseMin() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwiseMin::NEElementwiseMin(NEElementwiseMin &&) = default; +NEElementwiseMin &NEElementwiseMin::operator=(NEElementwiseMin &&) = default; +NEElementwiseMin::~NEElementwiseMin() = default; + +void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseMin::validate(input1, input2, output, act_info); +} + +void NEElementwiseMin::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseSquaredDiff::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwiseSquaredDiff::NEElementwiseSquaredDiff() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwiseSquaredDiff::NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&) = default; +NEElementwiseSquaredDiff &NEElementwiseSquaredDiff::operator=(NEElementwiseSquaredDiff &&) = default; +NEElementwiseSquaredDiff::~NEElementwiseSquaredDiff() = default; + +void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseSquaredDiff::validate(input1, input2, output, act_info); +} + +void NEElementwiseSquaredDiff::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseDivision::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwiseDivision::NEElementwiseDivision() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwiseDivision::NEElementwiseDivision(NEElementwiseDivision &&) = default; +NEElementwiseDivision &NEElementwiseDivision::operator=(NEElementwiseDivision &&) = default; +NEElementwiseDivision::~NEElementwiseDivision() = default; + +void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(act_info); + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseDivision::validate(input1, input2, output, act_info); +} + +void NEElementwiseDivision::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwisePower::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwisePower::NEElementwisePower() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwisePower::NEElementwisePower(NEElementwisePower &&) = default; +NEElementwisePower &NEElementwisePower::operator=(NEElementwisePower &&) = default; +NEElementwisePower::~NEElementwisePower() = default; + +void NEElementwisePower::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(act_info); + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwisePower::validate(input1, input2, output, act_info); +} + +void NEElementwisePower::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +template +struct NEElementwiseComparisonStatic::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr> op{ nullptr }; +}; + +template +NEElementwiseComparisonStatic::NEElementwiseComparisonStatic() + : _impl(support::cpp14::make_unique()) +{ +} +template +NEElementwiseComparisonStatic::NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&) = default; +template +NEElementwiseComparisonStatic &NEElementwiseComparisonStatic::operator=(NEElementwiseComparisonStatic &&) = default; +template +NEElementwiseComparisonStatic::~NEElementwiseComparisonStatic() = default; + +template +void NEElementwiseComparisonStatic::configure(ITensor *input1, ITensor *input2, ITensor *output) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique>(); + _impl->op->configure(input1->info(), input2->info(), output->info()); +} + +template +Status NEElementwiseComparisonStatic::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +{ + return experimental::NEElementwiseComparisonStatic::validate(input1, input2, output); +} + +template +void NEElementwiseComparisonStatic::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseComparison::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEElementwiseComparison::NEElementwiseComparison() + : _impl(support::cpp14::make_unique()) +{ +} +NEElementwiseComparison::NEElementwiseComparison(NEElementwiseComparison &&) = default; +NEElementwiseComparison &NEElementwiseComparison::operator=(NEElementwiseComparison &&) = default; +NEElementwiseComparison::~NEElementwiseComparison() = default; + +void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input1->info(), input2->info(), output->info(), op); +} + +Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op) +{ + return experimental::NEElementwiseComparison::validate(input1, input2, output, op); +} + +void NEElementwiseComparison::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + // Supported Specializations template class NEElementwiseComparisonStatic; template class NEElementwiseComparisonStatic; -- cgit v1.2.1