diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-06-18 10:14:57 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-06-19 14:35:22 +0000 |
commit | ce0c67559cf03965acc8f212263a9f53205a0a3f (patch) | |
tree | c37105c72538108c46e5964cf49d15acd2d85980 /src/runtime | |
parent | 3be0b8c8d4e90bd264e9575dc2b6994ce8e14d50 (diff) | |
download | ComputeLibrary-ce0c67559cf03965acc8f212263a9f53205a0a3f.tar.gz |
COMPMID-3377: Async support to NEElementwiseUnaryLayerKernel kernels/functions
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: I208287b44ece051e95f891d43a691cb0ac6e56c5
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3419
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/CPP/CPPScheduler.cpp | 8 | ||||
-rw-r--r-- | src/runtime/CPP/SingleThreadScheduler.cpp | 2 | ||||
-rw-r--r-- | src/runtime/NEON/INEOperator.cpp | 4 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEActivationLayer.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEElementwiseOperators.cpp | 335 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEPReluLayer.cpp | 47 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEReshapeLayer.cpp | 7 | ||||
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 2 |
8 files changed, 389 insertions, 22 deletions
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index 41e1a2d647..af6d8d77c4 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -363,7 +363,7 @@ void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads) } #endif /* DOXYGEN_SKIP_THIS */ -void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) +void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); @@ -473,15 +473,15 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const } } -void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) +void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) { schedule_common(kernel, hints, inputs, outputs); } void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { - const std::vector<InputTensor> inputs; - std::vector<OutputTensor> outputs; + const InputTensorMap inputs; + OutputTensorMap outputs; schedule_common(kernel, hints, inputs, outputs); } } // namespace arm_compute diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index 8257628090..63c6c7b298 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,7 +49,7 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) kernel->run(kernel->window(), info); } -void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) +void 
SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) { ARM_COMPUTE_UNUSED(hints); ThreadInfo info; diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index 78790856ee..00dab75a95 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -33,7 +33,7 @@ INEOperator::INEOperator(IRuntimeContext *ctx) { } -void INEOperator::run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) +void INEOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) { ARM_COMPUTE_UNUSED(workspace); @@ -45,7 +45,7 @@ void INEOperator::run(std::vector<InputTensor> inputs, std::vector<OutputTensor> NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); } -void INEOperator::prepare(std::vector<OperatorTensor> constants) +void INEOperator::prepare(OperatorTensorMap constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 889ff6b1f4..03222386d9 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -90,9 +90,9 @@ Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void NEActivationLayer::run() { - const InputTensor src{ TensorType::ACL_SRC, _impl->src }; - OutputTensor dst{ TensorType::ACL_DST, _impl->dst }; + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run({ src }, { dst }, {}); + _impl->op->run(src, dst, {}); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index 926ae1fa21..63fd5654fd 100644 --- 
a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -32,7 +32,9 @@ namespace arm_compute { -void NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +namespace experimental +{ +void NEElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique<NEArithmeticOperationKernel>(); @@ -46,7 +48,12 @@ Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * return NEArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output); } -void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseMax::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique<NEArithmeticOperationKernel>(); @@ -60,7 +67,12 @@ Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return NEArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output); } -void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseMin::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique<NEArithmeticOperationKernel>(); @@ -74,7 +86,12 @@ Status NEElementwiseSquaredDiff::validate(const 
ITensorInfo *input1, const ITens return NEArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output); } -void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseSquaredDiff::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique<NEDivisionOperationKernel>(); @@ -88,7 +105,12 @@ Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorI return NEDivisionOperationKernel::validate(input1, input2, output); } -void NEElementwisePower::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +MemoryRequirements NEElementwiseDivision::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); auto k = arm_compute::support::cpp14::make_unique<NEPowerOperationKernel>(); @@ -102,8 +124,13 @@ Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return NEPowerOperationKernel::validate(input1, input2, output); } +MemoryRequirements NEElementwisePower::workspace() const +{ + return MemoryRequirements{}; +} + template <ComparisonOperation COP> -void NEElementwiseComparisonStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output) +void NEElementwiseComparisonStatic<COP>::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique<NEComparisonOperationKernel>(); k->configure(COP, input1, input2, output); @@ -116,7 +143,13 @@ Status 
NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, c return NEComparisonOperationKernel::validate(COP, input1, input2, output); } -void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op) +template <ComparisonOperation COP> +MemoryRequirements NEElementwiseComparisonStatic<COP>::workspace() const +{ + return MemoryRequirements{}; +} + +void NEElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op) { auto k = arm_compute::support::cpp14::make_unique<NEComparisonOperationKernel>(); k->configure(op, input1, input2, output); @@ -128,6 +161,294 @@ Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITenso return NEComparisonOperationKernel::validate(op, input1, input2, output); } +MemoryRequirements NEElementwiseComparison::workspace() const +{ + return MemoryRequirements{}; +} + +// Supported Specializations +template class NEElementwiseComparisonStatic<ComparisonOperation::Equal>; +template class NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>; +template class NEElementwiseComparisonStatic<ComparisonOperation::Greater>; +template class NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>; +template class NEElementwiseComparisonStatic<ComparisonOperation::Less>; +template class NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>; +} // namespace experimental + +struct NEElementwiseMax::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseMax> op{ nullptr }; +}; + +NEElementwiseMax::NEElementwiseMax() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwiseMax::NEElementwiseMax(NEElementwiseMax &&) = default; +NEElementwiseMax &NEElementwiseMax::operator=(NEElementwiseMax &&) = default; +NEElementwiseMax::~NEElementwiseMax() = default; + +void 
NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwiseMax>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseMax::validate(input1, input2, output, act_info); +} + +void NEElementwiseMax::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseMin::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseMin> op{ nullptr }; +}; + +NEElementwiseMin::NEElementwiseMin() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwiseMin::NEElementwiseMin(NEElementwiseMin &&) = default; +NEElementwiseMin &NEElementwiseMin::operator=(NEElementwiseMin &&) = default; +NEElementwiseMin::~NEElementwiseMin() = default; + +void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwiseMin>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return 
experimental::NEElementwiseMin::validate(input1, input2, output, act_info); +} + +void NEElementwiseMin::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseSquaredDiff::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseSquaredDiff> op{ nullptr }; +}; + +NEElementwiseSquaredDiff::NEElementwiseSquaredDiff() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwiseSquaredDiff::NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&) = default; +NEElementwiseSquaredDiff &NEElementwiseSquaredDiff::operator=(NEElementwiseSquaredDiff &&) = default; +NEElementwiseSquaredDiff::~NEElementwiseSquaredDiff() = default; + +void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwiseSquaredDiff>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseSquaredDiff::validate(input1, input2, output, act_info); +} + +void NEElementwiseSquaredDiff::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseDivision::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor 
*dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseDivision> op{ nullptr }; +}; + +NEElementwiseDivision::NEElementwiseDivision() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwiseDivision::NEElementwiseDivision(NEElementwiseDivision &&) = default; +NEElementwiseDivision &NEElementwiseDivision::operator=(NEElementwiseDivision &&) = default; +NEElementwiseDivision::~NEElementwiseDivision() = default; + +void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(act_info); + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwiseDivision>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwiseDivision::validate(input1, input2, output, act_info); +} + +void NEElementwiseDivision::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwisePower::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwisePower> op{ nullptr }; +}; + +NEElementwisePower::NEElementwisePower() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwisePower::NEElementwisePower(NEElementwisePower &&) = default; +NEElementwisePower &NEElementwisePower::operator=(NEElementwisePower &&) = default; +NEElementwisePower::~NEElementwisePower() = default; + +void NEElementwisePower::configure(ITensor *input1, ITensor *input2, 
ITensor *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(act_info); + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwisePower>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), act_info); +} + +Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); + return experimental::NEElementwisePower::validate(input1, input2, output, act_info); +} + +void NEElementwisePower::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +template <ComparisonOperation COP> +struct NEElementwiseComparisonStatic<COP>::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseComparisonStatic<COP>> op{ nullptr }; +}; + +template <ComparisonOperation COP> +NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +template <ComparisonOperation COP> +NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&) = default; +template <ComparisonOperation COP> +NEElementwiseComparisonStatic<COP> &NEElementwiseComparisonStatic<COP>::operator=(NEElementwiseComparisonStatic &&) = default; +template <ComparisonOperation COP> +NEElementwiseComparisonStatic<COP>::~NEElementwiseComparisonStatic() = default; + +template <ComparisonOperation COP> +void NEElementwiseComparisonStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = 
arm_compute::support::cpp14::make_unique<experimental::NEElementwiseComparisonStatic<COP>>(); + _impl->op->configure(input1->info(), input2->info(), output->info()); +} + +template <ComparisonOperation COP> +Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +{ + return experimental::NEElementwiseComparisonStatic<COP>::validate(input1, input2, output); +} + +template <ComparisonOperation COP> +void NEElementwiseComparisonStatic<COP>::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +struct NEElementwiseComparison::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEElementwiseComparison> op{ nullptr }; +}; + +NEElementwiseComparison::NEElementwiseComparison() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEElementwiseComparison::NEElementwiseComparison(NEElementwiseComparison &&) = default; +NEElementwiseComparison &NEElementwiseComparison::operator=(NEElementwiseComparison &&) = default; +NEElementwiseComparison::~NEElementwiseComparison() = default; + +void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op) +{ + _impl->src_0 = input1; + _impl->src_1 = input2; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::NEElementwiseComparison>(); + _impl->op->configure(input1->info(), input2->info(), output->info(), op); +} + +Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op) +{ + return experimental::NEElementwiseComparison::validate(input1, input2, output, op); +} + +void NEElementwiseComparison::run() +{ + const InputTensorMap 
src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + // Supported Specializations template class NEElementwiseComparisonStatic<ComparisonOperation::Equal>; template class NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>; diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp index 02dfc6f137..1dd01fc162 100644 --- a/src/runtime/NEON/functions/NEPReluLayer.cpp +++ b/src/runtime/NEON/functions/NEPReluLayer.cpp @@ -29,7 +29,9 @@ namespace arm_compute { -void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor *output) +namespace experimental +{ +void NEPReluLayer::configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique<NEArithmeticOperationKernel>(); k->configure(ArithmeticOperation::PRELU, input, alpha, output); @@ -40,4 +42,47 @@ Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha { return NEArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output); } + +MemoryRequirements NEPReluLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct NEPReluLayer::Impl +{ + const ITensor *src_0{ nullptr }; + const ITensor *src_1{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<experimental::NEPReluLayer> op{ nullptr }; +}; + +NEPReluLayer::NEPReluLayer() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +NEPReluLayer::NEPReluLayer(NEPReluLayer &&) = default; +NEPReluLayer &NEPReluLayer::operator=(NEPReluLayer &&) = default; +NEPReluLayer::~NEPReluLayer() = default; + +void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor *output) +{ + _impl->src_0 = input; + _impl->src_1 = alpha; + _impl->dst = output; + _impl->op = 
arm_compute::support::cpp14::make_unique<experimental::NEPReluLayer>(); + _impl->op->configure(input->info(), alpha->info(), output->info()); +} + +void NEPReluLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} + +Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) +{ + return experimental::NEPReluLayer::validate(input, alpha, output); +} } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index daf358e7db..2b866b532c 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -89,8 +89,9 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void NEReshapeLayer::run() { - const InputTensor src{ TensorType::ACL_SRC, _impl->src }; - OutputTensor dst{ TensorType::ACL_DST, _impl->dst }; - _impl->op->run({ src }, { dst }, {}); + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } } // namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 6d6b285019..5b4b76a9df 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -83,7 +83,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } } -void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); 
ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, |