From 1fd2c80692ed8ecefc4d8deb783564ad19eaf70c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 16 Jun 2020 17:44:46 +0100 Subject: COMPMID-3375: Port NEActivationLayer functions/kernels to run on different tensors. Signed-off-by: Georgios Pinitas Change-Id: I98782bb73e9dc0899ffb1796aca6f99714adea94 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3343 Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/CPP/ICPPKernel.h | 2 +- .../core/NEON/kernels/NEActivationLayerKernel.h | 24 +++---- .../core/NEON/kernels/NEReshapeLayerKernel.h | 4 +- arm_compute/core/experimental/Types.h | 50 ++++++++++---- arm_compute/runtime/CPP/CPPScheduler.h | 4 +- arm_compute/runtime/IOperator.h | 4 +- arm_compute/runtime/IScheduler.h | 2 +- arm_compute/runtime/NEON/INEOperator.h | 4 +- .../runtime/NEON/functions/NEActivationLayer.h | 46 +++++++++++-- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 14 ++-- arm_compute/runtime/NEON/functions/NERNNLayer.h | 6 +- .../runtime/NEON/functions/NEReshapeLayer.h | 20 ++++-- arm_compute/runtime/OMP/OMPScheduler.h | 2 +- arm_compute/runtime/SingleThreadScheduler.h | 2 +- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 72 ++++++++++---------- src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 13 ++-- src/runtime/CPP/CPPScheduler.cpp | 8 +-- src/runtime/CPP/SingleThreadScheduler.cpp | 2 +- src/runtime/NEON/INEOperator.cpp | 4 +- src/runtime/NEON/functions/NEActivationLayer.cpp | 61 +++++++++++++++-- src/runtime/NEON/functions/NELSTMLayer.cpp | 32 ++++----- src/runtime/NEON/functions/NERNNLayer.cpp | 10 +-- src/runtime/NEON/functions/NEReshapeLayer.cpp | 42 +++++++----- src/runtime/OMP/OMPScheduler.cpp | 2 +- tests/framework/instruments/SchedulerTimer.cpp | 2 +- tests/validation/NEON/ReshapeOperator.cpp | 78 ---------------------- 26 files changed, 281 insertions(+), 229 deletions(-) delete mode 100644 tests/validation/NEON/ReshapeOperator.cpp diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index 21f6ab714a..3ec54756a0 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -84,7 +84,7 @@ public: * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) * @param[in] info Info about executing thread and CPU. */ - virtual void run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) + virtual void run_op(const std::vector &inputs, const std::vector &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(inputs, outputs, window, info); } diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 82103b988b..399afa63c6 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the activation layer kernel. */ @@ -57,12 +58,12 @@ public: * * @note If the output tensor is a nullptr, the activation function will be performed in-place * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[out] output Destination tensor info. Data type supported: same as @p input * @param[in] activation_info Activation layer information. */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result @@ -75,7 +76,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const std::vector &inputs, const std::vector &outputs, + const Window &window, const ThreadInfo &info) override; private: using ActivationFunction = ActivationLayerInfo::ActivationFunction; @@ -83,36 +85,34 @@ private: * * @param[in] window Region on which to execute the kernel. */ - using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); + using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template typename std::enable_if::value, void>::type - activation(const Window &window); + activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); private: - ITensor *_input; - ITensor *_output; ActivationFunctionExecutorPtr _func; ActivationLayerInfo _act_info; }; diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 6f888e0914..7a4dce128d 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -57,8 +57,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(const std::vector &inputs, const std::vector &outputs, + const Window &window, const ThreadInfo &info) override; }; - } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index 6043db9ff4..2b5591872a 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -36,21 +36,43 @@ class ITensor; /** Memory type */ enum class TensorType { - ACL_SRC = 0, - ACL_SRC_0 = 0, - ACL_SRC_1 = 1, - ACL_SRC_2 = 2, - ACL_DST = 30, - ACL_DST_0 = 30, - ACL_DST_1 = 31, - ACL_INT = 50, - ACL_INT_0 = 50, - ACL_INT_1 = 51, - ACL_INT_2 = 52 + ACL_UNKNOWN = -1, + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_INT = 50, + ACL_INT_0 = 50, + ACL_INT_1 = 51, + ACL_INT_2 = 52 }; -using InputOperatorTensors = std::pair; -using OutputOperatorTensors = std::pair; -using OperatorTensors = OutputOperatorTensors; + +/** Input tensor aggregate */ +struct InputTensor +{ + InputTensor(TensorType type, const ITensor *tensor) + : type(type), tensor(tensor) + { + } + + TensorType type{ TensorType::ACL_UNKNOWN }; + const ITensor *tensor{ nullptr }; +}; +/** Output tensor aggregate */ +struct OutputTensor +{ + OutputTensor(TensorType type, ITensor *tensor) + : type(type), tensor(tensor) + { + } + + TensorType type{ TensorType::ACL_UNKNOWN }; + ITensor *tensor{ nullptr }; +}; +using OperatorTensor = OutputTensor; namespace experimental { diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 78ad43c2b4..2ccb094fdb 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -77,7 +77,7 @@ public: * @param[in] inputs Vector that contains the input tensors. * @param[in] outputs Vector that contains the output tensors. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) override; protected: /** Will run the workloads in parallel using num_threads @@ -87,7 +87,7 @@ protected: void run_workloads(std::vector &workloads) override; private: - void schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs); + void schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs); struct Impl; std::unique_ptr _impl; }; diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index 110c935702..cf3c8b05a1 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -46,7 +46,7 @@ public: * @param[in] workspace Vector that contains the workspace tensors. * */ - virtual void run(std::vector &inputs, std::vector &outputs, std::vector &workspace) = 0; + virtual void run(std::vector inputs, std::vector outputs, std::vector workspace) = 0; /** Prepare the function for executing * * Any one off pre-processing step required by the function is handled here @@ -55,7 +55,7 @@ public: * * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute */ - virtual void prepare(std::vector constants) = 0; + virtual void prepare(std::vector constants) = 0; /** Return the memory requirements required by the workspace */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 02d0cef086..40da86fd10 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -157,7 +157,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. */ - virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) = 0; + virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) = 0; /** Execute all the passed workloads * diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 4467e6d5ab..2f6e18048d 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -54,8 +54,8 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(std::vector &inputs, std::vector &outputs, std::vector &workspace) override final; - void prepare(std::vector constants) override final; + void run(std::vector inputs, std::vector outputs, std::vector workspace) override final; + void prepare(std::vector constants) override final; protected: std::unique_ptr _kernel; diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 95901dc2d8..8ac2dae911 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { @@ -37,7 +38,7 @@ class ITensor; * * @note The function simulates an activation layer with the specified activation function. */ -class NEActivationLayer : public INESimpleFunctionNoBorder +class NEActivationLayer : public IFunction { public: /** Constructor @@ -45,14 +46,16 @@ public: * @param[in] ctx Runtime context to be used by the function */ NEActivationLayer(IRuntimeContext *ctx = nullptr); + /** Destructor */ + ~NEActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ - NEActivationLayer(NEActivationLayer &&) = default; + NEActivationLayer(NEActivationLayer &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ - NEActivationLayer &operator=(NEActivationLayer &&) = default; + NEActivationLayer &operator=(NEActivationLayer &&); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. * @@ -75,6 +78,41 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; + +namespace experimental +{ +/** Basic function to run @ref NEActivationLayerKernel */ +class NEActivationLayer : public INEOperator +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] activation_info Activation layer parameters. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index e85e87b88e..64845115b8 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -151,28 +151,28 @@ private: NEArithmeticAddition _accum_input_gate1; NEArithmeticSubtractionKernel _subtract_input_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - NEActivationLayerKernel _activation_input_gate; + NEActivationLayer _activation_input_gate; NEFullyConnectedLayer _fully_connected_forget_gate; NEArithmeticAddition _accum_forget_gate1; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - NEActivationLayerKernel _activation_forget_gate; + NEActivationLayer _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; NETransposeKernel _transpose_cell_state; NEArithmeticAdditionKernel _accum_cell_state1; NEArithmeticAdditionKernel _accum_cell_state2; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - NEActivationLayerKernel _activation_cell_state; - NEActivationLayerKernel _cell_clip; + NEActivationLayer _activation_cell_state; + NEActivationLayer _cell_clip; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; NEFullyConnectedLayer _fully_connected_output; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; NEArithmeticAddition _accum_output1; - NEActivationLayerKernel _activation_output; - NEActivationLayerKernel _activation_output_state; + NEActivationLayer _activation_output; + NEActivationLayer _activation_output_state; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; - NEActivationLayerKernel _projection_clip; + NEActivationLayer _projection_clip; NECopyKernel _copy_cell_state; NECopyKernel _copy_output; NEConcatenateLayer _concat_scratch_buffer; diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 0bfb905e19..db4134fd2d 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,11 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -85,7 +85,7 @@ private: MemoryGroup _memory_group; NEGEMM _gemm_state_f; NEArithmeticAdditionKernel _add_kernel; - NEActivationLayerKernel _activation_kernel; + NEActivationLayer _activation; NEFullyConnectedLayer _fully_connected; NECopyKernel _copy_kernel; Tensor _fully_connected_out; diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 5a296a776d..43605584e0 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -39,6 +39,18 @@ class ITensor; class NEReshapeLayer : public IFunction { public: + /** Default Constructor */ + NEReshapeLayer(); + /** Default Destructor */ + ~NEReshapeLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayer(const NEReshapeLayer &) = delete; + /** Default move constructor */ + NEReshapeLayer(NEReshapeLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayer &operator=(const NEReshapeLayer &) = delete; + /** Default move assignment operator */ + NEReshapeLayer &operator=(NEReshapeLayer &&); /** Initialise the kernel's inputs and outputs * * @param[in] input Input tensor. Data type supported: All @@ -59,12 +71,8 @@ public: void run() override; private: - const ITensor *_input - { - nullptr - }; - ITensor *_output{ nullptr }; - std::unique_ptr _kernel{ nullptr }; + struct Impl; + std::unique_ptr _impl; }; namespace experimental diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index 8ed1705a97..b7c186a838 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -66,7 +66,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) override; protected: /** Execute all the passed workloads diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 8094758249..8a69a5be15 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -57,7 +57,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) override; protected: /** Will run the workloads sequentially and in order. diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index ffbfd710f9..2c00a76305 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -95,7 +95,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +std::pair validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output) { // Configure kernel window Window win = calculate_max_window(*input, Steps()); @@ -116,23 +116,15 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace NEActivationLayerKernel::NEActivationLayerKernel() - : _input(nullptr), _output(nullptr), _func(nullptr), _act_info() + : _func(nullptr), _act_info() { } -void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) +void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _input = input; _act_info = activation_info; - _output = input; - - // Out-of-place calculation - if(output != nullptr) - { - _output = output; - } // Disabled activation, thus no operation needed if(!activation_info.enabled()) @@ -140,7 +132,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat _func = nullptr; } - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, activation_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, activation_info)); // Activation functions : FP32 static std::map act_map_f32 = @@ -218,7 +210,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat }; - switch(input->info()->data_type()) + switch(input->data_type()) { case DataType::QASYMM8_SIGNED: _func = act_map_qasymm8_signed[activation_info.activation()]; @@ -242,14 +234,14 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat } // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr); + auto win_config = validate_and_configure_window(input, output); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICPPKernel::configure(win_config.second); } template typename std::enable_if::value, void>::type -NEActivationLayerKernel::activation(const Window &window) +NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window) { /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; @@ -262,16 +254,16 @@ NEActivationLayerKernel::activation(const Window &window) Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); // A small delta added to the input to prevent NAN values caused by zeros in inputs to SQRT #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - const auto delta = wrapper::vdup_n(static_cast(1e-7), ExactTagType{}); + const auto delta = wrapper::vdup_n(static_cast(1e-7), ExactTagType {}); #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - const auto delta = wrapper::vdup_n(static_cast(1e-24), ExactTagType{}); + const auto delta = wrapper::vdup_n(static_cast(1e-24), ExactTagType {}); #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - const auto const_1 = wrapper::vdup_n(static_cast(1.f), ExactTagType{}); + const auto const_1 = wrapper::vdup_n(static_cast(1.f), ExactTagType {}); const auto const_0 = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); const auto const_6 = wrapper::vdup_n(static_cast(6.f), ExactTagType{}); const auto const_3 = wrapper::vdup_n(static_cast(3.f), ExactTagType{}); @@ -402,7 +394,7 @@ NEActivationLayerKernel::activation(const Window &window) } template -typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window) { const int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); @@ -412,11 +404,11 @@ typename std::enable_if::value, void>::type NEActivat Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform(); const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); @@ -579,7 +571,7 @@ typename std::enable_if::value, void>::type NEActivat } template -typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window) { const int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); @@ -589,11 +581,11 @@ typename std::enable_if::value, void>::type NE Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform(); const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in)); const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in)); const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in); @@ -756,7 +748,7 @@ typename std::enable_if::value, void>::type NE } template -typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window) { const int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); @@ -766,11 +758,11 @@ typename std::enable_if::value, void>::type NEActivat Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform(); const auto vconst_1 = vdupq_n_f32(1.f); const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); @@ -863,7 +855,9 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEActivationLayerKernel::run_op(const std::vector &inputs, + const std::vector &outputs, + const Window &window, const ThreadInfo &info) { // Early exit on disabled activation if(!_act_info.enabled()) @@ -876,5 +870,7 @@ void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (this->*_func)(window); + ARM_COMPUTE_ERROR_ON(inputs.empty() || outputs.empty()); + + (this->*_func)(inputs[0].tensor, outputs[0].tensor, window); } diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 600f8f9bf1..c141eecf75 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -86,29 +86,32 @@ void NEReshapeLayerKernel::configure(const ITensorInfo *input, ITensorInfo *outp INEKernel::configure(win); } -void NEReshapeLayerKernel::run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) +void NEReshapeLayerKernel::run_op(const std::vector &inputs, const std::vector &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - switch(inputs[0]->second->info()->data_type()) + const auto src = inputs[0].tensor; + auto dst = outputs[0].tensor; + + switch(src->info()->data_type()) { case DataType::U8: case DataType::S8: case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: - reshape_tensor(window, inputs[0]->second, outputs[0]->second); + reshape_tensor(window, src, dst); break; case DataType::U16: case DataType::S16: case DataType::F16: - reshape_tensor(window, inputs[0]->second, outputs[0]->second); + reshape_tensor(window, src, dst); break; case DataType::U32: case DataType::S32: case DataType::F32: - reshape_tensor(window, inputs[0]->second, outputs[0]->second); + reshape_tensor(window, src, dst); break; default: ARM_COMPUTE_ERROR("Unsupported data type!"); diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index db551590ea..41e1a2d647 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -363,7 +363,7 @@ void CPPScheduler::run_workloads(std::vector &workloads) } #endif /* DOXYGEN_SKIP_THIS */ -void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); @@ -473,15 +473,15 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, std:: } } -void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) { schedule_common(kernel, hints, inputs, outputs); } void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { - std::vector inputs; - std::vector outputs; + const std::vector inputs; + std::vector outputs; schedule_common(kernel, hints, inputs, outputs); } } // namespace arm_compute diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index 777f84bec8..8257628090 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,7 +49,7 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) kernel->run(kernel->window(), info); } -void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) { ARM_COMPUTE_UNUSED(hints); ThreadInfo info; diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index c24d5c47f1..78790856ee 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -33,7 +33,7 @@ INEOperator::INEOperator(IRuntimeContext *ctx) { } -void INEOperator::run(std::vector &inputs, std::vector &outputs, std::vector &workspace) +void INEOperator::run(std::vector inputs, std::vector outputs, std::vector workspace) { ARM_COMPUTE_UNUSED(workspace); @@ -45,7 +45,7 @@ void INEOperator::run(std::vector &inputs, std::vector constants) +void INEOperator::prepare(std::vector constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index e4d1125c79..889ff6b1f4 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -23,25 +23,76 @@ */ #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Tensor.h" #include "support/MemorySupport.h" namespace arm_compute { -NEActivationLayer::NEActivationLayer(IRuntimeContext *ctx) // NOLINT - : INESimpleFunctionNoBorder(ctx) +namespace experimental { -} -void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) +void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, activation_info); _kernel = std::move(k); } +Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) +{ + return NEActivationLayerKernel::validate(input, output, activation_info); +} + +MemoryRequirements NEActivationLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct NEActivationLayer::Impl +{ + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + IRuntimeContext *ctx{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEActivationLayer::NEActivationLayer(IRuntimeContext *ctx) + : _impl(support::cpp14::make_unique()) +{ + _impl->ctx = ctx; +} + +NEActivationLayer::NEActivationLayer(NEActivationLayer &&) = default; + +NEActivationLayer &NEActivationLayer::operator=(NEActivationLayer &&) = default; + +NEActivationLayer::~NEActivationLayer() = default; + +void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _impl->src = input; + _impl->dst = output == nullptr ? input : output; + + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(_impl->src->info(), _impl->dst->info(), activation_info); +} + Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { - return NEActivationLayerKernel::validate(input, output, act_info); + return experimental::NEActivationLayer::validate(input, output, act_info); +} + +void NEActivationLayer::run() +{ + const InputTensor src{ TensorType::ACL_SRC, _impl->src }; + OutputTensor dst{ TensorType::ACL_DST, _impl->dst }; + + _impl->op->run({ src }, { dst }, {}); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index f9d445fe71..0a111363e3 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -474,7 +474,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate input gate if(!lstm_params.has_cifg_opt()) @@ -508,7 +508,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(), &input_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); } else { @@ -526,14 +526,14 @@ Status NELSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&cell_state_tmp, nullptr, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_state_tmp, nullptr, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE)); if(cell_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, - cell_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, + cell_threshold))); } // Validate output gate tmp @@ -559,18 +559,18 @@ Status NELSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_ZERO)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate output state - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); if(lstm_params.has_projection()) { ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out)); if(projection_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(output_state_out, output_state_out, - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_state_out, output_state_out, + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); } } @@ -612,7 +612,7 @@ void NELSTMLayer::run() NEScheduler::get().schedule(&_pixelwise_mul_forget_gate_coeff, Window::DimY); NEScheduler::get().schedule(&_accum_forget_gate_bias, Window::DimY); } - NEScheduler::get().schedule(&_activation_forget_gate, Window::DimY); + _activation_forget_gate.run(); if(_run_cifg_opt) { @@ -642,7 +642,7 @@ void NELSTMLayer::run() NEScheduler::get().schedule(&_pixelwise_mul_input_gate_coeff, Window::DimY); NEScheduler::get().schedule(&_accum_input_gate_bias, Window::DimY); } - NEScheduler::get().schedule(&_activation_input_gate, Window::DimY); + _activation_input_gate.run(); } _fully_connected_cell_state.run(); @@ -655,14 +655,14 @@ void NELSTMLayer::run() NEScheduler::get().schedule(&_pixelwise_mul_cell_gate_coeff, Window::DimY); NEScheduler::get().schedule(&_accum_cell_gate_bias, Window::DimY); } - NEScheduler::get().schedule(&_activation_cell_state, Window::DimY); + _activation_cell_state.run(); NEScheduler::get().schedule(&_pixelwise_mul_cell_state1, Window::DimY); NEScheduler::get().schedule(&_pixelwise_mul_cell_state2, Window::DimY); NEScheduler::get().schedule(&_accum_cell_state2, Window::DimY); if(_perform_cell_clipping) { - NEScheduler::get().schedule(&_cell_clip, Window::DimY); + _cell_clip.run(); } _fully_connected_output.run(); @@ -677,9 +677,9 @@ void NELSTMLayer::run() NEScheduler::get().schedule(&_pixelwise_mul_output_gate_coeff, Window::DimY); NEScheduler::get().schedule(&_accum_output_gate_bias, Window::DimY); } - NEScheduler::get().schedule(&_activation_output, Window::DimY); + _activation_output.run(); - NEScheduler::get().schedule(&_activation_output_state, Window::DimY); + _activation_output_state.run(); NEScheduler::get().schedule(&_pixelwise_mul_output_state2, Window::DimY); if(_has_projection_weights) @@ -687,7 +687,7 @@ void NELSTMLayer::run() _fully_connected_output_state.run(); if(_perform_projection_clipping) { - NEScheduler::get().schedule(&_projection_clip, Window::DimY); + _projection_clip.run(); } } diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp index 154b060c3d..4a15777be9 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -34,8 +34,8 @@ namespace arm_compute { NERNNLayer::NERNNLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), - _add_output(), _is_prepared(false) + : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), + _is_prepared(false) { } @@ -60,7 +60,7 @@ Status NERNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&shape_info, &shape_info, info)); return Status{}; } @@ -95,7 +95,7 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I _fully_connected_out.allocator()->allocate(); _gemm_output.allocator()->allocate(); - _activation_kernel.configure(&_add_output, hidden_state, info); + _activation.configure(&_add_output, hidden_state, info); _add_output.allocator()->allocate(); _copy_kernel.configure(hidden_state, output); @@ -112,7 +112,7 @@ void NERNNLayer::run() _gemm_state_f.run(); NEScheduler::get().schedule(&_add_kernel, Window::DimY); - NEScheduler::get().schedule(&_activation_kernel, Window::DimY); + _activation.run(); // copy hidden out to output NEScheduler::get().schedule(&_copy_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index 680abef026..daf358e7db 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -44,7 +44,7 @@ void NEReshapeLayer::configure(const ITensorInfo *input, ITensorInfo *output) Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return arm_compute::NEReshapeLayer::validate(input, output); + return arm_compute::NEReshapeLayerKernel::validate(input, output); } MemoryRequirements NEReshapeLayer::workspace() const @@ -53,32 +53,44 @@ MemoryRequirements NEReshapeLayer::workspace() const } } // namespace experimental -void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +struct NEReshapeLayer::Impl { - _input = input; - _output = output; + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input->info(), output->info()); - _kernel = std::move(k); +NEReshapeLayer::NEReshapeLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +NEReshapeLayer::NEReshapeLayer(NEReshapeLayer &&) = default; + +NEReshapeLayer &NEReshapeLayer::operator=(NEReshapeLayer &&) = default; + +NEReshapeLayer::~NEReshapeLayer() = default; + +void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +{ + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(input->info(), output->info()); } Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(experimental::NEReshapeLayer::validate(input, output)); return Status{}; } void NEReshapeLayer::run() { - InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, _input); - OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, _output); - - std::vector inputs = { &src_0 }; - std::vector outputs = { &dst_0 }; - - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); + const InputTensor src{ TensorType::ACL_SRC, _impl->src }; + OutputTensor dst{ TensorType::ACL_DST, _impl->dst }; + _impl->op->run({ src }, { dst }, {}); } } // namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index a1851f03c3..6d6b285019 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -83,7 +83,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } } -void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 58e1b56904..8729179a46 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -86,7 +86,7 @@ public: _kernels.push_back(std::move(info)); } - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) override { _timer.start(); _real_scheduler.schedule_op(kernel, hints, inputs, outputs); diff --git a/tests/validation/NEON/ReshapeOperator.cpp b/tests/validation/NEON/ReshapeOperator.cpp deleted file mode 100644 index 82e9768a2c..0000000000 --- a/tests/validation/NEON/ReshapeOperator.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/runtime/OperatorTensor.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/NEON/Accessor.h" -#include "tests/Utils.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(ReshapeOperator) - -TEST_CASE(Run, framework::DatasetMode::ALL) -{ - // Create tensors and info - TensorInfo src_info(TensorShape(27U, 11U, 3U), 1, DataType::F32); - TensorInfo dst_info(TensorShape(27U, 11U, 3U), 1, DataType::F32); - Tensor src = create_tensor(TensorShape(27U, 11U, 3U), DataType::F32, 1); - Tensor dst = create_tensor(TensorShape(27U, 11U, 3U), DataType::F32, 1); - - // Create and configure function - experimental::NEReshapeLayer reshape_operator; - reshape_operator.configure(&src_info, &dst_info); - - // Allocate tensors - src.allocator()->allocate(); - dst.allocator()->allocate(); - - InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, &src); - OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, &dst); - - std::vector src_vec = { &src_0 }; - std::vector dst_vec = { &dst_0 }; - std::vector work_vec = {}; - - // Compute functions - reshape_operator.run(src_vec, dst_vec, work_vec); -} - -TEST_SUITE_END() // ReshapeOperator -TEST_SUITE_END() // NEON -} // namespace validation -} // namespace test -} // namespace arm_compute -- cgit v1.2.1