From bcd2352d7fd99a2f6aab220fa0c3b3f3119a1a4c Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 21 May 2020 15:02:36 +0100 Subject: COMPMID-3391: Implement Async interfaces Change-Id: I8168cea5056ff48a0253ebb8c88ea549a3ea69a2 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3335 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas --- Android.bp | 2 + arm_compute/core/CPP/ICPPKernel.h | 24 ++++++- .../core/NEON/kernels/NEReshapeLayerKernel.h | 16 +++-- arm_compute/core/experimental/Types.h | 71 ++++++++++++++++++++ arm_compute/runtime/CPP/CPPScheduler.h | 16 ++++- arm_compute/runtime/IOperator.h | 66 ++++++++++++++++++ arm_compute/runtime/IScheduler.h | 12 ++++ arm_compute/runtime/NEON/INEOperator.h | 67 +++++++++++++++++++ .../NEON/functions/NEGenerateProposalsLayer.h | 7 +- .../runtime/NEON/functions/NEReductionOperation.h | 4 +- .../runtime/NEON/functions/NEReshapeLayer.h | 51 ++++++++++++-- .../runtime/NEON/functions/NESoftmaxLayer.h | 8 +-- arm_compute/runtime/OMP/OMPScheduler.h | 15 ++++- arm_compute/runtime/OperatorTensor.h | 75 +++++++++++++++++++++ arm_compute/runtime/SingleThreadScheduler.h | 10 ++- arm_compute/runtime/experimental/Types.h | 42 ++++++++++++ src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 45 ++++++------- src/runtime/CPP/CPPScheduler.cpp | 78 ++++++++++++++-------- src/runtime/CPP/SingleThreadScheduler.cpp | 8 +++ src/runtime/NEON/INEOperator.cpp | 53 +++++++++++++++ .../NEON/functions/NEGenerateProposalsLayer.cpp | 20 +++--- .../NEON/functions/NEReductionOperation.cpp | 8 +-- src/runtime/NEON/functions/NEReshapeLayer.cpp | 38 ++++++++++- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 27 ++++---- src/runtime/OMP/OMPScheduler.cpp | 35 +++++++++- src/runtime/OperatorTensor.cpp | 57 ++++++++++++++++ tests/framework/instruments/SchedulerTimer.cpp | 15 ++++- tests/validation/NEON/ReshapeOperator.cpp | 78 ++++++++++++++++++++++ 28 files changed, 840 insertions(+), 108 deletions(-) create mode 100644 arm_compute/core/experimental/Types.h create mode 100644 arm_compute/runtime/IOperator.h create mode 100644 arm_compute/runtime/NEON/INEOperator.h create mode 100644 arm_compute/runtime/OperatorTensor.h create mode 100644 arm_compute/runtime/experimental/Types.h create mode 100644 src/runtime/NEON/INEOperator.cpp create mode 100644 src/runtime/OperatorTensor.cpp create mode 100644 tests/validation/NEON/ReshapeOperator.cpp diff --git a/Android.bp b/Android.bp index b484a1e909..2d12d27211 100644 --- a/Android.bp +++ b/Android.bp @@ -604,6 +604,7 @@ cc_library_static { "src/runtime/MemoryManagerOnDemand.cpp", "src/runtime/MultiHOG.cpp", "src/runtime/MultiImage.cpp", + "src/runtime/NEON/INEOperator.cpp", "src/runtime/NEON/INESimpleFunction.cpp", "src/runtime/NEON/INESimpleFunctionNoBorder.cpp", "src/runtime/NEON/functions/NEAbsoluteDifference.cpp", @@ -742,6 +743,7 @@ cc_library_static { "src/runtime/OMP/OMPScheduler.cpp", "src/runtime/OffsetLifetimeManager.cpp", "src/runtime/OffsetMemoryPool.cpp", + "src/runtime/OperatorTensor.cpp", "src/runtime/PoolManager.cpp", "src/runtime/Pyramid.cpp", "src/runtime/RuntimeContext.cpp", diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ec05af20bd..21f6ab714a 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -26,10 +26,13 @@ #include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/core/IKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/experimental/Types.h" namespace arm_compute { class Window; +class ITensor; /** Common interface for all kernels implemented in C++ */ class ICPPKernel : public IKernel @@ -51,8 +54,7 @@ public: */ virtual void run(const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(window); - ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_UNUSED(window, info); ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked"); } @@ -69,6 +71,24 @@ public: run(window, info); } + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] inputs A vector containing the input tensors. + * @param[in] outputs A vector containing the output tensors. + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + virtual void run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) + { + ARM_COMPUTE_UNUSED(inputs, outputs, window, info); + } + /** Name of the kernel * * @return Kernel name diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index fccf2685a8..6f888e0914 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H #define ARM_COMPUTE_NERESHAPELAYERKERNEL_H +#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/NEON/INESimpleKernel.h" namespace arm_compute @@ -32,19 +33,19 @@ namespace arm_compute class ITensor; /** Interface for the kernel to perform tensor reshaping */ -class NEReshapeLayerKernel : public INESimpleKernel +class NEReshapeLayerKernel : public INEKernel { public: const char *name() const override { return "NEReshapeLayerKernel"; } - /** Set the input and output of the kernel + /** Set the input and output info of the kernel * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input */ - void configure(const ITensor *input, ITensor *output); + void configure(const ITensorInfo *input, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel * @@ -56,7 +57,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) override; }; + } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h new file mode 100644 index 0000000000..6043db9ff4 --- /dev/null +++ b/arm_compute/core/experimental/Types.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_EXPERIMENTAL_TYPES_H +#define ARM_COMPUTE_EXPERIMENTAL_TYPES_H + +#include "arm_compute/core/TensorShape.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Memory type */ +enum class TensorType +{ + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_INT = 50, + ACL_INT_0 = 50, + ACL_INT_1 = 51, + ACL_INT_2 = 52 +}; +using InputOperatorTensors = std::pair; +using OutputOperatorTensors = std::pair; +using OperatorTensors = OutputOperatorTensors; + +namespace experimental +{ +struct MemoryInfo +{ + MemoryInfo(TensorType type, size_t size, size_t alignment) + : type(type), size(size), alignment(alignment) + { + } + TensorType type; + size_t size; + size_t alignment; +}; + +using MemoryRequirements = std::vector; +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_EXPERIMENTAL_TYPES_H */ diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index c8de41bf20..78ad43c2b4 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CPPSCHEDULER_H #define ARM_COMPUTE_CPPSCHEDULER_H +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IScheduler.h" #include @@ -65,6 +66,18 @@ public: * @param[in] hints Hints for the scheduler. */ void schedule(ICPPKernel *kernel, const Hints &hints) override; + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. + * @param[in] inputs Vector that contains the input tensors. + * @param[in] outputs Vector that contains the output tensors. + */ + void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; protected: /** Will run the workloads in parallel using num_threads @@ -74,6 +87,7 @@ protected: void run_workloads(std::vector &workloads) override; private: + void schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs); struct Impl; std::unique_ptr _impl; }; diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h new file mode 100644 index 0000000000..110c935702 --- /dev/null +++ b/arm_compute/runtime/IOperator.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_IOPERATOR_H +#define ARM_COMPUTE_IOPERATOR_H + +#include "arm_compute/runtime/IOperator.h" +#include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Types.h" + +namespace arm_compute +{ +namespace experimental +{ +/** Base class for all async functions */ +class IOperator +{ +public: + /** Destructor */ + virtual ~IOperator() = default; + /** Run the kernels contained in the function + * + * + * @param[in] inputs Vector that contains the input tensors. + * @param[in] outputs Vector that contains the output tensors. + * @param[in] workspace Vector that contains the workspace tensors. + * + */ + virtual void run(std::vector &inputs, std::vector &outputs, std::vector &workspace) = 0; + /** Prepare the function for executing + * + * Any one off pre-processing step required by the function is handled here + * + * @param[in] constants Vector that contains the constants tensors. + * + * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute + */ + virtual void prepare(std::vector constants) = 0; + + /** Return the memory requirements required by the workspace + */ + virtual MemoryRequirements workspace() const = 0; +}; +} // namespace experimental +} // namespace arm_compute +#endif /*ARM_COMPUTE_IOPERATOR_H */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index a5e20ee627..02d0cef086 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -25,6 +25,8 @@ #define ARM_COMPUTE_ISCHEDULER_H #include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/experimental/Types.h" #include #include @@ -32,6 +34,7 @@ namespace arm_compute { class ICPPKernel; +class ITensor; /** Scheduler interface to run kernels */ class IScheduler @@ -147,6 +150,15 @@ public: */ virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0; + /** Runs the kernel in the same thread as the caller synchronously. + * + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. + * @param[in] inputs Vector containing the input tensors. + * @param[in] outputs Vector containing the output tensors. + */ + virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) = 0; + /** Execute all the passed workloads * * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel. diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h new file mode 100644 index 0000000000..4467e6d5ab --- /dev/null +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INEOPERATOR_H +#define ARM_COMPUTE_INEOPERATOR_H + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/runtime/IOperator.h" +#include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Types.h" + +#include + +namespace arm_compute +{ +namespace experimental +{ +/** Basic interface for functions which have a single async NEON kernel */ +class INEOperator : public IOperator +{ +public: + /** Constructor + * + * @param[in] ctx Runtime context to be used by the function + */ + INEOperator(IRuntimeContext *ctx = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEOperator(const INEOperator &) = delete; + /** Default move constructor */ + INEOperator(INEOperator &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEOperator &operator=(const INEOperator &) = delete; + /** Default move assignment operator */ + INEOperator &operator=(INEOperator &&) = default; + + // Inherited methods overridden: + void run(std::vector &inputs, std::vector &outputs, std::vector &workspace) override final; + void prepare(std::vector constants) override final; + +protected: + std::unique_ptr _kernel; + IRuntimeContext *_ctx; + MemoryRequirements _workspace; +}; +} // namespace experimental +} // namespace arm_compute +#endif /*ARM_COMPUTE_INEOPERATOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index 7c470fbaf0..7260434606 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -23,18 +23,19 @@ */ #ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H #define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H + #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" #include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -112,9 +113,9 @@ private: // Neon kernels NEPermuteKernel _permute_deltas_kernel; - NEReshapeLayerKernel _flatten_deltas_kernel; + NEReshapeLayer _flatten_deltas; NEPermuteKernel _permute_scores_kernel; - NEReshapeLayerKernel _flatten_scores_kernel; + NEReshapeLayer _flatten_scores; NEComputeAllAnchorsKernel _compute_anchors_kernel; NEBoundingBoxTransformKernel _bounding_box_kernel; NEPadLayerKernel _pad_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index abda4159ba..78e8b04dbb 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -28,8 +28,8 @@ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -76,7 +76,7 @@ private: MemoryGroup _memory_group; NEReductionOperationKernel _reduction_kernel; NEFillBorderKernel _fill_border_kernel; - NEReshapeLayerKernel _reshape_kernel; + NEReshapeLayer _reshape; Tensor _output_internal; size_t _window_split; int _reduction_axis; diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index d6643842d9..5a296a776d 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,11 @@ #ifndef ARM_COMPUTE_NERESHAPELAYER_H #define ARM_COMPUTE_NERESHAPELAYER_H +#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" +#include "arm_compute/runtime/Types.h" namespace arm_compute { @@ -33,24 +36,62 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEReshapeLayerKernel */ -class NEReshapeLayer : public INESimpleFunctionNoBorder +class NEReshapeLayer : public IFunction { public: /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: All + * @param[in] input Input tensor. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer * - * @param[in] input First tensor info. Data type supported: All + * @param[in] input Input tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + const ITensor *_input + { + nullptr + }; + ITensor *_output{ nullptr }; + std::unique_ptr _kernel{ nullptr }; +}; + +namespace experimental +{ +/** Basic function to run @ref NEReshapeLayerKernel */ +class NEReshapeLayer : public INEOperator +{ +public: + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Input tensor info. Data type supported: All + * @param[out] output Output info. Data type supported: Same as @p input + */ + void configure(const ITensorInfo *input, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer + * + * @param[in] input Input tensor info. Data type supported: All + * @param[in] output Output tensor info. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; }; +} // namespace experimental } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index c5c83d8b5a..51d981de44 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -25,11 +25,11 @@ #define ARM_COMPUTE_NESOFTMAXLAYER_H #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -119,9 +119,9 @@ private: MemoryGroup _memory_group; NELogits1DMaxKernel _max_kernel; NELogits1DSoftmaxKernel _softmax_kernel; - std::unique_ptr _flat_or_reshape_kernel_ptr; + std::unique_ptr _flat_or_reshape_ptr; NEFillBorderKernel _fill_border_kernel; - NEReshapeLayerKernel _reshape_kernel; + NEReshapeLayer _reshape; Tensor _max; Tensor _tmp; Tensor _input_flattened; diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index ed00833a9c..8ed1705a97 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,19 @@ public: */ void schedule(ICPPKernel *kernel, const Hints &hints) override; + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. + * @param[in] inputs Vector containing the input tensors. + * @param[in] outputs Vector containing the output tensors. + */ + void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; + protected: /** Execute all the passed workloads * diff --git a/arm_compute/runtime/OperatorTensor.h b/arm_compute/runtime/OperatorTensor.h new file mode 100644 index 0000000000..3901f93291 --- /dev/null +++ b/arm_compute/runtime/OperatorTensor.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_OPERATORTENSOR_H +#define ARM_COMPUTE_OPERATORTENSOR_H + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/Types.h" +#include "arm_compute/runtime/experimental/Types.h" + +#include + +namespace arm_compute +{ +class TensorInfo; +class IRuntimeContext; +class IMemory; +namespace experimental +{ +/** Basic implementation of the tensor interface */ +class OperatorTensor : public ITensor +{ +public: + /** Constructor + * + * @param[in] info Pointer to the tensor info. + * @param[in] memory Pointer to the memory info. + * + */ + OperatorTensor(ITensorInfo *info, IMemory *memory); + /** Destructor: free the tensor's memory */ + ~OperatorTensor() = default; + /** Allow instances of this class to be move constructed */ + OperatorTensor(OperatorTensor &&) = default; + /** Allow instances of this class to be moved */ + OperatorTensor &operator=(OperatorTensor &&) = default; + /** Prevent instances of this class to be copy assigned */ + OperatorTensor &operator=(const OperatorTensor &) = delete; + /** Prevent instances of this class to be copy constructed */ + OperatorTensor(const OperatorTensor &) = delete; + + // Inherited methods overridden: + arm_compute::ITensorInfo *info() const override; + arm_compute::ITensorInfo *info() override; + uint8_t *buffer() const override; + +private: + arm_compute::ITensorInfo *_info; + arm_compute::IMemory *_memory; + MemoryType _mem_type; +}; +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_OPERATORTENSOR_H */ diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 3f279ebb19..8094758249 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,14 @@ public: * @param[in] hints Hints for the scheduler. */ void schedule(ICPPKernel *kernel, const Hints &hints) override; + /** Runs the kernel in the same thread as the caller synchronously. + * + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. + * @param[in] inputs Vector containing the input tensors. + * @param[in] outputs Vector containing the output tensors. + */ + void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; protected: /** Will run the workloads sequentially and in order. diff --git a/arm_compute/runtime/experimental/Types.h b/arm_compute/runtime/experimental/Types.h new file mode 100644 index 0000000000..bced0072b8 --- /dev/null +++ b/arm_compute/runtime/experimental/Types.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_RUNTIME_EXPERIMENTAL_TYPES_H +#define ARM_COMPUTE_RUNTIME_EXPERIMENTAL_TYPES_H + +#include + +namespace arm_compute +{ +namespace experimental +{ +/** Memory type */ +enum class MemoryType +{ + CPU, + CL, + GLES +}; +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_EXPERIMENTAL_TYPES_H */ diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 53fcfd724d..600f8f9bf1 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -31,13 +31,14 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include /** [NEReshapeLayerKernel Kernel] **/ -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) @@ -71,56 +72,54 @@ inline void reshape_tensor(const Window &window, const ITensor *input, ITensor * } } // namespace -void NEReshapeLayerKernel::configure(const ITensor *input, ITensor *output) +void NEReshapeLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output)); // Configure kernel window - Window win = calculate_max_window(*input->info()); + Window win = calculate_max_window(*input); // Set the output valid region - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); INEKernel::configure(win); } -Status NEReshapeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - - return Status{}; -} - -void NEReshapeLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEReshapeLayerKernel::run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - switch(_input->info()->data_type()) + switch(inputs[0]->second->info()->data_type()) { case DataType::U8: case DataType::S8: case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: - reshape_tensor(window, _input, _output); + reshape_tensor(window, inputs[0]->second, outputs[0]->second); break; case DataType::U16: case DataType::S16: case DataType::F16: - reshape_tensor(window, _input, _output); + reshape_tensor(window, inputs[0]->second, outputs[0]->second); break; case DataType::U32: case DataType::S32: case DataType::F32: - reshape_tensor(window, _input, _output); + reshape_tensor(window, inputs[0]->second, outputs[0]->second); break; default: ARM_COMPUTE_ERROR("Unsupported data type!"); } } + +Status NEReshapeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + + return Status{}; +} +} // namespace arm_compute /** [NEReshapeLayerKernel Kernel] **/ diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index 0a03497cb9..db551590ea 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -95,10 +95,10 @@ std::pair split_2d(unsigned max_threads, std::size_t m, std: // nt = sqrt(max_threads * (m / n) ) const unsigned adjusted = std::round( - std::sqrt(max_threads * ratio)); + std::sqrt(max_threads * ratio)); //find the nearest factor of max_threads - for(unsigned i = 0; i!= adjusted; ++i) + for(unsigned i = 0; i != adjusted; ++i) { //try down const unsigned adj_down = adjusted - i; @@ -118,11 +118,11 @@ std::pair split_2d(unsigned max_threads, std::size_t m, std: //we didn't find anything so lets bail out with maxes biased to the largest dimension if(m > n) { - return{ std::min(m, max_threads), 1 }; + return { std::min(m, max_threads), 1 }; } else { - return{ 1, std::min(n, max_threads) }; + return { 1, std::min(n, max_threads) }; } } @@ -144,7 +144,6 @@ void process_workloads(std::vector &workloads, ThreadFeede } while(feeder.get_next(workload_index)); } - } //namespace struct CPPScheduler::Impl final @@ -364,11 +363,11 @@ void CPPScheduler::run_workloads(std::vector &workloads) } #endif /* DOXYGEN_SKIP_THIS */ -void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) +void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); - const Window &max_window = kernel->window(); + const Window &max_window = kernel->window(); if(hints.split_dimension() == IScheduler::split_dimensions_all) { @@ -379,34 +378,32 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) const std::size_t m = max_window.num_iterations(Window::DimX); const std::size_t n = max_window.num_iterations(Window::DimY); - //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(... + //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(... unsigned m_threads, n_threads; std::tie(m_threads, n_threads) = split_2d(_impl->_num_threads, m, n); std::vector workloads; - for(unsigned int ni = 0; ni != n_threads; ++ni) + for(unsigned int ni = 0; ni != n_threads; ++ni) { - for(unsigned int mi = 0; mi != m_threads; ++mi) + for(unsigned int mi = 0; mi != m_threads; ++mi) { workloads.push_back( - [ ni, mi, m_threads, n_threads, &max_window, &kernel ] - (const ThreadInfo & info) - { - //narrow the window to our mi-ni workload - Window win = max_window.split_window(Window::DimX, mi, m_threads) - .split_window(Window::DimY, ni, n_threads); + [ni, mi, m_threads, n_threads, &max_window, &kernel](const ThreadInfo & info) + { + //narrow the window to our mi-ni workload + Window win = max_window.split_window(Window::DimX, mi, m_threads) + .split_window(Window::DimY, ni, n_threads); - win.validate(); + win.validate(); - Window thread_locator; - thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads)); - thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads)); + Window thread_locator; + thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads)); + thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads)); - thread_locator.validate(); + thread_locator.validate(); - kernel->run_nd(win, info, thread_locator); - } - ); + kernel->run_nd(win, info, thread_locator); + }); } } run_workloads(workloads); @@ -425,7 +422,14 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { ThreadInfo info; info.cpu_info = &_cpu_info; - kernel->run(max_window, info); + if(inputs.empty()) + { + kernel->run(max_window, info); + } + else + { + kernel->run_op(inputs, outputs, max_window, info); + } } else { @@ -449,15 +453,35 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) for(unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - kernel->run(win, info); + + if(inputs.empty()) + { + kernel->run(win, info); + } + else + { + kernel->run_op(inputs, outputs, win, info); + } }; } run_workloads(workloads); } } } + +void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +{ + schedule_common(kernel, hints, inputs, outputs); +} + +void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) +{ + std::vector inputs; + std::vector outputs; + schedule_common(kernel, hints, inputs, outputs); +} } // namespace arm_compute diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index 660a79652c..777f84bec8 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,6 +49,14 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) kernel->run(kernel->window(), info); } +void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +{ + ARM_COMPUTE_UNUSED(hints); + ThreadInfo info; + info.cpu_info = &_cpu_info; + kernel->run_op(inputs, outputs, kernel->window(), info); +} + void SingleThreadScheduler::run_workloads(std::vector &workloads) { ThreadInfo info; diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp new file mode 100644 index 0000000000..c24d5c47f1 --- /dev/null +++ b/src/runtime/NEON/INEOperator.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/INEOperator.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +namespace arm_compute +{ +namespace experimental +{ +INEOperator::INEOperator(IRuntimeContext *ctx) + : _kernel(), _ctx(ctx), _workspace() +{ +} + +void INEOperator::run(std::vector &inputs, std::vector &outputs, std::vector &workspace) +{ + ARM_COMPUTE_UNUSED(workspace); + + if(inputs.empty() || outputs.empty()) + { + ARM_COMPUTE_ERROR("No inputs provided"); + } + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); +} + +void INEOperator::prepare(std::vector constants) +{ + ARM_COMPUTE_UNUSED(constants); +} +} // namespace experimental +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 82880bac85..dabbebacb4 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -31,9 +31,9 @@ namespace arm_compute NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr memory_manager) : _memory_group(memory_manager), _permute_deltas_kernel(), - _flatten_deltas_kernel(), + _flatten_deltas(), _permute_scores_kernel(), - _flatten_scores_kernel(), + _flatten_scores(), _compute_anchors_kernel(), _bounding_box_kernel(), _pad_kernel(), @@ -95,12 +95,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d { _memory_group.manage(&_deltas_permuted); _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened); + _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } else { - _flatten_deltas_kernel.configure(deltas, &_deltas_flattened); + _flatten_deltas.configure(deltas, &_deltas_flattened); } const TensorShape flatten_shape_scores(1, total_num_anchors); @@ -112,12 +112,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d { _memory_group.manage(&_scores_permuted); _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened); + _flatten_scores.configure(&_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } else { - _flatten_scores_kernel.configure(scores, &_scores_flattened); + _flatten_scores.configure(scores, &_scores_flattened); } Tensor *anchors_to_use = &_all_anchors; @@ -244,12 +244,12 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&deltas_permuted_info, &deltas_flattened_info)); TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true)); TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&scores_permuted_info, &scores_flattened_info)); TensorInfo *proposals_4_roi_values_to_use = &proposals_4_roi_values; TensorInfo proposals_4_roi_values_quantized(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); @@ -327,8 +327,8 @@ void NEGenerateProposalsLayer::run() NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY); } - NEScheduler::get().schedule(&_flatten_deltas_kernel, Window::DimY); - NEScheduler::get().schedule(&_flatten_scores_kernel, Window::DimY); + _flatten_deltas.run(); + _flatten_scores.run(); if(_is_qasymm8) { diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 80ebe6731a..a895147cc9 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -54,7 +54,7 @@ size_t reduction_window_split_dimension(unsigned int axis) } // namespace NEReductionOperation::NEReductionOperation(std::shared_ptr memory_manager) - : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape_kernel(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) + : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) { } @@ -91,7 +91,7 @@ Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInf if(is_reshape_required) { - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(output_internal, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(output_internal, output)); } return Status{}; @@ -171,7 +171,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i if(_is_reshape_required) { - _reshape_kernel.configure(output_internal, output); + _reshape.configure(output_internal, output); _output_internal.allocator()->allocate(); } } @@ -185,7 +185,7 @@ void NEReductionOperation::run() NEScheduler::get().schedule(&_reduction_kernel, _window_split); if(_is_reshape_required) { - NEScheduler::get().schedule(&_reshape_kernel, Window::DimY); + _reshape.run(); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index 0a9f42d510..680abef026 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -25,19 +25,44 @@ #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/Types.h" #include "support/MemorySupport.h" #include namespace arm_compute { -void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +namespace experimental +{ +void NEReshapeLayer::configure(const ITensorInfo *input, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output); _kernel = std::move(k); } +Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::NEReshapeLayer::validate(input, output); +} + +MemoryRequirements NEReshapeLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +{ + _input = input; + _output = output; + + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input->info(), output->info()); + _kernel = std::move(k); +} + Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); @@ -45,4 +70,15 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out return Status{}; } + +void NEReshapeLayer::run() +{ + InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, _input); + OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, _output); + + std::vector inputs = { &src_0 }; + std::vector outputs = { &dst_0 }; + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); +} } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 5509edec87..5cd6a550af 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -32,8 +32,8 @@ namespace arm_compute { template NESoftmaxLayerGeneric::NESoftmaxLayerGeneric(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_kernel_ptr(nullptr), _fill_border_kernel(), _reshape_kernel(), _max(), _tmp(), _input_flattened(), - _output_flattened(), _needs_flattening(false) + : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_ptr(nullptr), _fill_border_kernel(), _reshape(), _max(), _tmp(), _input_flattened(), _output_flattened(), + _needs_flattening(false) { } @@ -46,23 +46,20 @@ void NESoftmaxLayerGeneric::configure_reshape_input_kernel(const ITensor // Initialize the flat input _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten)); - // If we need to flatten the input, we can use NEFlattenKernel or NEReshapeKernel - // If the number of reduced axes is 3 (max dimension), which means collapsing all axes except the batch axis, we use NEFlattenKernel. - // In all other cases we have to use NEReshapeKernel // Note that the "other cases" include both: // 1. first_n_reduce_axes < 3: Reduce the first 1 (no need to reduce) or 2 dimensions (inclusive) // 2. first_n_reduce_axes == 4: Reduce all 4 dimensions. This can only be handled by NEReshapeKernel instead of NEFlattenKernel. if(first_n_reduce_axes == 3) { - auto flatten_kernel_ptr = support::cpp14::make_unique(); + auto flatten_kernel_ptr = support::cpp14::make_unique(); flatten_kernel_ptr->configure(input, &_input_flattened); - _flat_or_reshape_kernel_ptr = std::move(flatten_kernel_ptr); + _flat_or_reshape_ptr = std::move(flatten_kernel_ptr); } else { - auto reshape_kernel_ptr = support::cpp14::make_unique(); + auto reshape_kernel_ptr = support::cpp14::make_unique(); reshape_kernel_ptr->configure(input, &_input_flattened); - _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr); + _flat_or_reshape_ptr = std::move(reshape_kernel_ptr); } // We need to init the output tensor here. Indeed, the reshape kernel expects @@ -127,7 +124,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _input_flattened.allocator()->allocate(); // Reshape the flat output into the requested (4D) output - _reshape_kernel.configure(&_output_flattened, output); + _reshape.configure(&_output_flattened, output); // Allocate the intermediate flat tensors _output_flattened.allocator()->allocate(); @@ -174,11 +171,11 @@ Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const I if(first_n_reduce_axes == 3) { - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &tensor_info_flat)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(input, &tensor_info_flat)); } } @@ -195,7 +192,7 @@ void NESoftmaxLayerGeneric::run() if(_needs_flattening) { - NEScheduler::get().schedule(_flat_or_reshape_kernel_ptr.get(), Window::DimY); + _flat_or_reshape_ptr->run(); } NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); @@ -204,11 +201,11 @@ void NESoftmaxLayerGeneric::run() if(_needs_flattening) { - NEScheduler::get().schedule(&_reshape_kernel, Window::DimY); + _reshape.run(); } } template class NESoftmaxLayerGeneric; template class NESoftmaxLayerGeneric; -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index f67f06fc94..a1851f03c3 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,6 +83,39 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } } +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) +{ + ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); + ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, + "Dynamic scheduling is not supported in OMPScheduler"); + + const Window &max_window = kernel->window(); + const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension()); + const unsigned int num_threads = std::min(num_iterations, _num_threads); + + if(!kernel->is_parallelisable() || num_threads == 1) + { + ThreadInfo info; + info.cpu_info = &_cpu_info; + kernel->run_op(inputs, outputs, max_window, info); + } + else + { + const unsigned int num_windows = num_threads; + std::vector workloads(num_windows); + for(unsigned int t = 0; t < num_windows; t++) + { + //Capture 't' by copy, all the other variables by reference: + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) + { + Window win = max_window.split_window(hints.split_dimension(), t, num_windows); + win.validate(); + kernel->run_op(inputs, outputs, win, info); + }; + } + run_workloads(workloads); + } +} #ifndef DOXYGEN_SKIP_THIS void OMPScheduler::run_workloads(std::vector &workloads) { diff --git a/src/runtime/OperatorTensor.cpp b/src/runtime/OperatorTensor.cpp new file mode 100644 index 0000000000..5d4e126177 --- /dev/null +++ b/src/runtime/OperatorTensor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/OperatorTensor.h" +#include "arm_compute/runtime/MemoryRegion.h" + +namespace arm_compute +{ +namespace experimental +{ +OperatorTensor::OperatorTensor(ITensorInfo *info, IMemory *memory) + : _info(info), _memory(memory), _mem_type(MemoryType::CPU) +{ +} + +ITensorInfo *OperatorTensor::info() const +{ + return _info; +} + +ITensorInfo *OperatorTensor::info() +{ + return _info; +} + +uint8_t *OperatorTensor::buffer() const +{ + switch(_mem_type) + { + case MemoryType::CPU: + return (uint8_t *)dynamic_cast(_memory->region())->buffer(); + default: + ARM_COMPUTE_ERROR("Memory type not supported."); + } +} +} // namespace experimental +} // namespace arm_compute diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 9e8bba28e8..58e1b56904 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -86,6 +86,19 @@ public: _kernels.push_back(std::move(info)); } + void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override + { + _timer.start(); + _real_scheduler.schedule_op(kernel, hints, inputs, outputs); + _timer.stop(); + + typename SchedulerClock::kernel_info info; + info.name = kernel->name(); + info.prefix = _prefix; + info.measurements = _timer.measurements(); + _kernels.push_back(std::move(info)); + } + void run_tagged_workloads(std::vector &workloads, const char *tag) override { _timer.start(); diff --git a/tests/validation/NEON/ReshapeOperator.cpp b/tests/validation/NEON/ReshapeOperator.cpp new file mode 100644 index 0000000000..82e9768a2c --- /dev/null +++ b/tests/validation/NEON/ReshapeOperator.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" +#include "arm_compute/runtime/OperatorTensor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(ReshapeOperator) + +TEST_CASE(Run, framework::DatasetMode::ALL) +{ + // Create tensors and info + TensorInfo src_info(TensorShape(27U, 11U, 3U), 1, DataType::F32); + TensorInfo dst_info(TensorShape(27U, 11U, 3U), 1, DataType::F32); + Tensor src = create_tensor(TensorShape(27U, 11U, 3U), DataType::F32, 1); + Tensor dst = create_tensor(TensorShape(27U, 11U, 3U), DataType::F32, 1); + + // Create and configure function + experimental::NEReshapeLayer reshape_operator; + reshape_operator.configure(&src_info, &dst_info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, &src); + OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, &dst); + + std::vector src_vec = { &src_0 }; + std::vector dst_vec = { &dst_0 }; + std::vector work_vec = {}; + + // Compute functions + reshape_operator.run(src_vec, dst_vec, work_vec); +} + +TEST_SUITE_END() // ReshapeOperator +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1