From 0499dff9293a86d3d53f72fed0a38b2823563674 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 31 Jul 2020 22:21:38 +0100 Subject: COMPMID-3392: Collapse TensorMaps into a single TensorPack Collapse InputTensorMap and OutputTensorMap to a single TensorPack mechanism. Signed-off-by: Georgios Pinitas Change-Id: Ie2fdfc6b07d84ad589169ec99ca64fcf45a00bec Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/253783 Tested-by: bsgcomp Reviewed-by: Michalis Spyrou Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3641 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Sheri Zhang --- Android.bp | 1 + arm_compute/core/CL/ICLKernel.h | 7 +- .../core/CL/kernels/CLActivationLayerKernel.h | 3 +- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 3 +- .../core/CL/kernels/CLElementwiseOperationKernel.h | 3 +- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 2 +- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 4 +- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h | 3 +- arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 3 +- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 3 +- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 3 +- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 3 +- arm_compute/core/CPP/ICPPKernel.h | 7 +- arm_compute/core/ITensorPack.h | 100 ++++++++++++++++ .../core/NEON/kernels/NEActivationLayerKernel.h | 3 +- .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 2 +- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 2 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEElementwiseOperationKernel.h | 3 +- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 4 +- .../core/NEON/kernels/NEReshapeLayerKernel.h | 3 +- .../core/NEON/kernels/NEStridedSliceKernel.h | 3 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 3 +- arm_compute/core/experimental/Types.h | 7 +- arm_compute/runtime/CL/CLScheduler.h | 7 +- arm_compute/runtime/CL/CLTuner.h | 7 +- arm_compute/runtime/CL/ICLOperator.h | 4 +- arm_compute/runtime/CL/ICLTuner.h | 5 +- .../runtime/CL/functions/CLConcatenateLayer.h | 2 +- .../runtime/CL/functions/CLElementwiseOperations.h | 14 +-- arm_compute/runtime/CL/functions/CLPReluLayer.h | 2 +- .../CL/functions/CLPixelWiseMultiplication.h | 4 +- arm_compute/runtime/CL/tuners/BifrostTuner.h | 2 +- arm_compute/runtime/CL/tuners/MidgardTuner.h | 2 +- arm_compute/runtime/CPP/CPPScheduler.h | 4 +- arm_compute/runtime/IOperator.h | 9 +- arm_compute/runtime/IScheduler.h | 5 +- arm_compute/runtime/NEON/INEOperator.h | 5 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 2 +- arm_compute/runtime/OMP/OMPScheduler.h | 5 +- arm_compute/runtime/SingleThreadScheduler.h | 5 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 6 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 6 +- .../CL/kernels/CLElementwiseOperationKernel.cpp | 8 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 6 +- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 16 +-- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 6 +- 
src/core/CL/kernels/CLStridedSliceKernel.cpp | 6 +- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 9 +- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 13 +- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 7 +- src/core/ITensor.cpp | 10 +- src/core/ITensorPack.cpp | 65 ++++++++++ src/core/NEON/kernels/NEActivationLayerKernel.cpp | 10 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 8 +- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 8 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 8 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 8 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 6 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 7 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 16 +-- src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEStridedSliceKernel.cpp | 6 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 7 +- src/runtime/CL/CLOperator.cpp | 10 +- src/runtime/CL/CLScheduler.cpp | 15 +-- src/runtime/CL/CLTuner.cpp | 15 +-- src/runtime/CL/functions/CLActivationLayer.cpp | 8 +- src/runtime/CL/functions/CLConcatenateLayer.cpp | 24 ++-- .../CL/functions/CLElementWiseUnaryLayer.cpp | 56 ++++----- .../CL/functions/CLElementwiseOperations.cpp | 131 +++++++++++---------- src/runtime/CL/functions/CLPReluLayer.cpp | 30 +++-- .../CL/functions/CLPixelWiseMultiplication.cpp | 46 ++++---- src/runtime/CL/functions/CLReshapeLayer.cpp | 8 +- src/runtime/CL/functions/CLSlice.cpp | 7 +- src/runtime/CL/functions/CLStridedSlice.cpp | 8 +- src/runtime/CL/tuners/BifrostTuner.cpp | 4 +- src/runtime/CL/tuners/MidgardTuner.cpp | 4 +- src/runtime/CPP/CPPScheduler.cpp | 21 ++-- src/runtime/CPP/SingleThreadScheduler.cpp | 4 +- src/runtime/NEON/INEOperator.cpp | 10 +- src/runtime/NEON/functions/NEActivationLayer.cpp | 8 +- .../NEON/functions/NEArithmeticAddition.cpp | 8 +- .../NEON/functions/NEArithmeticSubtraction.cpp | 8 +- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 22 ++-- .../NEON/functions/NEElementwiseOperators.cpp | 56 +++++---- src/runtime/NEON/functions/NEPReluLayer.cpp | 8 +- .../NEON/functions/NEPixelWiseMultiplication.cpp | 16 ++- src/runtime/NEON/functions/NEReshapeLayer.cpp | 8 +- src/runtime/NEON/functions/NESlice.cpp | 7 +- src/runtime/NEON/functions/NEStridedSlice.cpp | 7 +- src/runtime/OMP/OMPScheduler.cpp | 8 +- tests/framework/instruments/SchedulerTimer.cpp | 4 +- 101 files changed, 646 insertions(+), 461 deletions(-) create mode 100644 arm_compute/core/ITensorPack.h create mode 100644 src/core/ITensorPack.cpp diff --git a/Android.bp b/Android.bp index 45d96e7e9b..3b59a7d283 100644 --- a/Android.bp +++ b/Android.bp @@ -229,6 +229,7 @@ cc_library_static { "src/core/IDistribution1D.cpp", "src/core/IKernel.cpp", "src/core/ITensor.cpp", + "src/core/ITensorPack.cpp", "src/core/MultiImageInfo.cpp", "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp", "src/core/NEON/kernels/NEAccumulateKernel.cpp", diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index 6e606ff857..d4990a1dee 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -225,14 +225,13 @@ public: * * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. * - * @param[in] inputs A vector containing the input tensors. - * @param[in] outputs A vector containing the output tensors. + * @param[in] tensors A vector containing the tensors to operate on. * @param[in] window Region on which to execute the kernel.
(Must be a valid region of the window returned by window()). * @param[in,out] queue Command queue on which to enqueue the kernel. */ - virtual void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) + virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { - ARM_COMPUTE_UNUSED(inputs, outputs, window, queue); + ARM_COMPUTE_UNUSED(tensors, window, queue); } /** Add the passed parameters to the object's kernel's arguments starting from the index idx. * diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index cf62ca600b..81d4ccb065 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: bool _run_in_place; diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h index 890fecf0c9..bb8968ca83 100644 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -73,8 +73,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _batch_offset; diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h index ad365d655b..d8493bc5d8 100644 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -71,8 +71,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _depth_offset; diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h index f8f158c5bc..82cd953b68 100644 --- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h @@ -60,8 +60,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git 
a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h index 76bc879638..b459292161 100644 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h @@ -54,8 +54,7 @@ public: ~CLElementwiseOperationKernel() = default; // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; - + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; protected: diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index 8cad68dc1a..5323af4c0e 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -85,7 +85,7 @@ public: void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; void run(const Window &window, cl::CommandQueue &queue) override; bool is_parallelisable() const override; diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h index 0563fad414..4fa2b40881 100644 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _height_offset; diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index 86159fc915..6b5bd11bde 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -132,7 +132,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: @@ -184,7 +184,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h index 
11c06d1cab..6e3f255c52 100644 --- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h @@ -53,8 +53,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h index 28a665b113..74311b71fa 100644 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h @@ -73,8 +73,7 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h index aaedaa90ee..a379b5f0b8 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -67,8 +67,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h index b1d6e890a0..6b0e8ee21d 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -71,8 +71,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h index cb8777d5fd..32e90af404 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void 
run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _width_offset; diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index 3f784cf6d9..ab369ffe1d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -79,14 +79,13 @@ public: * * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). * - * @param[in] inputs A vector containing the input tensors. - * @param[in] outputs A vector containing the output tensors. + * @param[in] tensors A vector containing the tensors to operate on. * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) * @param[in] info Info about executing thread and CPU. */ - virtual void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) + virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(inputs, outputs, window, info); + ARM_COMPUTE_UNUSED(tensors, window, info); } /** Name of the kernel diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h new file mode 100644 index 0000000000..36b6aea490 --- /dev/null +++ b/arm_compute/core/ITensorPack.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ #ifndef ARM_COMPUTE_ITENSORPACK_H +#define ARM_COMPUTE_ITENSORPACK_H + +#include <cstdint> +#include <map> + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Tensor packing service */ +class ITensorPack +{ +private: + struct PackElement + { + PackElement() = default; + PackElement(ITensor *tensor) + : tensor(tensor), ctensor(nullptr) + { + } + PackElement(const ITensor *ctensor) + : tensor(nullptr), ctensor(ctensor) + { + } + + ITensor *tensor{ nullptr }; + const ITensor *ctensor{ nullptr }; + }; + +public: + /** Default Constructor */ + ITensorPack() = default; + /** Add tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, ITensor *tensor); + + /** Add const tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, const ITensor *tensor); + /** Get tensor of a given id from the pack + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if it exists and is non-const, else nullptr + */ + ITensor *get_tensor(int id); + /** Get constant tensor of a given id + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if it exists and is const, else nullptr + */ + const ITensor *get_const_tensor(int id) const; + /** Pack size accessor + * + * @return Number of tensors registered to the pack + */ + size_t size() const; + /** Checks if pack is empty + * + * @return True if empty else false + */ + bool empty() const; + +private: + std::map<unsigned int, PackElement> _pack{}; /**< Container with the packed tensors */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ITENSORPACK_H */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 960e6fa3e5..325647bd66 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using ActivationFunction = ActivationLayerInfo::ActivationFunction; diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h index 764596d7dc..eece5708e8 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -86,7 +86,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised add functions diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index 86c3dd8e88..e3a41a2b1c 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -98,7 +98,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised sub functions diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index f397a29b48..478890925b 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index e1aaa59f25..3b2b9a1b79 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h index cf43aca4b0..47b8c3b7c8 100644 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -67,8 +67,7 @@ public: using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; protected: /** Validate the argument passed to the kernel diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index d463b53e2c..8a5e86acc4 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -74,8 +74,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const 
Window &window, const ThreadInfo &info) override; private: unsigned int _height_offset; diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index 8876d04c62..c530d78c42 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -111,7 +111,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); // Inherited methods overridden - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised multiplication functions with integer scaling factor @@ -178,7 +178,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 97e91ed03d..a4b8426e41 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -57,8 +57,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index 59caeecb0d..be55fd75de 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -91,8 +91,7 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: Coordinates _starts_abs; /**< Absolute start coordinates */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index b5336ad026..64d741deab 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -73,8 +73,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: unsigned int _width_offset; diff --git 
a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index eca833e708..4dee5ff70d 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -24,13 +24,14 @@ #ifndef ARM_COMPUTE_EXPERIMENTAL_TYPES_H #define ARM_COMPUTE_EXPERIMENTAL_TYPES_H +#include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/TensorShape.h" -#include <map> #include <vector> namespace arm_compute { +// Forward declaration class ITensor; /** Memory type */ @@ -51,10 +52,6 @@ enum TensorType : int32_t ACL_SRC_VEC = 256, }; -using InputTensorMap = std::map<TensorType, const ITensor *>; -using OutputTensorMap = std::map<TensorType, ITensor *>; -using OperatorTensorMap = OutputTensorMap; - namespace experimental { struct MemoryInfo diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 93595c65c7..8a22832792 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -76,11 +76,10 @@ public: /** Schedule the execution of the passed kernel if possible. * * @param[in] kernel Kernel to execute. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. */ - void enqueue_op(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush = true); + void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true); /** Initialises the context and command queue to be used by the scheduler. * @@ -152,7 +151,7 @@ public: bool is_initialised() const; private: - void enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush); + void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush); /** Flag to ensure symbols initialisation is happening before Scheduler creation */ static std::once_flag _initialize_symbols; diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index aa31181d2d..3b45a2177e 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -116,7 +116,7 @@ public: // Inherited methods overridden: void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; /** Is the kernel_event set ?
* @@ -127,11 +127,12 @@ public: private: /** Find optimal LWS using brute-force approach * - * @param[in] kernel OpenCL kernel to be tuned with LWS + * @param[in] kernel OpenCL kernel to be tuned with LWS + * @param[in,out] tensors Tensors for the kernel to operate on * * @return The optimal LWS to use */ - cl::NDRange find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs); + cl::NDRange find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors); std::unordered_map<std::string, cl::NDRange> _lws_table; cl::Event _kernel_event; diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h index 2d6c96e815..526b7e93e9 100644 --- a/arm_compute/runtime/CL/ICLOperator.h +++ b/arm_compute/runtime/CL/ICLOperator.h @@ -54,8 +54,8 @@ public: ICLOperator &operator=(ICLOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; - void prepare(OperatorTensorMap constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; MemoryRequirements workspace() const override; protected: diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h index 4bc8ddf632..0f951c384e 100644 --- a/arm_compute/runtime/CL/ICLTuner.h +++ b/arm_compute/runtime/CL/ICLTuner.h @@ -54,10 +54,9 @@ public: /** Tune OpenCL kernel dynamically * * @param[in] kernel Kernel to tune - * @param[in] inputs Inputs for the kernel to use - * @param[in, out] outputs Outputs for the kernel to use + * @param[in, out] tensors Tensors for the kernel to use */ - virtual void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; + virtual void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) = 0; }; } // namespace arm_compute #endif /*ARM_COMPUTE_ICLTUNER_H */ diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index 99a2053a5a..f535c8ea97 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -144,7 +144,7 @@ public: static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: std::vector<std::unique_ptr<ICLKernel>> _concat_kernels; diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 5af24c90ac..2d9d43863d 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -98,7 +98,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -168,7 +168,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap
outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -207,7 +207,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -246,7 +246,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -285,7 +285,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -324,7 +324,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -363,7 +363,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h index 08567cccfb..84743508df 100644 --- a/arm_compute/runtime/CL/functions/CLPReluLayer.h +++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h @@ -64,7 +64,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index ca8d77e6b7..2066012306 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -103,7 +103,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -136,7 +136,7 @@ public: static Status validate(const ITensorInfo 
*input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/tuners/BifrostTuner.h b/arm_compute/runtime/CL/tuners/BifrostTuner.h index 830f7d9067..237693fb88 100644 --- a/arm_compute/runtime/CL/tuners/BifrostTuner.h +++ b/arm_compute/runtime/CL/tuners/BifrostTuner.h @@ -37,7 +37,7 @@ public: // Inherited overridden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CL/tuners/MidgardTuner.h b/arm_compute/runtime/CL/tuners/MidgardTuner.h index c702e7a2aa..86d46044c2 100644 --- a/arm_compute/runtime/CL/tuners/MidgardTuner.h +++ b/arm_compute/runtime/CL/tuners/MidgardTuner.h @@ -37,7 +37,7 @@ public: // Inherited overridden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 9d55ed448e..e8ad427eba 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -52,7 +52,7 @@ public: void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func) override; unsigned int num_threads() const override; void schedule(ICPPKernel *kernel, const Hints &hints) override; - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Will run the workloads in parallel using num_threads * * @param[in] workloads */ void run_workloads(std::vector<Workload> &workloads) override; private: - void schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs); + void schedule_common(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors); struct Impl; std::unique_ptr<Impl> _impl; }; diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index d72fca4399..e7952bb748 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -40,13 +40,10 @@ public: virtual ~IOperator() = default; /** Run the kernels contained in the function * - * - * @param[in] inputs Vector that contains the input tensors. - * @param[in] outputs Vector that contains the output tensors. - * @param[in] workspace Vector that contains the workspace tensors. + * @param[in] tensors Vector that contains the tensors to operate on.
* */ - virtual void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) = 0; + virtual void run(ITensorPack &tensors) = 0; /** Prepare the function for executing * * Any one-off pre-processing step required by the function is handled here * * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute */ - virtual void prepare(OperatorTensorMap constants) = 0; + virtual void prepare(ITensorPack &constants) = 0; /** Return the memory requirements required by the workspace */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index fff77274bd..98627538e8 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -168,10 +168,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. */ - virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; + virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) = 0; /** Execute all the passed workloads * diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index f91305543f..415e767eec 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_INEOPERATOR_H #define ARM_COMPUTE_INEOPERATOR_H +#include "../../core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -54,8 +55,8 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; - void prepare(OperatorTensorMap constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; MemoryRequirements workspace() const override; protected: diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 73c62330c5..1d703ae729 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -132,7 +132,7 @@ public: static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: std::vector<std::unique_ptr<INEKernel>> _concat_kernels; diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index 56bd6baaa6..56a31cc076 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -63,10 +63,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on.
*/ - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Execute all the passed workloads diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 42fc742062..d45730e499 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -54,10 +54,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Will run the workloads sequentially and in order. diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 66751f7dd3..62cafc5ad1 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -248,13 +248,13 @@ Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void CLActivationLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); ARM_COMPUTE_ERROR_ON(_run_in_place && src != dst); Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index b3496f7229..feebe01cdb 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -139,14 +139,13 @@ Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void CLBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_3D(); diff --git
a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 0bae901baa..5978a0223f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -123,14 +123,13 @@ Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void CLDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index 87fafd340c..c8c7fb03b8 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -119,7 +119,7 @@ Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const I return Status{}; } -void CLElementWiseUnaryLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLElementWiseUnaryLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -127,8 +127,8 @@ void CLElementWiseUnaryLayerKernel::run_op(const InputTensorMap &inputs, const O Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); do { diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 7cc6fb38b1..ec33500f20 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -280,14 +280,14 @@ void CLElementwiseOperationKernel::configure_common(const CLCompileContext &comp _config_id = generate_id_for_tuning(kernel_name, *input1, *output); } -void CLElementwiseOperationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 =
utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 1fca646129..1ea654b5cc 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -170,17 +170,15 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen _config_id += lower_string(string_from_border_mode(border_mode)); } -void CLFillBorderKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { - ARM_COMPUTE_UNUSED(outputs); - // Border mode undefined or border width == 0 if(_kernel() == nullptr) { return; } - const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); + const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 5ab270cc01..22b2cfcbc5 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -123,14 +123,13 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); } -void CLHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); unsigned int idx = 0; add_4D_tensor_argument(idx, src, window); diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 95869f7e96..229937ef31 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -267,14 +267,14 @@ Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, cons return Status{}; } -void CLPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); @@ -420,14 +420,14 @@ Status CLComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input return Status{}; } -void CLComplexPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLComplexPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index 05b6dd5675..3daf21a9a7 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -100,7 +100,7 @@ Status CLReshapeLayerKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); @@ -108,8 +108,8 @@ void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTens Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_3D(); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); // Set inputs unsigned int idx = 0; diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index
94cbd43bb1..f7b7290a3f 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -172,13 +172,13 @@ Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 88b69c7cce..76100c2a63 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -147,17 +147,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile _config_id += support::cpp11::to_string(input2->dimension(1)); } -void CLWidthConcatenate2TensorsKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); Window slice = window.first_slice_window_4D(); - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC + 1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); do { diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index e49bb1c8ce..0377eb76b1 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -202,17 +202,16 @@ void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile _config_id += support::cpp11::to_string(input4->dimension(1)); } -void CLWidthConcatenate4TensorsKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC)); - const
auto src1 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 1)); - const auto src2 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 2)); - const auto src3 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 3)); - auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + const auto src2 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); + const auto src3 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_4D(); diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 591c26f877..d40597fbb5 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -122,14 +122,13 @@ void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_co output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); } -void CLWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); unsigned int idx = 0; add_4D_tensor_argument(idx, src, window); diff --git a/src/core/ITensor.cpp b/src/core/ITensor.cpp index 226e8d5714..e263596333 100644 --- a/src/core/ITensor.cpp +++ b/src/core/ITensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,8 +30,8 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ void ITensor::copy_from(const ITensor &src) { if(&src == this) @@ -64,7 +64,8 @@ void ITensor::copy_from(const ITensor &src) const size_t line_size = src_info->element_size() * src_info->dimension(0); - execute_window_loop(win_src, [&](const Coordinates &) + execute_window_loop( + win_src, [&](const Coordinates &) { memcpy(dst_it.ptr(), src_it.ptr(), line_size); }, @@ -168,3 +169,4 @@ void ITensor::mark_as_unused() const { _is_used = false; } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/ITensorPack.cpp b/src/core/ITensorPack.cpp new file mode 100644 index 0000000000..7a54a8bc6b --- /dev/null +++ b/src/core/ITensorPack.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/ITensorPack.h" + +#include "arm_compute/core/ITensor.h" + +namespace arm_compute +{ +void ITensorPack::add_tensor(int id, ITensor *tensor) +{ + _pack[id] = PackElement(tensor); +} + +void ITensorPack::add_tensor(int id, const ITensor *tensor) +{ + _pack[id] = PackElement(tensor); +} + +const ITensor *ITensorPack::get_const_tensor(int id) const +{ + auto it = _pack.find(id); + if(it != _pack.end()) + { + return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor; + } + return nullptr; +} + +ITensor *ITensorPack::get_tensor(int id) +{ + auto it = _pack.find(id); + return it != _pack.end() ? it->second.tensor : nullptr; +} + +size_t ITensorPack::size() const +{ + return _pack.size(); +} + +bool ITensorPack::empty() const +{ + return _pack.empty(); +} +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 7023d59763..b15df311cc 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -850,9 +850,7 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void NEActivationLayerKernel::run_op(const InputTensorMap &inputs, - const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { // Early exit on disabled activation if(!_act_info.enabled()) @@ -865,8 +863,10 @@ void NEActivationLayerKernel::run_op(const InputTensorMap &inputs, ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - ARM_COMPUTE_ERROR_ON(inputs.empty() || outputs.empty()); + ARM_COMPUTE_ERROR_ON(tensors.empty()); - (this->*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), window); + (this->*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 0ad4b3f12e..5f5a3e5b37 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -984,12 +984,16 @@ Status 
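// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] Minimal standalone use of the
// ITensorPack API introduced above. Tensor/TensorInfo/TensorShape are the
// public runtime types; the function name is hypothetical. Assumed includes:
// "arm_compute/core/ITensorPack.h", "arm_compute/core/TensorInfo.h",
// "arm_compute/core/Error.h", "arm_compute/core/experimental/Types.h",
// "arm_compute/runtime/Tensor.h".
//
// using namespace arm_compute;
//
// void tensor_pack_smoke_test()
// {
//     Tensor src{}, dst{};
//     src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
//     dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
//     src.allocator()->allocate();
//     dst.allocator()->allocate();
//
//     ITensorPack pack;
//     pack.add_tensor(TensorType::ACL_SRC, static_cast<const ITensor *>(&src)); // const overload
//     pack.add_tensor(TensorType::ACL_DST, &dst);                               // mutable overload
//
//     // get_const_tensor() prefers the const pointer and falls back to the
//     // mutable one, so kernels can read any packed element through a single
//     // accessor; get_tensor() only returns elements added as mutable.
//     const ITensor *in  = pack.get_const_tensor(TensorType::ACL_SRC);
//     ITensor       *out = pack.get_tensor(TensorType::ACL_DST);
//     ARM_COMPUTE_ERROR_ON(in == nullptr || out == nullptr || pack.size() != 2);
// }
// ---------------------------------------------------------------------------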
NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITe return Status{}; } -void NEArithmeticAdditionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEArithmeticAdditionKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), _policy, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), + _policy, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 572f09eab2..92371936fa 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -807,12 +807,16 @@ Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const return Status{}; } -void NEArithmeticSubtractionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEArithmeticSubtractionKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window, (_policy == ConvertPolicy::SATURATE)); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), + window, + (_policy == ConvertPolicy::SATURATE)); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index c597afd804..0ee6d0efcf 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -191,14 +191,16 @@ Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _batch_offset, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + _batch_offset, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 49e10de94e..6926ec1aac 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -189,14 +189,16 @@ Status 
NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _depth_offset, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + _depth_offset, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 213103a830..014a564bf1 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -1157,13 +1157,15 @@ void NEElementwiseOperationKernel::configure_common(const ITensorInfo *input1, c INEKernel::configure(win); } -void NEElementwiseOperationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info, window); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_function == nullptr); - _function(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window); + _function(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), window); } /** Arithmetic operators (min, max, squared_diff) */ diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index d4043e02b7..8a671bfa23 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -84,15 +84,14 @@ Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign return Status{}; } -void NEHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); // Offset output pointer to the correct position uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY]; diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index b5b4f841b4..907a7f197b 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp 
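// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] Every NEON hunk above follows the
// same shape: pull typed pointers out of the single pack, validate them, then
// iterate the window. A free-function rendering of that pattern, assuming F32
// data with unit window steps and the usual ACL headers
// ("arm_compute/core/ITensorPack.h", "arm_compute/core/Helpers.h",
// "arm_compute/core/Validate.h"); the helper name is hypothetical.
//
// using namespace arm_compute;
//
// void run_pack_aware(ITensorPack &tensors, const Window &window)
// {
//     const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC);
//     ITensor       *dst = tensors.get_tensor(TensorType::ACL_DST);
//     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
//
//     Iterator in(src, window);
//     Iterator out(dst, window);
//     execute_window_loop(window, [&](const Coordinates &)
//     {
//         // Element-wise copy stands in for the real per-kernel math.
//         *reinterpret_cast<float *>(out.ptr()) = *reinterpret_cast<const float *>(in.ptr());
//     },
//     in, out);
// }
// ---------------------------------------------------------------------------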
@@ -1265,15 +1265,15 @@ Status NEPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, cons return Status{}; } -void NEPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - auto input1 = inputs.at(TensorType::ACL_SRC_0); - auto input2 = inputs.at(TensorType::ACL_SRC_1); - auto output = outputs.at(TensorType::ACL_DST); + auto input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0); + auto input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto output = tensors.get_tensor(TensorType::ACL_DST); if(_func_quantized != nullptr) { @@ -1363,15 +1363,15 @@ Status NEComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input return Status{}; } -void NEComplexPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEComplexPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - auto input1 = inputs.at(TensorType::ACL_SRC_0); - auto input2 = inputs.at(TensorType::ACL_SRC_1); - auto output = outputs.at(TensorType::ACL_DST); + auto input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0); + auto input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto output = tensors.get_tensor(TensorType::ACL_DST); Iterator input1_it(input1, window.broadcast_if_dimension_le_one(input1->info()->tensor_shape())); Iterator input2_it(input2, window.broadcast_if_dimension_le_one(input2->info()->tensor_shape())); diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index f4aeed5e9f..23b349b443 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -86,14 +86,14 @@ void NEReshapeLayerKernel::configure(const ITensorInfo *input, ITensorInfo *outp INEKernel::configure(win); } -void NEReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); switch(src->info()->data_type()) { diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 3472f7fe15..243a60f249 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -166,13 +166,15 @@ Status NEStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void NEStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void 
NEStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - strided_slice_generic(inputs.at(TensorType::ACL_SRC_0), outputs.at(TensorType::ACL_DST), _starts_abs, _final_strides, _shrink_mask, window); + strided_slice_generic(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_tensor(TensorType::ACL_DST), + _starts_abs, _final_strides, _shrink_mask, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index 1b32e3614e..171f5965a5 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -84,15 +84,14 @@ Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne return Status{}; } -void NEWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); // Offset output pointer to the correct position uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0]; diff --git a/src/runtime/CL/CLOperator.cpp b/src/runtime/CL/CLOperator.cpp index c41454e933..57a4d0ec57 100644 --- a/src/runtime/CL/CLOperator.cpp +++ b/src/runtime/CL/CLOperator.cpp @@ -33,19 +33,17 @@ ICLOperator::ICLOperator(IRuntimeContext *ctx) { } -void ICLOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void ICLOperator::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - CLScheduler::get().enqueue_op(*_kernel.get(), inputs, outputs, false); + CLScheduler::get().enqueue_op(*_kernel.get(), tensors, false); } -void ICLOperator::prepare(OperatorTensorMap constants) +void ICLOperator::prepare(ITensorPack &constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 5ef66f456a..ccef5cbd1b 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -151,22 +151,22 @@ void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::De _cl_tuner = cl_tuner; } -void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush) { ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised, "The CLScheduler is not initialised yet! 
Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); - const bool inject_memory = !inputs.empty(); + const bool inject_memory = !tensors.empty(); // Tune the kernel if the CLTuner has been provided if(_cl_tuner != nullptr) { - inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel); + inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, tensors) : _cl_tuner->tune_kernel_dynamic(kernel); } // Run kernel - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); + inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); if(flush) { @@ -176,11 +176,12 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { - enqueue_common(kernel, {}, {}, flush); + ITensorPack pack; + enqueue_common(kernel, pack, flush); } -void CLScheduler::enqueue_op(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush) { - enqueue_common(kernel, inputs, outputs, flush); + enqueue_common(kernel, tensors, flush); } } // namespace arm_compute diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index b2e3476e20..adfe67fb11 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -77,10 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel) void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) { - tune_kernel_dynamic(kernel, {}, {}); + ITensorPack pack; + tune_kernel_dynamic(kernel, pack); } -void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { // Get the configuration ID from the kernel and append GPU target name and number of available compute units const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units()); @@ -95,7 +96,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &input if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs); + cl::NDRange opt_lws = find_optimal_lws(kernel, tensors); // Insert the optimal LWS in the table add_lws_to_table(config_id, opt_lws); @@ -117,7 +118,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal _lws_table.emplace(kernel_id, optimal_lws); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) { // Profiling queue cl::CommandQueue queue_profiler; @@ -172,8 +173,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); // Run the kernel with default lws to be used as baseline - const bool inject_memory = !inputs.empty(); - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + const bool inject_memory = !tensors.empty(); + inject_memory ? 
kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); @@ -203,7 +204,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i kernel.set_lws_hint(lws_test); // Run the kernel - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + inject_memory ? kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish();
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 784473d426..5ddf227382 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -89,9 +89,9 @@ Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void CLActivationLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 1ddda021bc..4214813446 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -220,16 +220,14 @@ Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_ return Status{}; } -void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLConcatenation::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - if(inputs.size() != _num_inputs) + if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) { ARM_COMPUTE_ERROR("Configured with different number of inputs"); } @@ -237,15 +235,17 @@ void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, Operat if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) { ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), inputs, outputs, true); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); } else { int i = 0; for(auto &k : _concat_kernels) { - const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; - CLScheduler::get().enqueue_op(*k, input, outputs, true); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); ++i; } } @@ -303,13 +303,13 @@ Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inpu void CLConcatenateLayer::run() { - InputTensorMap srcs; + ITensorPack pack; for(unsigned i = 0; i < _impl->num_inputs; ++i) { - srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(srcs, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp index f8e9694b1c..de94255b48 100644 --- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp +++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp @@ -153,10 +153,10 @@ Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRsqrtLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLExpLayer::Impl @@ -195,10 +195,10 @@ Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLExpLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLNegLayer::Impl @@ -236,10 +236,10 @@ Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLNegLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLSinLayer::Impl @@ -277,10 +277,10 @@ Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLSinLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLAbsLayer::Impl @@ -318,10 +318,10 @@ Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLAbsLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLLogLayer::Impl @@ -359,10 +359,10 @@ Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLLogLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLRoundLayer::Impl @@ -400,9 +400,9 @@ Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRoundLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git 
a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp index e66e4bf526..6f664725c5 100644 --- a/src/runtime/CL/functions/CLElementwiseOperations.cpp +++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp @@ -47,19 +47,21 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -83,12 +85,11 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, input1, input2, output, policy, act_info); } -void CLArithmeticAddition::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticAddition::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticSubtraction::CLArithmeticSubtraction() @@ -110,12 +111,11 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info); } -void CLArithmeticSubtraction::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticSubtraction::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticDivision::CLArithmeticDivision() @@ -136,12 +136,11 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn return CLArithmeticOperationKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info); } -void CLArithmeticDivision::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticDivision::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMax::CLElementwiseMax() @@ -162,12 +161,11 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const 
ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info); } -void CLElementwiseMax::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMax::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMin::CLElementwiseMin() @@ -188,12 +186,11 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info); } -void CLElementwiseMin::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMin::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseSquaredDiff::CLElementwiseSquaredDiff() @@ -214,12 +211,11 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens return CLArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info); } -void CLElementwiseSquaredDiff::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseSquaredDiff::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwisePower::CLElementwisePower() @@ -240,12 +236,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return CLArithmeticOperationKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info); } -void CLElementwisePower::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwisePower::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -287,10 +282,12 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticAddition::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLArithmeticSubtraction::Impl @@ -331,10 +328,12 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso void 
CLArithmeticSubtraction::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLArithmeticDivision::Impl @@ -374,10 +373,12 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticDivision::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMax::Impl @@ -417,10 +418,12 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMax::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMin::Impl @@ -460,10 +463,12 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMin::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseSquaredDiff::Impl @@ -504,10 +509,12 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens void CLElementwiseSquaredDiff::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwisePower::Impl @@ -547,9 +554,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo void CLElementwisePower::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp index 
fbb466acc8..e03bd13284 100644 --- a/src/runtime/CL/functions/CLPReluLayer.cpp +++ b/src/runtime/CL/functions/CLPReluLayer.cpp @@ -44,19 +44,22 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) + +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -80,12 +83,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha return CLArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output); } -void CLPReluLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPReluLayer::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -126,9 +128,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha void CLPReluLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp index 34e06a3d03..883ce68536 100644 --- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp +++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp @@ -34,19 +34,21 @@ namespace arm_compute { namespace { -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -81,12 +83,11 @@ Status 
CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen return CLPixelWiseMultiplicationKernel::validate(input1, input2, output, scale, overflow_policy, rounding_policy, act_info); } -void CLPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication() @@ -116,12 +117,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con return CLComplexPixelWiseMultiplicationKernel::validate(input1, input2, output, act_info); } -void CLComplexPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -165,10 +165,12 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen void CLPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLComplexPixelWiseMultiplication::Impl @@ -208,9 +210,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con void CLComplexPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLReshapeLayer.cpp b/src/runtime/CL/functions/CLReshapeLayer.cpp index ac8b176963..273a761a0a 100644 --- a/src/runtime/CL/functions/CLReshapeLayer.cpp +++ b/src/runtime/CL/functions/CLReshapeLayer.cpp @@ -84,10 +84,10 @@ Status CLReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void CLReshapeLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute /** [CLReshapeLayer snippet] **/ diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index 
3689707bd0..f36550ba91 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -97,8 +97,9 @@ void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor void CLSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index bdef0785ec..b78073dd67 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -96,9 +96,9 @@ Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *out void CLStridedSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 1797c2ceb1..52644bf192 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -316,9 +316,9 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index 68c98cebe7..e49e15508b 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -74,9 +74,9 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index b07aa8ce18..55f62c1387 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -405,7 +405,7 @@ void CPPScheduler::run_workloads(std::vector &workloads) } #endif /* DOXYGEN_SKIP_THIS */ -void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); @@ -464,13 +464,13 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const { ThreadInfo info; info.cpu_info = &_cpu_info; - if(inputs.empty()) + if(tensors.empty()) { kernel->run(max_window, info); } else { - kernel->run_op(inputs, outputs, 
max_window, info); + kernel->run_op(tensors, max_window, info); } } else @@ -495,18 +495,18 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const for(unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &tensors](const ThreadInfo & info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - if(inputs.empty()) + if(tensors.empty()) { kernel->run(win, info); } else { - kernel->run_op(inputs, outputs, win, info); + kernel->run_op(tensors, win, info); } }; } @@ -515,15 +515,14 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const } } -void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { - schedule_common(kernel, hints, inputs, outputs); + schedule_common(kernel, hints, tensors); } void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { - const InputTensorMap inputs; - OutputTensorMap outputs; - schedule_common(kernel, hints, inputs, outputs); + ITensorPack tensors; + schedule_common(kernel, hints, tensors); } } // namespace arm_compute diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index a672656419..6f67bc005f 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,12 +49,12 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) kernel->run(kernel->window(), info); } -void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_UNUSED(hints); ThreadInfo info; info.cpu_info = &_cpu_info; - kernel->run_op(inputs, outputs, kernel->window(), info); + kernel->run_op(tensors, kernel->window(), info); } void SingleThreadScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads) diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index 1d819977c8..75068b15c9 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -33,19 +33,17 @@ INEOperator::INEOperator(IRuntimeContext *ctx) { } -void INEOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void INEOperator::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, tensors); } -void INEOperator::prepare(OperatorTensorMap constants) +void INEOperator::prepare(ITensorPack &constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 0e75e58b3b..7f55edbf70 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -85,9 +85,9 @@ Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void 
NEActivationLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index b18309ef1d..4453a015e8 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -79,8 +79,10 @@ void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input void NEArithmeticAddition::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp index c7f492bcbc..1c95bbfae8 100644 --- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp +++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp @@ -80,8 +80,10 @@ void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *in void NEArithmeticSubtraction::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 9f8a2a1b8e..8df4f4cb62 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -146,16 +146,14 @@ Status NEConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_ return Status{}; } -void NEConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void NEConcatenation::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - if(inputs.size() != _num_inputs) + if(static_cast<int>(tensors.size() - 1) != static_cast<int>(_num_inputs)) { ARM_COMPUTE_ERROR("Configured with different number of inputs"); } @@ -163,8 +161,10 @@ void NEConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, Operat int i = 0; for(auto &k : _concat_kernels) { - const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; - NEScheduler::get().schedule_op(k.get(), Window::DimY, input, outputs); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + NEScheduler::get().schedule_op(k.get(), Window::DimY, pack); ++i; } } @@ -216,13 +216,13 @@ Status NEConcatenateLayer::validate(const 
std::vector<const ITensorInfo *> &inpu void NEConcatenateLayer::run() { - InputTensorMap srcs; + ITensorPack pack; for(unsigned i = 0; i < _impl->num_inputs; ++i) { - srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(srcs, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index 9340cc09d4..d1f60c71e1 100644 --- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -163,9 +163,11 @@ Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * void NEElementwiseMax::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseMin::Impl @@ -202,9 +204,11 @@ Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * void NEElementwiseMin::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseSquaredDiff::Impl @@ -241,9 +245,11 @@ Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens void NEElementwiseSquaredDiff::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseDivision::Impl @@ -280,9 +286,11 @@ Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorI void NEElementwiseDivision::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwisePower::Impl @@ -319,9 +327,11 @@ Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo void NEElementwisePower::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, 
_impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } template <ComparisonOperation COP> @@ -364,9 +374,11 @@ Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, c template <ComparisonOperation COP> void NEElementwiseComparisonStatic<COP>::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseComparison::Impl @@ -401,9 +413,11 @@ Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITenso void NEElementwiseComparison::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } // Supported Specializations diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp index 15d9fd9959..f9393a4d92 100644 --- a/src/runtime/NEON/functions/NEPReluLayer.cpp +++ b/src/runtime/NEON/functions/NEPReluLayer.cpp @@ -71,9 +71,11 @@ void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor void NEPReluLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp index ba5dd7cdee..4208878b75 100644 --- a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp +++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp @@ -97,9 +97,11 @@ void NEPixelWiseMultiplication::configure(const ITensor *input1, const ITensor * void NEPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEComplexPixelWiseMultiplication::Impl @@ -134,8 +136,10 @@ void NEComplexPixelWiseMultiplication::configure(ITensor *input1, ITensor *input void NEComplexPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + 
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index 47d5519274..c1c88c1c7a 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -84,9 +84,9 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void NEReshapeLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESlice.cpp b/src/runtime/NEON/functions/NESlice.cpp index 7c3252178b..2bacf2ee2a 100644 --- a/src/runtime/NEON/functions/NESlice.cpp +++ b/src/runtime/NEON/functions/NESlice.cpp @@ -94,9 +94,10 @@ void NESlice::configure(const ITensor *input, ITensor *output, const Coordinates void NESlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStridedSlice.cpp b/src/runtime/NEON/functions/NEStridedSlice.cpp index 37e3590446..8bf81e8270 100644 --- a/src/runtime/NEON/functions/NEStridedSlice.cpp +++ b/src/runtime/NEON/functions/NEStridedSlice.cpp @@ -76,9 +76,10 @@ void NEStridedSlice::configure(const ITensor *input, ITensor *output, void NEStridedSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } Status NEStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 101601868a..11448e595c 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -83,7 +83,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } } -void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, @@ -97,7 +97,7 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Inp { ThreadInfo info; info.cpu_info = &_cpu_info; - kernel->run_op(inputs, outputs, max_window, info); + kernel->run_op(tensors, max_window, info); } else { @@ -106,11 +106,11 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Inp for(unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, 
&num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &tensors](const ThreadInfo & info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - kernel->run_op(inputs, outputs, win, info); + kernel->run_op(tensors, win, info); }; } run_workloads(workloads); diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 75b128591a..aa69bc297d 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -91,10 +91,10 @@ public: _kernels.push_back(std::move(info)); } - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override { _timer.start(); - _real_scheduler.schedule_op(kernel, hints, inputs, outputs); + _real_scheduler.schedule_op(kernel, hints, tensors); _timer.stop(); typename SchedulerClock<output_timestamps>::kernel_info info; -- cgit v1.2.1
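
Usage sketch (not part of the patch): with the two maps collapsed, a caller now builds a single ITensorPack and hands the same pack to operators and schedulers. This is a minimal sketch assuming only the API visible in the hunks above (ITensorPack::add_tensor, the TensorType slots, experimental::INEOperator::run); the run_binary_op helper is hypothetical, and tensor allocation and operator configuration are elided.

#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/NEON/INEOperator.h"

using namespace arm_compute;

// Hypothetical helper: run any already-configured two-input NEON operator.
// Inputs and output travel in one pack, keyed by TensorType slots, instead
// of separate InputTensorMap/OutputTensorMap/OperatorTensorMap arguments.
void run_binary_op(experimental::INEOperator &op, const ITensor *src0, const ITensor *src1, ITensor *dst)
{
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, src0); // first input slot
    pack.add_tensor(TensorType::ACL_SRC_1, src1); // second input slot
    pack.add_tensor(TensorType::ACL_DST, dst);    // output slot
    op.run(pack);                                 // one pack replaces (inputs, outputs, workspace)
}

The same pack flows down unchanged: the operator forwards it to IScheduler::schedule_op(kernel, hints, pack), which in turn calls ICPPKernel::run_op(pack, window, info); an empty pack is the scheduler's cue to fall back to the legacy kernel->run() path, as the schedule_common hunks above show.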