diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-10-07 23:52:48 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-10-10 09:48:53 +0000 |
commit | 0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 (patch) | |
tree | ea14c31a15c623cfa07db1dba722cd4ae61621b0 /src/core/NEON/kernels/NEStackLayerKernel.h | |
parent | c6137d2be4fb781b63831138970146a4eb8550a1 (diff) | |
download | ComputeLibrary-0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411.tar.gz |
Optimize NEStackLayer
Optimize the stack operation on the CPU by leveraging block memcpy.
Resolves: COMPMID-6498
Change-Id: I49d79d179f0375a73d654edd59fb33072112569b
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10451
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEStackLayerKernel.h')
-rw-r--r-- | src/core/NEON/kernels/NEStackLayerKernel.h | 62 |
1 file changed, 35 insertions, 27 deletions
diff --git a/src/core/NEON/kernels/NEStackLayerKernel.h b/src/core/NEON/kernels/NEStackLayerKernel.h index 685812b56d..02ee776ea4 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.h +++ b/src/core/NEON/kernels/NEStackLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,13 +22,16 @@ * SOFTWARE. */ -#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H -#define ARM_COMPUTE_NESTACKLAYERKERNEL_H +#ifndef ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H +#define ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H #include "arm_compute/core/Types.h" #include "src/core/NEON/INEKernel.h" +#include <cstdint> +#include <functional> + namespace arm_compute { class ITensor; @@ -57,43 +60,48 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] input Input tensors. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[out] output Output tensor. Data types supported: Same as @p input. * */ - void configure( - const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); + void configure(const std::vector<ITensor *> &input, uint32_t axis, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor info. 
Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] input Input tensor infos. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a status */ - static Status validate(const ITensorInfo *input, - unsigned int axis, - unsigned int idx_input, - unsigned int num_tensors, - const ITensorInfo *output); + static Status validate(const std::vector<ITensorInfo *> &input, uint32_t axis, const ITensorInfo *output); + + /** Prepare the reshape kernel for execution (Only executed once) for + * choosing the window and the algorithm. + */ + void prepare(); // Inherited methods overridden void run(const Window &window, const ThreadInfo &info) override; + /** Get the dimension to split the kernel workload + * + * @return the split dimension + */ + uint32_t get_split_dimension() const + { + return _split_dimension; + } + private: - const ITensor *_input; - ITensor *_output; - unsigned int _axis; - unsigned int _idx_input; + std::vector<ITensor *> _input; + ITensor *_output; + uint32_t _axis; + uint32_t _split_dimension; + + std::function<void(const std::vector<ITensor *> &, ITensor *, uint32_t, const Window &)> _stack_fn{}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ +#endif // ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H |