From 082630ba4e2de697b6d372dd35ffc1be6a42c346 Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Tue, 22 Aug 2023 15:46:27 +0100 Subject: Update CpuGemmConv2d and CpuFlatten to use CpuReshape operator - Following CpuReshapeKernel Optimizations, update the CpuGemmConv2D and CpuFlatten to use CpuReshape operator instead of CpuReshapeKernel - Minor changes to comment in NEReorgLayerKernel.h Resolves COMPMID-6504 Signed-off-by: Anitha Raj Change-Id: Ib6ee1fdc313d91249f9fe41c81e73324031c1ff4 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10186 Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/kernels/NEReorgLayerKernel.h | 10 +++++----- src/cpu/operators/CpuFlatten.cpp | 23 +++++++++++++++++------ src/cpu/operators/CpuFlatten.h | 19 +++++++++++++++---- src/cpu/operators/CpuGemmConv2d.cpp | 12 ++++++------ src/cpu/operators/CpuGemmConv2d.h | 14 +++++++------- 5 files changed, 50 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.h b/src/core/NEON/kernels/NEReorgLayerKernel.h index 38a7d9f196..6e67eb364e 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.h +++ b/src/core/NEON/kernels/NEReorgLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H -#define ARM_COMPUTE_NEREORGLAYERKERNEL_H +#ifndef ACL_SRC_CORE_NEON_KERNELS_NEREORGLAYERKERNEL_H +#define ACL_SRC_CORE_NEON_KERNELS_NEREORGLAYERKERNEL_H #include "src/core/NEON/INEKernel.h" @@ -60,7 +60,7 @@ public: */ void configure(const ITensor *input, ITensor *output, int32_t stride); - /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuReshapeKernel + /** Static function to check if given info will lead to a valid configuration * * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input @@ -80,4 +80,4 @@ private: int32_t _stride; }; } // namespace arm_compute -#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ +#endif // ACL_SRC_CORE_NEON_KERNELS_NEREORGLAYERKERNEL_H diff --git a/src/cpu/operators/CpuFlatten.cpp b/src/cpu/operators/CpuFlatten.cpp index f6ae139794..7bab9e481c 100644 --- a/src/cpu/operators/CpuFlatten.cpp +++ b/src/cpu/operators/CpuFlatten.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,7 +23,7 @@ */ #include "src/cpu/operators/CpuFlatten.h" -#include "src/cpu/kernels/CpuReshapeKernel.h" +#include "src/cpu/operators/CpuReshape.h" #include "src/common/utils/Log.h" @@ -31,17 +31,28 @@ namespace arm_compute { namespace cpu { +CpuFlatten::CpuFlatten() + : _reshape(nullptr) +{ +} + +CpuFlatten::~CpuFlatten() = default; + void CpuFlatten::configure(const ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_LOG_PARAMS(src, dst); - auto k = std::make_unique(); - k->configure(src, dst); - _kernel = std::move(k); + _reshape = std::make_unique(); + _reshape->configure(src, dst); } Status CpuFlatten::validate(const ITensorInfo *src, const ITensorInfo *dst) { - return kernels::CpuReshapeKernel::validate(src, dst); + return CpuReshape::validate(src, dst); +} + +void CpuFlatten::run(ITensorPack &tensors) +{ + _reshape->run(tensors); } } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/operators/CpuFlatten.h b/src/cpu/operators/CpuFlatten.h index 0e9fcbdc35..911760dd95 100644 --- a/src/cpu/operators/CpuFlatten.h +++ b/src/cpu/operators/CpuFlatten.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_FLATTEN_H -#define ARM_COMPUTE_CPU_FLATTEN_H +#ifndef ACL_SRC_CPU_OPERATORS_CPUFLATTEN_H +#define ACL_SRC_CPU_OPERATORS_CPUFLATTEN_H #include "src/cpu/ICpuOperator.h" @@ -30,10 +30,15 @@ namespace arm_compute { namespace cpu { +class CpuReshape; /** Basic function to flatten a given input */ class CpuFlatten : public ICpuOperator { public: + /** Constructor */ + CpuFlatten(); + /** Destructor */ + ~CpuFlatten(); /** Configure operator for a given list of arguments * * Valid data layouts: @@ -58,7 +63,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + +private: + std::unique_ptr _reshape; }; } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_FLATTEN_H */ +#endif // ACL_SRC_CPU_OPERATORS_CPUFLATTEN_H diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp index 7c0e58b94e..d11e4f0b24 100644 --- a/src/cpu/operators/CpuGemmConv2d.cpp +++ b/src/cpu/operators/CpuGemmConv2d.cpp @@ -35,11 +35,11 @@ #include "src/core/helpers/MemoryHelpers.h" #include "src/cpu/kernels/CpuCol2ImKernel.h" #include "src/cpu/kernels/CpuIm2ColKernel.h" -#include "src/cpu/kernels/CpuReshapeKernel.h" #include "src/cpu/kernels/CpuWeightsReshapeKernel.h" #include "src/cpu/operators/CpuGemm.h" #include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h" #include "src/cpu/operators/CpuGemmLowpOutputStage.h" +#include "src/cpu/operators/CpuReshape.h" #include "src/cpu/utils/CpuAuxTensorHandler.h" #include @@ -92,7 +92,7 @@ CpuGemmConv2d::SkipInfo CpuGemmConv2d::skip_im_col_info(const ITensorInfo *src, } CpuGemmConv2d::CpuGemmConv2d() - : _weights_reshape_kernel(nullptr), _im2col_kernel(), _mm_gemm(), _mm_gemmlowp(), _col2im_kernel(), _reshape_kernel(), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), + : _weights_reshape_kernel(nullptr), _im2col_kernel(), _mm_gemm(), _mm_gemmlowp(), _col2im_kernel(), _reshape(), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _data_layout(DataLayout::NCHW), _skip_im2col(false), 
_skip_col2im(false), _is_quantized(false), _is_prepared(false), _aux_mem(AuxTensorIdx::Count) { } @@ -379,8 +379,8 @@ void CpuGemmConv2d::configure(const ITensorInfo *src, const ITensorInfo *weights else { // Configure reshape layer - _reshape_kernel = std::make_unique(); - _reshape_kernel->configure(gemm_output_to_use, dst); + _reshape = std::make_unique(); + _reshape->configure(gemm_output_to_use, dst); } // Check if GEMM transforms weights @@ -642,7 +642,7 @@ void CpuGemmConv2d::run(ITensorPack &tensors) { TensorType::ACL_SRC, gemm_output_to_use }, { TensorType::ACL_DST, dst } }; - NEScheduler::get().schedule_op(_reshape_kernel.get(), Window::DimY, _reshape_kernel->window(), pack); + _reshape->run(pack); } } else if(out_has_padding) @@ -652,7 +652,7 @@ void CpuGemmConv2d::run(ITensorPack &tensors) { TensorType::ACL_SRC, gemm_output_to_use }, { TensorType::ACL_DST, dst } }; - NEScheduler::get().schedule_op(_reshape_kernel.get(), Window::DimY, _reshape_kernel->window(), pack); + _reshape->run(pack); } } diff --git a/src/cpu/operators/CpuGemmConv2d.h b/src/cpu/operators/CpuGemmConv2d.h index 81d34ae93d..61fe63a79f 100644 --- a/src/cpu/operators/CpuGemmConv2d.h +++ b/src/cpu/operators/CpuGemmConv2d.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_GEMM_CONV2D_H -#define ARM_COMPUTE_CPU_GEMM_CONV2D_H +#ifndef ACL_SRC_CPU_OPERATORS_CPUGEMMCONV2D_H +#define ACL_SRC_CPU_OPERATORS_CPUGEMMCONV2D_H #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" @@ -38,12 +38,12 @@ namespace cpu class CpuGemm; class CpuGemmLowpMatrixMultiplyCore; class CpuGemmLowpOutputStage; +class CpuReshape; namespace kernels { class CpuWeightsReshapeKernel; class CpuIm2ColKernel; class CpuCol2ImKernel; -class CpuReshapeKernel; } // namespace kernels /** Basic function to compute the convolution layer. This function calls the following kernels/functions: @@ -130,8 +130,8 @@ public: const bool enable_fast_math = false); // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; experimental::MemoryRequirements workspace() const override; private: @@ -222,7 +222,7 @@ private: std::unique_ptr _mm_gemm; std::unique_ptr _mm_gemmlowp; std::unique_ptr _col2im_kernel; - std::unique_ptr _reshape_kernel; + std::unique_ptr _reshape; TensorInfo _im2col_output; TensorInfo _weights_reshaped; @@ -240,4 +240,4 @@ private: }; } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_GEMM_CONV2D_H */ +#endif // ACL_SRC_CPU_OPERATORS_CPUGEMMCONV2D_H -- cgit v1.2.1
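
The essence of this patch is that CpuFlatten and CpuGemmConv2d now own a CpuReshape operator through a std::unique_ptr and forward the caller's ITensorPack to its run() method, instead of holding a CpuReshapeKernel and scheduling it through NEScheduler. Below is a minimal standalone C++ sketch of that composition pattern; the types here (TensorPack, Reshape, Flatten) are hypothetical stand-ins written for illustration, not Compute Library classes.

```cpp
// Minimal standalone sketch (hypothetical types, not Compute Library code) of the
// pattern this patch adopts: an outer operator owns an inner operator through
// std::unique_ptr and forwards the caller's tensor pack to it in run(), rather
// than scheduling a kernel itself.
#include <cstdio>
#include <map>
#include <memory>
#include <vector>

// Simplified stand-in for arm_compute::ITensorPack: slot id -> tensor data.
using TensorPack = std::map<int, std::vector<float> *>;
constexpr int ACL_SRC = 0;
constexpr int ACL_DST = 1;

struct IOperator
{
    virtual ~IOperator() = default;
    virtual void run(TensorPack &pack) = 0;
};

// Inner operator: copies SRC to DST, i.e. a "reshape" on flat buffers
// (element order is preserved, only the logical shape changes).
struct Reshape : IOperator
{
    void run(TensorPack &pack) override
    {
        *pack[ACL_DST] = *pack[ACL_SRC];
    }
};

// Outer operator: owns the inner operator and delegates, mirroring
// CpuFlatten::run() calling _reshape->run(tensors) after this patch.
struct Flatten : IOperator
{
    Flatten() : _reshape(std::make_unique<Reshape>()) {}
    void run(TensorPack &pack) override
    {
        _reshape->run(pack);
    }

private:
    std::unique_ptr<Reshape> _reshape;
};

int main()
{
    std::vector<float> src{1.f, 2.f, 3.f, 4.f}, dst(4, 0.f);
    TensorPack pack{{ACL_SRC, &src}, {ACL_DST, &dst}};

    Flatten flatten;
    flatten.run(pack);                       // forwards the same pack to the owned Reshape
    std::printf("dst[3] = %.1f\n", dst[3]);  // prints 4.0
}
```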