From 842ad211c11417ba456a2dca7e89988db98eb256 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Sat, 17 Sep 2022 13:08:56 +0100 Subject: =?UTF-8?q?Optimize=20Neon=E2=84=A2=20SUB=20operator=20by=20squash?= =?UTF-8?q?ing=20execution=20window?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves: COMPMID-5462 Change-Id: I2c7151c8faf4016cc33592fff04d492d7cbc8fd6 Signed-off-by: Jakub Sujak Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8366 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Gunes Bayir Benchmark: Arm Jenkins --- src/cpu/kernels/CpuSubKernel.cpp | 3 ++- src/cpu/kernels/CpuSubKernel.h | 6 ++++++ src/cpu/operators/CpuSub.cpp | 11 ++++++++++- src/cpu/operators/CpuSub.h | 5 ++++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp index c55d11e899..d908e4ed28 100644 --- a/src/cpu/kernels/CpuSubKernel.cpp +++ b/src/cpu/kernels/CpuSubKernel.cpp @@ -131,7 +131,8 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I _name = std::string("CpuSubKernel").append("/").append(uk->name); // CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped - Window win = calculate_max_window(out_shape, Steps()); + Window win; + std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src0, *src1); ICpuKernel::configure(win); } diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h index 323a3f1316..e835bac3d5 100644 --- a/src/cpu/kernels/CpuSubKernel.h +++ b/src/cpu/kernels/CpuSubKernel.h @@ -82,10 +82,16 @@ public: static const std::vector &get_available_kernels(); + size_t get_split_dimension() const + { + return _split_dimension; + } + private: ConvertPolicy _policy{}; SubKernelPtr _run_method{ nullptr }; std::string _name{}; + size_t _split_dimension{ Window::DimY }; }; } // namespace kernels } // namespace cpu 
diff --git a/src/cpu/operators/CpuSub.cpp b/src/cpu/operators/CpuSub.cpp index f0a7770cea..91a5b6e63c 100644 --- a/src/cpu/operators/CpuSub.cpp +++ b/src/cpu/operators/CpuSub.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "src/common/utils/Log.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + namespace arm_compute { namespace cpu @@ -45,5 +47,12 @@ Status CpuSub::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled()); return kernels::CpuSubKernel::validate(src0, src1, dst, policy); } + +void CpuSub::run(ITensorPack &tensors) +{ + const auto split_dimension = static_cast<kernels::CpuSubKernel *>(_kernel.get())->get_split_dimension(); + + NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); +} } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/operators/CpuSub.h b/src/cpu/operators/CpuSub.h index 025c928d8f..d463d1e063 100644 --- a/src/cpu/operators/CpuSub.h +++ b/src/cpu/operators/CpuSub.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,9 @@ public: * @return a status */ static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; }; } // namespace cpu } // namespace arm_compute -- cgit v1.2.1