From 4cdd6b80754b3abbf54650d9359cf940a4aaf772 Mon Sep 17 00:00:00 2001 From: Dana Zlotnik Date: Thu, 7 Oct 2021 15:31:54 +0300 Subject: Implement Minimum Workload Size (MWS) in all CPPKernels used by small networks * create get_mws method in ICPPKernel class that returns default value for all kernels * overwrite the default value for all the kernels used by small networks (according to benchmark case) Resolves COMPMID-4648 Change-Id: I46d7cae61217213279d2ee740edc73f600b6d576 Signed-off-by: Dana Zlotnik Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6412 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- arm_compute/core/CPP/ICPPKernel.h | 19 ++++++++++++++++++- src/core/NEON/kernels/NEPadLayerKernel.cpp | 8 ++++++++ src/core/NEON/kernels/NEPadLayerKernel.h | 9 +++++++++ src/cpu/kernels/CpuActivationKernel.cpp | 7 +++++++ src/cpu/kernels/CpuActivationKernel.h | 9 +++++++++ src/cpu/kernels/CpuAddKernel.cpp | 8 ++++++++ src/cpu/kernels/CpuAddKernel.h | 9 +++++++++ src/cpu/kernels/CpuIm2ColKernel.cpp | 7 +++++++ src/cpu/kernels/CpuIm2ColKernel.h | 8 ++++++++ src/cpu/kernels/CpuReshapeKernel.cpp | 8 ++++++++ src/cpu/kernels/CpuReshapeKernel.h | 9 +++++++++ .../kernels/assembly/CpuGemmAssemblyWrapperKernel.h | 13 +++++++++++++ .../CpuDepthwiseConv2dAssemblyWrapperKernel.cpp | 7 +++++++ .../CpuDepthwiseConv2dAssemblyWrapperKernel.h | 9 +++++++++ .../internal/CpuPool2dAssemblyWrapperKernel.cpp | 7 +++++++ .../kernels/internal/CpuPool2dAssemblyWrapperKernel.h | 9 +++++++++ 16 files changed, 145 insertions(+), 1 deletion(-) diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ab369ffe1d..af4a896a6c 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -38,6 +38,9 @@ class ITensor; class ICPPKernel : public IKernel { public: + static constexpr size_t default_mws = 128; /* Default minimum workload size value */ + static constexpr size_t small_network_mws = 256; /* Default Minimum workload size value for small networks */ + /** Default destructor */ virtual ~ICPPKernel() = default; @@ -88,6 +91,20 @@ public: ARM_COMPUTE_UNUSED(tensors, window, info); } + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] mws Minimum workload size for requested configuration. + */ + virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return default_mws; + } + /** Name of the kernel * * @return Kernel name diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index 3e2c57a18c..60986812be 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -258,4 +258,12 @@ void NEPadLayerKernel::run(const Window &window, const ThreadInfo &info) (this->*_func)(window); } } + +size_t NEPadLayerKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} + } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEPadLayerKernel.h b/src/core/NEON/kernels/NEPadLayerKernel.h index 00cda7dc22..b3b0725af8 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.h +++ b/src/core/NEON/kernels/NEPadLayerKernel.h @@ -79,6 +79,15 @@ public: // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. 
+ * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + private: /** Template function to run the padding function with constant padding * diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index 8fa7e9525e..70ab06fc8a 100644 --- a/src/cpu/kernels/CpuActivationKernel.cpp +++ b/src/cpu/kernels/CpuActivationKernel.cpp @@ -230,6 +230,13 @@ Status CpuActivationKernel::validate(const ITensorInfo *src, const ITensorInfo * return Status{}; } +size_t CpuActivationKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} + void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { // Early exit on disabled activation diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h index 43c266529f..8e78d86016 100644 --- a/src/cpu/kernels/CpuActivationKernel.h +++ b/src/cpu/kernels/CpuActivationKernel.h @@ -57,6 +57,15 @@ public: */ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info); + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. 
+ */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp index edbab37301..77aa1ffb3f 100644 --- a/src/cpu/kernels/CpuAddKernel.cpp +++ b/src/cpu/kernels/CpuAddKernel.cpp @@ -291,6 +291,14 @@ const char *CpuAddKernel::name() const { return _name.c_str(); } + +size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuAddKernel.h b/src/cpu/kernels/CpuAddKernel.h index 11c0f67132..a0c7e497dd 100644 --- a/src/cpu/kernels/CpuAddKernel.h +++ b/src/cpu/kernels/CpuAddKernel.h @@ -70,6 +70,15 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. 
+ */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + private: using AddKernelPtr = std::add_pointer::type; diff --git a/src/cpu/kernels/CpuIm2ColKernel.cpp b/src/cpu/kernels/CpuIm2ColKernel.cpp index 13764c49d1..5e3385d4ab 100644 --- a/src/cpu/kernels/CpuIm2ColKernel.cpp +++ b/src/cpu/kernels/CpuIm2ColKernel.cpp @@ -443,6 +443,13 @@ const char *CpuIm2ColKernel::name() const { return "CpuIm2ColKernel"; } + +size_t CpuIm2ColKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} } // namespace kernels } // namespace cpu } // namespace arm_compute \ No newline at end of file diff --git a/src/cpu/kernels/CpuIm2ColKernel.h b/src/cpu/kernels/CpuIm2ColKernel.h index fc8ae056bb..797d54c95c 100644 --- a/src/cpu/kernels/CpuIm2ColKernel.h +++ b/src/cpu/kernels/CpuIm2ColKernel.h @@ -91,6 +91,14 @@ public: // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. 
+ */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; private: /** Template function to run im2col diff --git a/src/cpu/kernels/CpuReshapeKernel.cpp b/src/cpu/kernels/CpuReshapeKernel.cpp index 3bbcc09cc5..91c549643f 100644 --- a/src/cpu/kernels/CpuReshapeKernel.cpp +++ b/src/cpu/kernels/CpuReshapeKernel.cpp @@ -134,6 +134,14 @@ const char *CpuReshapeKernel::name() const { return "CpuReshapeKernel"; } + +size_t CpuReshapeKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuReshapeKernel.h b/src/cpu/kernels/CpuReshapeKernel.h index 9fe4350445..d4e2b44b54 100644 --- a/src/cpu/kernels/CpuReshapeKernel.h +++ b/src/cpu/kernels/CpuReshapeKernel.h @@ -57,6 +57,15 @@ public: // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; }; } // namespace kernels } // namespace cpu diff --git a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h index 3b9a6b4760..ff8b0b143f 100644 --- a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h +++ b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h @@ -115,6 +115,19 @@ public: _name += "/" + kernel_name_tag; } } + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. 
+ * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; + } private: arm_gemm::GemmCommon *_kernel; diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp index eed4bb9dd5..a71864c10c 100644 --- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp +++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp @@ -354,6 +354,13 @@ const char *CpuDepthwiseConv2dAssemblyWrapperKernel::name() const { return "CpuDepthwiseConv2dAssemblyWrapperKernel"; } + +size_t CpuDepthwiseConv2dAssemblyWrapperKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h index 8ee24a6613..8980922945 100644 --- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h +++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h @@ -108,6 +108,15 @@ public: */ bool is_configured() const; + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. 
+ */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + private: std::unique_ptr _kernel_asm; std::vector _multipliers{}; diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp index 958c04b677..f9c11fd4bd 100644 --- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp +++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp @@ -274,6 +274,13 @@ void CpuPool2dAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInf _kernel_asm = std::move(pooling_kernel_asm); } + +size_t CpuPool2dAssemblyWrapperKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + return ICPPKernel::small_network_mws; +} } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h index ab3ed25b1e..8625fd96b4 100644 --- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h +++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h @@ -112,6 +112,15 @@ private: void create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info); std::unique_ptr _kernel_asm{ nullptr }; + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requested configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; }; } // namespace kernels } // namespace cpu -- cgit v1.2.1