diff options
author | Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> | 2022-09-20 11:49:23 +0100 |
---|---|---|
committer | Mohmun02 <MohammedSuhail.Munshi@arm.com> | 2022-10-12 09:27:42 +0000 |
commit | fa79fda2c797282de3589aaa69b06e065e8a21e0 (patch) | |
tree | 91020783a702c9a3be16dad076bca10321fe04c2 /src/cpu/kernels | |
parent | c8cc024603cb1db084227196a52e562bf251d339 (diff) | |
download | ComputeLibrary-fa79fda2c797282de3589aaa69b06e065e8a21e0.tar.gz |
Optimize Neon™ Logistic Activation
- Use a 1d execution window to improve memory access pattern.
Resolves: [COMPMID-5465]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ida30669ffa06eb002ca43a6edf15e25a6eaad2f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8344
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels')
-rw-r--r-- | src/cpu/kernels/CpuActivationKernel.cpp | 24 | ||||
-rw-r--r-- | src/cpu/kernels/CpuActivationKernel.h | 10 |
2 files changed, 30 insertions, 4 deletions
```diff
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 61efcb2dd6..f1e485883c 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -182,10 +182,16 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *src,
 
 void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
 {
+    ARM_COMPUTE_UNUSED(dst);
     ARM_COMPUTE_ERROR_ON_NULLPTR(src);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, activation_info));
 
     const auto uk = CpuActivationKernel::get_implementation(ActivationDataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa(), activation_info.activation() });
+    if(dst != nullptr)
+    {
+        // dst auto inizialitation if not yet initialized
+        auto_init_if_empty(*dst, *src->clone());
+    }
 
     ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
 
@@ -200,10 +206,20 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
 #endif // __aarch64__
     _act_info = activation_info;
 
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(src, dst);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICPPKernel::configure(win_config.second);
+    Window win;
+
+    if(src->data_layout() != DataLayout::NHWC)
+    {
+        // Use squashed window
+        std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
+        ICPPKernel::configure(win);
+    }
+    else
+    {
+        // Configure kernel window
+        win = calculate_max_window(*src, Steps());
+        ICPPKernel::configure(win);
+    }
 }
 
 Status CpuActivationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h
index d856a9357f..fe2d783059 100644
--- a/src/cpu/kernels/CpuActivationKernel.h
+++ b/src/cpu/kernels/CpuActivationKernel.h
@@ -73,6 +73,15 @@ public:
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
     const char *name() const override;
 
+    /** Get the preferred dimension in which the scheduler splits the work into multiple jobs.
+     *
+     * @return The split dimension hint.
+     */
+    size_t get_split_dimension_hint() const
+    {
+        return _split_dimension;
+    }
+
     struct ActivationKernel
     {
         const char *name;
@@ -85,6 +94,7 @@ public:
 private:
     ActivationLayerInfo _act_info{};
     ActivationKernelPtr _run_method{ nullptr };
+    size_t _split_dimension{ Window::DimY };
     std::string _name{};
 };
 } // namespace kernels
```