From 1b2ee3eb6991efcbd09cde84ab0c383f15648738 Mon Sep 17 00:00:00 2001 From: David Mansell Date: Tue, 22 Aug 2023 13:27:03 +0100 Subject: CPU: Depthwise: Generate correct size for input indirection array. Signed-off-by: David Mansell Change-Id: I359ed0703f4036e017b34b622f76b630cefac973 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10183 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Benchmark: Arm Jenkins --- docs/user_guide/release_version_and_change_log.dox | 2 +- .../NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_generic.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index c8c872631d..2581ae45ab 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -47,7 +47,7 @@ v23.11 Public major release - Add support for output data type S64 in NEArgMinMaxLayer and CLArgMinMaxLayer - Update OpenCL™ API headers to v2023.04.17. - Performance optimizations: - - Optimize @ref CpuReshape + - Optimize @ref cpu::CpuReshape v23.08 Public major release - Deprecate the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose. 
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_generic.hpp index ca5026b6e0..e2d05560a1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_generic.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_generic.hpp @@ -186,7 +186,7 @@ class GenericInputArrayElement static size_t get_element_size(const WorkspaceArgs &args) { const auto kernel_points = args.depthwise_args.kernel_rows * args.depthwise_args.kernel_cols; - return sizeof(T **) * args.strategy->get_input_rows() * args.strategy->get_input_cols() * kernel_points; + return sizeof(T **) * args.strategy->get_output_rows() * args.strategy->get_output_cols() * kernel_points; } template -- cgit v1.2.1