about | summary | refs | log | tree | commit | diff
path: root/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
diff options
context:
space:
mode:
author ramelg01 <ramy.elgammal@arm.com> 2022-04-08 03:52:28 +0100
committer Ramy Elgammal <ramy.elgammal@arm.com> 2022-04-25 15:35:59 +0000
commit c827e99fc46521f43719b0c2d1b6f05d66abf68c (patch)
tree 31df1002673b2a4c4aae66608ad85b1ad6517050 /src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
parent 0a3948394e7e77344201b8732e9c20fcb5fa9a38 (diff)
download ComputeLibrary-c827e99fc46521f43719b0c2d1b6f05d66abf68c.tar.gz
Update Neon™ pooling kernel
- Reduce duplication and simplify overall structure.
- Improve multi-threaded performance by sharing more data in lower-level caches.

Partially Resolves: COMPMID-5054

Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: I5f4dc50913401d5c1cbfc10b866fae9490cbc4d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7404
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Andrew Mundy
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp')
-rw-r--r-- src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
index 77428b5f48..10ff4183c0 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
@@ -156,7 +156,7 @@ void CpuPool2dAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &
const auto in_ptr = src->buffer() + src->info()->offset_first_element_in_bytes();
auto out_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes();
- auto working_space = workspace->buffer() + workspace->info()->offset_first_element_in_bytes();
+ auto working_space = (workspace == nullptr) ? nullptr : workspace->buffer() + workspace->info()->offset_first_element_in_bytes();
const auto src_shape = src->info()->tensor_shape();
const auto dst_shape = dst->info()->tensor_shape();
@@ -197,7 +197,7 @@ void CpuPool2dAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src,
arm_conv::pooling::PoolingStride stride{};
std::tie(stride.cols, stride.rows) = info.pad_stride_info.stride();
- const arm_conv::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() };
+ const arm_conv::pooling::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() };
constexpr unsigned int idx_width = 1;
constexpr unsigned int idx_height = 2;
@@ -236,7 +236,7 @@ void CpuPool2dAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInf
arm_conv::pooling::PoolingStride stride{};
std::tie(stride.cols, stride.rows) = info.pad_stride_info.stride();
- const arm_conv::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() };
+ const arm_conv::pooling::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() };
constexpr unsigned int idx_width = 1;
constexpr unsigned int idx_height = 2;