From 9032ee32da54804806a3f26cbbf5a62b3c764f72 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 7 Aug 2019 17:04:11 +0100 Subject: MLCE-129: NEPad 30x slower than TensorFlow's implementation Change-Id: I44770e6a3134c70c4bd58f890d06cb43c9bd8bff Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/1853 Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/runtime/NEON/functions/NEPadLayer.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arm_compute/runtime') diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index 67f68b86d3..5ba951a94d 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -30,16 +30,21 @@ #include "arm_compute/runtime/SubTensor.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute { -/** Basic function to pad a tensor. This function calls the following NEON kernels: +/** Basic function to pad a tensor. 
This function calls the following NEON functions/kernels: + * + * - For padding mode = PaddingMode::CONSTANT: + * -# @ref NEPadLayerKernel + * - Otherwise: + * -# @ref NECopyKernel + * -# @ref NEStridedSlice + * -# @ref NEConcatenateLayer * - * -# @ref NEMemsetKernel - * -# @ref NECopyKernel */ class NEPadLayer : public IFunction { @@ -93,15 +98,14 @@ private: private: NECopyKernel _copy_kernel; + NEPadLayerKernel _pad_kernel; PaddingMode _mode; PaddingList _padding; - NEMemsetKernel _memset_kernel; uint32_t _num_dimensions; std::vector<NEStridedSlice> _slice_functions; std::vector<NEConcatenateLayer> _concat_functions; std::vector<Tensor> _slice_results; std::vector<Tensor> _concat_results; - SubTensor _output_subtensor; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEPADLAYER_H__ */ -- cgit v1.2.1