about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime
diff options
context:
space:
mode:
author: Manuel Bottini <manuel.bottini@arm.com> 2019-08-07 17:04:11 +0100
committer: Manuel Bottini <manuel.bottini@arm.com> 2019-09-09 15:16:51 +0000
commit: 9032ee32da54804806a3f26cbbf5a62b3c764f72 (patch)
tree: 6264e3def00f2d044b7c28e5159fe8bedb50653d /arm_compute/runtime
parent: ffd31defdb84d4ca1e24e9248d628c0075767302 (diff)
download: ComputeLibrary-9032ee32da54804806a3f26cbbf5a62b3c764f72.tar.gz
MLCE-129: NEPad 30x slower than TensorFlow's implementation
Change-Id: I44770e6a3134c70c4bd58f890d06cb43c9bd8bff
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1853
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEPadLayer.h 16
1 files changed, 10 insertions, 6 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 67f68b86d3..5ba951a94d 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -30,16 +30,21 @@
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
+#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
-/** Basic function to pad a tensor. This function calls the following NEON kernels:
+/** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
+ *
+ * - For padding mode = PaddingMode::CONSTANT:
+ * -# @ref NEPadLayerKernel
+ * - Otherwise:
+ * -# @ref NECopyKernel
+ * -# @ref NEStridedSlice
+ * -# @ref NEConcatenateLayer
*
- * -# @ref NEMemsetKernel
- * -# @ref NECopyKernel
*/
class NEPadLayer : public IFunction
{
@@ -93,15 +98,14 @@ private:
private:
NECopyKernel _copy_kernel;
+ NEPadLayerKernel _pad_kernel;
PaddingMode _mode;
PaddingList _padding;
- NEMemsetKernel _memset_kernel;
uint32_t _num_dimensions;
std::vector<NEStridedSlice> _slice_functions;
std::vector<NEConcatenateLayer> _concat_functions;
std::vector<Tensor> _slice_results;
std::vector<Tensor> _concat_results;
- SubTensor _output_subtensor;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEPADLAYER_H__ */