about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-09-03 13:20:34 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-09-03 17:54:45 +0000
commit feaea101da17b383fe85440b0820132d0e0fa97d (patch)
tree 0bcbe39636efe56ba7b0799e39f2880a2b09ba45 /arm_compute/core/NEON
parent ec4dee8c68a3d0f6d63db184bfb2f4589429778e (diff)
download ComputeLibrary-feaea101da17b383fe85440b0820132d0e0fa97d.tar.gz
COMPMID-3143: Remove padding from NEGEMMInterleave4x4Kernel
- Remove padding from NEGEMMInterleave4x4Kernel
- Extend test for validating zero padding requirement

Change-Id: I94abc271e005f9dd6e1721b185631f55f598dbfd
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3915
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r-- arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h 25
1 files changed, 18 insertions, 7 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index 7ddbf4bca8..322932bab2 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,15 +77,26 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
- /** Common signature for all the transpose functions
+ /** Template function to run gemm interleave 4x4
*
- * @param[in] input An input tensor. Data types supported: All
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
+ * @tparam ScalarType Scalar datatype
+ *
+ * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename ScalarType>
+ void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window);
+
+ /** Common signature for all the specialised gemm interleave 4x4 functions
+ *
+ * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
*/
- using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window);
+ using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window);
- GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */
+ GEMMInterleaveFunctionFuncPtr _func;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/