COMPMID-3143: Remove padding from NEGEMMInterleave4x4Kernel

- Remove padding from NEGEMMInterleave4x4Kernel
- Extend test for validating zero padding requirement

Change-Id: I94abc271e005f9dd6e1721b185631f55f598dbfd
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3915
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-09-03 13:20:34 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-09-03 17:54:45 +0000
commit: feaea101da17b383fe85440b0820132d0e0fa97d (patch)
tree: 0bcbe39636efe56ba7b0799e39f2880a2b09ba45 /arm_compute/core/NEON/kernels
parent: ec4dee8c68a3d0f6d63db184bfb2f4589429778e (diff)
download: ComputeLibrary-feaea101da17b383fe85440b0820132d0e0fa97d.tar.gz
1 files changed, 18 insertions, 7 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index 7ddbf4bca8..322932bab2 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,15 +77,26 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    /** Common signature for all the transpose functions
+    /** Template function to run gemm interleave 4x4
      *
-     * @param[in]  input  An input tensor. Data types supported: All
-     * @param[out] output The output tensor. Data type supported: same as @p input
-     * @param[in]  window Region on which to execute the kernel.
+     * @tparam ScalarType Scalar datatype
+     *
+     * @param[in]  input  Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[in]  window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     */
+    template <typename ScalarType>
+    void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window);
+
+    /** Common signature for all the specialised gemm interleave 4x4 functions
+     *
+     * @param[in]  input  Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[in]  window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
      */
-    using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window);
+    using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window);
 
-    GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */
+    GEMMInterleaveFunctionFuncPtr _func;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2020-09-03 13:20:34 +0100
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2020-09-03 17:54:45 +0000
commit	feaea101da17b383fe85440b0820132d0e0fa97d (patch)
tree	0bcbe39636efe56ba7b0799e39f2880a2b09ba45 /arm_compute/core/NEON/kernels
parent	ec4dee8c68a3d0f6d63db184bfb2f4589429778e (diff)
download	ComputeLibrary-feaea101da17b383fe85440b0820132d0e0fa97d.tar.gz