diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-09-03 13:20:34 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-09-03 17:54:45 +0000 |
commit | feaea101da17b383fe85440b0820132d0e0fa97d (patch) | |
tree | 0bcbe39636efe56ba7b0799e39f2880a2b09ba45 /arm_compute/core/NEON/kernels | |
parent | ec4dee8c68a3d0f6d63db184bfb2f4589429778e (diff) | |
download | ComputeLibrary-feaea101da17b383fe85440b0820132d0e0fa97d.tar.gz |
COMPMID-3143: Remove padding from NEGEMMInterleave4x4Kernel
- Remove padding from NEGEMMInterleave4x4Kernel
- Extend test for validating zero padding requirement
Change-Id: I94abc271e005f9dd6e1721b185631f55f598dbfd
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3915
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r-- | arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 25 |
1 files changed, 18 insertions, 7 deletions
```diff
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index 7ddbf4bca8..322932bab2 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,15 +77,26 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    /** Common signature for all the transpose functions
+    /** Template function to run gemm interleave 4x4
      *
-     * @param[in] input An input tensor. Data types supported: All
-     * @param[out] output The output tensor. Data type supported: same as @p input
-     * @param[in] window Region on which to execute the kernel.
+     * @tparam ScalarType Scalar datatype
+     *
+     * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     */
+    template <typename ScalarType>
+    void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window);
+
+    /** Common signature for all the specialised gemm interleave 4x4 functions
+     *
+     * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
      */
-    using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window);
+    using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window);
 
-    GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */
+    GEMMInterleaveFunctionFuncPtr _func;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/
```