From f6c572ce404c8ac99b0b00c65b757fbadab33dc1 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 14 Feb 2018 12:47:30 +0000 Subject: COMPMID-784: Productise Winograd. a) Added support for kernel size 5. b) Templatised data type for transforms and batched gemms kernels. Change-Id: Idb83dda7a5eec19e015888ab31902bd791913297 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120540 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/runtime/NEON/functions/NEWinogradLayer.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arm_compute/runtime/NEON') diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h index 63cac3a3b4..f57be697b5 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h" +#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/functions/CPPPermute.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -38,6 +38,11 @@ namespace arm_compute { class ITensor; /** Basic function to simulate a convolution layer. 
This function calls the following NEON kernels: + * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) + * -# @ref NEWinogradLayerTransformInputKernel + * -# @ref NEWinogradLayerTransformOutputKernel + * -# @ref NEWinogradLayerBatchedGEMMKernel + * -# @ref CPPPermute (three times: weights, input and output) */ class NEWinogradLayer : public IFunction { @@ -68,11 +73,12 @@ public: NEWinogradLayer &operator=(const NEWinogradLayer &) = delete; private: - MemoryGroup _memory_group; - NEWinogradLayerKernel<2, 2, 3, 3> _winograd_kernel; - NEWinogradLayerTransformInputKernel<2, 2, 3, 3> _transform_input_kernel; - NEWinogradLayerTransformOutputKernel<2, 2, 3, 3> _transform_output_kernel; - NEWinogradLayerTransformWeightsKernel<2, 2, 3, 3> _transform_weights_kernel; + MemoryGroup _memory_group; + std::unique_ptr<INEKernel> _batched_gemm_kernel; + std::unique_ptr<INEKernel> _transform_input_kernel; + std::unique_ptr<INEKernel> _transform_output_kernel; + std::unique_ptr<INEKernel> _transform_weights_kernel; + CPPPermute _permute_input; CPPPermute _permute_weights; CPPPermute _permute_output; -- cgit v1.2.1