diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h | 7 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEWinogradLayer.h | 8 |
2 files changed, 10 insertions, 5 deletions
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
index c1343044a6..9ab2e4de11 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
@@ -31,6 +31,7 @@ namespace arm_compute
 {
 class ITensor;
 class NEWinogradLayerKernel;
+
 class Winograd3x3F32
 {
 public:
@@ -68,10 +69,9 @@ public:
     /** Initialise the kernel
      *
-     * @param[in,out] output Output tensor to store the result of matrix multiplication. Data type supported: F32.
-     * @param[in] convolver A pointer to the winograd convolver, this object must have been configured and is ready to execute 16 GEMMS .
+     * @param[in] convolver A pointer to the winograd convolver, this object must have been configured and is ready to execute 16 GEMMS .
      */
-    void configure(ITensor *output, Winograd3x3F32 *convolver);
+    void configure(Winograd3x3F32 *convolver);
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -90,7 +90,6 @@ public:
 protected:
     Winograd3x3F32 *_convolver;
-    ITensor *_output;
 };
 } // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
index 77707060ec..6fecf082a2 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
@@ -28,6 +28,7 @@
 #include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -69,9 +70,14 @@ public:
 private:
     MemoryGroup _memory_group;
     NEWinogradLayerKernel _winograd_kernel;
-    Tensor _weights_workspace;
+    CPPPermute _permute_input;
+    CPPPermute _permute_weights;
+    CPPPermute _permute_output;
     Tensor _workspace;
     Tensor _kernel_storage;
+    Tensor _input_nhwc;
+    Tensor _output_nhwc;
+    Tensor _weights_hwio;
     const ITensor *_input;
     const ITensor *_weights;
     ITensor *_output;