From a4bba9c594c4022c9f85192bb8fd3593ad1a8d3c Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Tue, 2 Apr 2019 15:27:52 +0100
Subject: COMPMID-1995: Fix 32-bit NEDepthwiseConvolution errors.

-Updates padding handling in assembly depthwise kernels.
-Fixes 32-bit runs issues for depthwise convolution.

Change-Id: I3fe6369397c1d13f5629dd34c068ce4af53c95cd
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/939
Reviewed-by: Giuseppe Rossini
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../kernels/convolution/depthwise/depthwise.hpp | 37 ++++++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

(limited to 'arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp')

diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 45e8da0272..e0cb616a3d 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -24,7 +24,7 @@
 
 #pragma once
 
-#include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp"
+#include
 #include "arm_compute/core/NEON/kernels/convolution/common/activation.hpp"
 #include "arm_compute/core/NEON/kernels/convolution/common/padding.hpp"
 
@@ -275,6 +275,14 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
       unsigned int out_col_stride
     );
 
+    template
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const InputType* inptrs[inner_tile_rows][inner_tile_cols],
+      OutputType* outptrs[output_tile_rows][output_tile_cols]
+    );
+
     int n_channels(void) const;
 
   private:
@@ -290,9 +298,7 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
 
     // Stride information for a convolution instance
     int _input_col_stride, _input_row_stride, _input_batch_stride;
-    const int _input_ws_col_stride, _input_ws_row_stride;
     int _output_col_stride, _output_row_stride, _output_batch_stride;
-    const int _output_ws_col_stride, _output_ws_row_stride;
 
     // Methods for getting access to working space
     size_t _get_input_working_space_size(void) const;
@@ -352,6 +358,14 @@ class DepthwiseConvolution : public DepthwiseConvolutionBase<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 
 
@@ -415,6 +429,14 @@ class DepthwiseConvolution<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -478,6 +500,15 @@ class DepthwiseConvolution<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 #endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 }  // namespace depthwise
-- 
cgit v1.2.1