about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-02 15:27:52 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-09 11:58:01 +0000
commit a4bba9c594c4022c9f85192bb8fd3593ad1a8d3c (patch)
tree 0e79ebd7105411f6756e63d3ce23f16aaeb88566 /arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
parent 3418ba520dd6251738ba905df84a201121433ecd (diff)
download ComputeLibrary-a4bba9c594c4022c9f85192bb8fd3593ad1a8d3c.tar.gz
COMPMID-1995: Fix 32-bit NEDepthwiseConvolution errors.
-Updates padding handling in assembly depthwise kernels.
-Fixes 32-bit runs issues for depthwise convolution.

Change-Id: I3fe6369397c1d13f5629dd34c068ce4af53c95cd
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/939
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp')
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp 37
1 files changed, 34 insertions, 3 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 45e8da0272..e0cb616a3d 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -24,7 +24,7 @@
#pragma once
-#include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp"
+#include <arm_neon.h>
#include "arm_compute/core/NEON/kernels/convolution/common/activation.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/padding.hpp"
@@ -275,6 +275,14 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
unsigned int out_col_stride
);
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptrs[inner_tile_rows][inner_tile_cols],
+ OutputType* outptrs[output_tile_rows][output_tile_cols]
+ );
+
int n_channels(void) const;
private:
@@ -290,9 +298,7 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
// Stride information for a convolution instance
int _input_col_stride, _input_row_stride, _input_batch_stride;
- const int _input_ws_col_stride, _input_ws_row_stride;
int _output_col_stride, _output_row_stride, _output_batch_stride;
- const int _output_ws_col_stride, _output_ws_row_stride;
// Methods for getting access to working space
size_t _get_input_working_space_size(void) const;
@@ -352,6 +358,14 @@ class DepthwiseConvolution : public DepthwiseConvolutionBase<
unsigned int out_row_stride,
unsigned int out_col_stride
);
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
};
@@ -415,6 +429,14 @@ class DepthwiseConvolution<
unsigned int out_row_stride,
unsigned int out_col_stride
);
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -478,6 +500,15 @@ class DepthwiseConvolution<
unsigned int out_row_stride,
unsigned int out_col_stride
);
+
+ template <nck::ActivationFunction Activation>
+ void execute_tile(
+ int n_channels,
+ const void* packed_params,
+ const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+ float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+ );
};
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
} // namespace depthwise