about summary refs log tree commit diff
path: root/arm_compute/core/NEON/kernels
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-09-12 20:11:34 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit a799ce0ad775829862891dd98d1232638ec8761e (patch)
tree 4b7bb9b080a44aa5cfff67b2ce7177929b22405f /arm_compute/core/NEON/kernels
parent d63dfa2fc61a33b4e675ec6bc7458d8700174134 (diff)
download ComputeLibrary-a799ce0ad775829862891dd98d1232638ec8761e.tar.gz
COMPMID-1564: Add NEDepthwiseConvolution3x3 for QASYMM8

Change-Id: I1f55508af6f220e5f41df7b56daffb4761ed0591
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148253
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp 17
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp 26
2 files changed, 35 insertions, 8 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 4ca68116db..472c44f97a 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -33,6 +33,7 @@ class IDepthwiseConvolution
virtual ~IDepthwiseConvolution() = default;
virtual int output_size(const int dim_size, const bool padding_same) const = 0;
virtual unsigned int get_window(void) const = 0;
+ virtual void set_offsets(int input_offset, int weights_offset) = 0;
virtual void run(const unsigned int start, const unsigned int stop) = 0;
};
@@ -179,6 +180,13 @@ class DepthwiseConvolution : public IDepthwiseConvolution
>::get_output_size(dim_size, padding_same);
}
+ /** Sets quantization offsets
+ *
+ * @param[in] input_offset Input offset
+ * @param[in] weights_offset Weights offset
+ */
+ void set_offsets(int input_offset, int weights_offset) override;
+
/** Get the window of work to be performed by an instance of the operator.
*/
unsigned int get_window(void) const override;
@@ -212,7 +220,9 @@ class DepthwiseConvolution : public IDepthwiseConvolution
const int row_pad_out_bottom,
const int n_tiles,
const int n_input_cols,
- const int n_output_cols
+ const int n_output_cols,
+ const int input_offset,
+ const int weights_offset
);
// Determine the maximum (and minimum) padding values which can be applied
@@ -272,7 +282,9 @@ class DepthwiseConvolution : public IDepthwiseConvolution
const int _in_pad_bottom,
const int _in_pad_right,
const int _out_pad_bottom,
- const int _out_pad_right
+ const int _out_pad_right,
+ const int _input_offset,
+ const int _weights_offset
);
/* Arrays of methods to process tensor tiles.
@@ -300,6 +312,7 @@ class DepthwiseConvolution : public IDepthwiseConvolution
const int _weight_col_stride, _weight_row_stride;
const int _input_col_stride, _input_row_stride, _input_batch_stride;
const int _output_col_stride, _output_row_stride, _output_batch_stride;
+ int _input_offset, _weights_offset;
};
} // namespace depthwise
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
index 17889849db..e262817a3c 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
@@ -82,7 +82,8 @@ DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
_input_batch_stride(input_batch_stride ? input_batch_stride : _n_input_rows * _input_row_stride),
_output_col_stride(output_col_stride ? output_col_stride : _n_channels),
_output_row_stride(output_row_stride ? output_row_stride : _n_output_cols * _output_col_stride),
- _output_batch_stride(output_batch_stride ? output_batch_stride : _n_output_rows * _output_row_stride)
+ _output_batch_stride(output_batch_stride ? output_batch_stride : _n_output_rows * _output_row_stride),
+ _input_offset(0), _weights_offset(0)
{
}
@@ -94,6 +95,12 @@ unsigned int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_wind
return iceildiv(_n_channels, CHANNEL_BLOCK);
}
+template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
+void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::set_offsets(int input_offset, int weights_offset) // Stores the quantization offsets on this instance; both members are initialised to 0 by the constructor.
+{
+ _input_offset = input_offset; // run() forwards this member to process_tile_row().
+ _weights_offset = weights_offset; // Forwarded alongside _input_offset by run().
+}
template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::run(
@@ -145,7 +152,8 @@ void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::run(
outptr_row + start_channel, _output_row_stride, _output_col_stride,
input_row_pad_top, input_pad_left, input_row_pad_bottom,
output_row_pad_bottom,
- _n_tile_cols, _n_input_cols, _n_output_cols
+ _n_tile_cols, _n_input_cols, _n_output_cols,
+ _input_offset, _weights_offset
);
}
}
@@ -170,7 +178,9 @@ void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::process_tile_row
const int row_pad_out_bottom,
const int n_tiles,
const int n_input_cols,
- const int n_output_cols
+ const int n_output_cols,
+ const int input_offset,
+ const int weights_offset
)
{
constexpr int tile_overlap = kernel_cols - stride_cols;
@@ -242,7 +252,7 @@ void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::process_tile_row
inptr_col, in_row_stride, in_col_stride,
outptr_col, out_row_stride, out_col_stride,
row_pad_in_top, t_pad_in_left, row_pad_in_bottom, t_pad_in_right,
- row_pad_out_bottom, t_pad_out_right
+ row_pad_out_bottom, t_pad_out_right, input_offset, weights_offset
);
}
}
@@ -313,7 +323,9 @@ struct DepthwiseConvolutionImpl : public DepthwiseConvolution<
const int in_pad_bottom=0,
const int in_pad_right=0,
const int out_pad_bottom=0,
- const int out_pad_right=0
+ const int out_pad_right=0,
+ const int input_offset=0,
+ const int weights_offset=0
);
};
@@ -340,7 +352,9 @@ void DepthwiseConvolutionImpl<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::process_tile
const int _in_pad_bottom,
const int _in_pad_right,
const int _out_pad_bottom,
- const int _out_pad_right
+ const int _out_pad_right,
+ const int _input_offset,
+ const int _weights_offset
)
{
constexpr auto inner_tile_rows = DWC::inner_tile_rows;