aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-09-12 20:11:34 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commita799ce0ad775829862891dd98d1232638ec8761e (patch)
tree4b7bb9b080a44aa5cfff67b2ce7177929b22405f /src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
parentd63dfa2fc61a33b4e675ec6bc7458d8700174134 (diff)
downloadComputeLibrary-a799ce0ad775829862891dd98d1232638ec8761e.tar.gz
COMPMID-1564: Add NEDepthwiseConvolution3x3 for QASYMM8
Change-Id: I1f55508af6f220e5f41df7b56daffb4761ed0591 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148253 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp37
1 files changed, 34 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 7029b06615..99bdb7a70e 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -198,8 +198,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
output_shape.set(1, convolver->output_size(output_shape.y(), same_padding)); // Set width
output_shape.set(2, convolver->output_size(output_shape.z(), same_padding)); // Set height
+ const DataType output_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
+
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt));
// Configure window (optimised)
// Set padding in channels
@@ -324,7 +326,7 @@ bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(Tenso
}
// Check supported data type
- bool supported_datatype = is_data_type_float(dt);
+ bool supported_datatype = is_data_type_float(dt) || is_data_type_quantized(dt);
// Check for supported strides
const auto &strides = conv_info.stride();
@@ -345,11 +347,15 @@ bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(Tenso
void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver()
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(_input, _weights);
ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3);
_convolver = create_convolver_object(_conv_info, _weights, _input, _output, true);
+ if(_convolver)
+ {
+ _convolver->set_offsets(-_input->info()->quantization_info().offset, -_weights->info()->quantization_info().offset);
+ }
}
void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
@@ -433,6 +439,31 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> NEDepthwiseConvolutionLayer3x3
const auto stride_x = conv_info.stride().first;
switch(dt)
{
+ case DataType::QASYMM8:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<4, 4, 3, 3, 1, 1, uint8_t, int32_t>>(
+ n_batches, in_rows, in_cols, n_channels, padding_same,
+ reinterpret_cast<const uint8_t *>(w->ptr_to_element(Coordinates())),
+ in->ptr_to_element(Coordinates()),
+ reinterpret_cast<int32_t *>(out->ptr_to_element(Coordinates())), weight_col_stride,
+ weight_row_stride, input_col_stride, input_row_stride, input_batch_stride,
+ output_col_stride, output_row_stride, output_batch_stride);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<4, 4, 3, 3, 2, 2, uint8_t, int32_t>>(
+ n_batches, in_rows, in_cols, n_channels, padding_same,
+ reinterpret_cast<const uint8_t *>(w->ptr_to_element(Coordinates())),
+ in->ptr_to_element(Coordinates()),
+ reinterpret_cast<int32_t *>(out->ptr_to_element(Coordinates())), weight_col_stride,
+ weight_row_stride, input_col_stride, input_row_stride, input_batch_stride,
+ output_col_stride, output_row_stride, output_batch_stride);
+ default:
+ return nullptr;
+ }
+ break;
+ }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{