diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-09-12 20:11:34 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | a799ce0ad775829862891dd98d1232638ec8761e (patch) | |
tree | 4b7bb9b080a44aa5cfff67b2ce7177929b22405f /src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | |
parent | d63dfa2fc61a33b4e675ec6bc7458d8700174134 (diff) | |
download | ComputeLibrary-a799ce0ad775829862891dd98d1232638ec8761e.tar.gz |
COMPMID-1564: Add NEDepthwiseConvolution3x3 for QASYMM8
Change-Id: I1f55508af6f220e5f41df7b56daffb4761ed0591
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148253
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | 37 |
1 file changed, 34 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index 7029b06615..99bdb7a70e 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -198,8 +198,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen output_shape.set(1, convolver->output_size(output_shape.y(), same_padding)); // Set width output_shape.set(2, convolver->output_size(output_shape.z(), same_padding)); // Set height + const DataType output_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type(); + // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape)); + auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt)); // Configure window (optimised) // Set padding in channels @@ -324,7 +326,7 @@ bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(Tenso } // Check supported data type - bool supported_datatype = is_data_type_float(dt); + bool supported_datatype = is_data_type_float(dt) || is_data_type_quantized(dt); // Check for supported strides const auto &strides = conv_info.stride(); @@ -345,11 +347,15 @@ bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(Tenso void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver() { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(_input, _weights); ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3); _convolver = create_convolver_object(_conv_info, 
_weights, _input, _output, true); + if(_convolver) + { + _convolver->set_offsets(-_input->info()->quantization_info().offset, -_weights->info()->quantization_info().offset); + } } void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic() @@ -433,6 +439,31 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> NEDepthwiseConvolutionLayer3x3 const auto stride_x = conv_info.stride().first; switch(dt) { + case DataType::QASYMM8: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<4, 4, 3, 3, 1, 1, uint8_t, int32_t>>( + n_batches, in_rows, in_cols, n_channels, padding_same, + reinterpret_cast<const uint8_t *>(w->ptr_to_element(Coordinates())), + in->ptr_to_element(Coordinates()), + reinterpret_cast<int32_t *>(out->ptr_to_element(Coordinates())), weight_col_stride, + weight_row_stride, input_col_stride, input_row_stride, input_batch_stride, + output_col_stride, output_row_stride, output_batch_stride); + case 2: + return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<4, 4, 3, 3, 2, 2, uint8_t, int32_t>>( + n_batches, in_rows, in_cols, n_channels, padding_same, + reinterpret_cast<const uint8_t *>(w->ptr_to_element(Coordinates())), + in->ptr_to_element(Coordinates()), + reinterpret_cast<int32_t *>(out->ptr_to_element(Coordinates())), weight_col_stride, + weight_row_stride, input_col_stride, input_row_stride, input_batch_stride, + output_col_stride, output_row_stride, output_batch_stride); + default: + return nullptr; + } + break; + } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { |