diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-03-13 13:08:12 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:16 +0000 |
commit | be0ae93c50bfa3e588111585025278daa8cb0694 (patch) | |
tree | d78c13e8846c31587a5acb70b38b13fa7d03200d /src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | |
parent | ae4ce7b411d0f4809ac7d3d90fe89bdb2520dbf6 (diff) | |
download | ComputeLibrary-be0ae93c50bfa3e588111585025278daa8cb0694.tar.gz |
COMPMID-1005: Update Depthwise Convolution from RSH
Change-Id: I3033ddb8de183661010d6c71a83f71132037b139
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124338
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | 66 |
1 files changed, 44 insertions, 22 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index f5ee608b60..49c67d19bb 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -219,8 +219,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver() ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(_input, _weights); ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3); - _convolver = create_convolver_object(_input->info()->tensor_shape(), _conv_info, - _weights->buffer(), _input->buffer(), _output->buffer()); + _convolver = create_convolver_object(_conv_info, _weights, _input, _output, true); } void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic() @@ -282,8 +281,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_optimized() ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3); _border_size = BorderSize(0, 0); - _convolver = create_convolver_object(_input->info()->tensor_shape(), _conv_info, - _weights->buffer(), _input->buffer(), _output->buffer()); + _convolver = create_convolver_object(_conv_info, _weights, _input, _output); // Auto-configure output bool same_padding = _conv_info.has_padding(); @@ -296,6 +294,15 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_optimized() auto_init_if_empty(*_output->info(), _input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape)); + // Set padding in channels + const int num_channels = _weights->info()->dimension(0); + if((num_channels >= 128) && (num_channels % 16 == 0)) + { + _input->info()->extend_padding(PaddingSize(0, 4, 0, 0)); + _weights->info()->extend_padding(PaddingSize(0, 4, 0, 0)); + _output->info()->extend_padding(PaddingSize(0, 4, 0, 0)); + } + // Configure window Window win; auto win_last = _convolver->get_window(); @@ 
-330,41 +337,56 @@ void NEDepthwiseConvolutionLayer3x3Kernel::run_optimized(const Window &window, c _convolver->run(start, end); } -std::unique_ptr<depthwise::IDepthwiseConvolution> NEDepthwiseConvolutionLayer3x3Kernel::create_convolver_object(TensorShape shape, - PadStrideInfo conv_info, - const uint8_t *w_ptr, - uint8_t *in_ptr, - uint8_t *out_ptr) +std::unique_ptr<depthwise::IDepthwiseConvolution> NEDepthwiseConvolutionLayer3x3Kernel::create_convolver_object(PadStrideInfo conv_info, + const ITensor *w, + const ITensor *in, + ITensor *out, + bool setup_strides) { - const int in_rows = shape.z(); - const int in_cols = shape.y(); - const int n_batches = shape[3]; - const int n_channels = shape.x(); - const bool padding_same = conv_info.has_padding(); + const TensorShape shape = in->info()->tensor_shape(); + const int in_rows = shape.z(); + const int in_cols = shape.y(); + const int n_batches = shape[3]; + const int n_channels = shape.x(); + const bool padding_same = conv_info.has_padding(); + const int weight_col_stride = (setup_strides) ? w->info()->strides_in_bytes().y() / w->info()->element_size() : 0; + const int weight_row_stride = (setup_strides) ? w->info()->strides_in_bytes().z() / w->info()->element_size() : 0; + const int input_col_stride = (setup_strides) ? in->info()->strides_in_bytes().y() / in->info()->element_size() : 0; + const int input_row_stride = (setup_strides) ? in->info()->strides_in_bytes().z() / in->info()->element_size() : 0; + const int input_batch_stride = (setup_strides) ? in->info()->strides_in_bytes()[3] / in->info()->element_size() : 0; + const int output_col_stride = (setup_strides) ? out->info()->strides_in_bytes().y() / out->info()->element_size() : 0; + const int output_row_stride = (setup_strides) ? out->info()->strides_in_bytes().z() / out->info()->element_size() : 0; + const int output_batch_stride = (setup_strides) ? 
out->info()->strides_in_bytes()[3] / out->info()->element_size() : 0; const auto stride_x = conv_info.stride().first; switch(stride_x) { case 1: - return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<2, 2, 3, 3, 1, 1, float, float>>( + return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float>>( n_batches, in_rows, in_cols, n_channels, padding_same, - reinterpret_cast<const float *>(w_ptr), - reinterpret_cast<float *>(in_ptr), - reinterpret_cast<float *>(out_ptr)); + reinterpret_cast<const float *>(w->ptr_to_element(Coordinates())), + reinterpret_cast<float *>(in->ptr_to_element(Coordinates())), + reinterpret_cast<float *>(out->ptr_to_element(Coordinates())), + weight_col_stride, weight_row_stride, + input_col_stride, input_row_stride, input_batch_stride, + output_col_stride, output_row_stride, output_batch_stride); case 2: - return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<2, 2, 3, 3, 2, 2, float, float>>( + return arm_compute::support::cpp14::make_unique<DepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float>>( n_batches, in_rows, in_cols, n_channels, padding_same, - reinterpret_cast<const float *>(w_ptr), - reinterpret_cast<float *>(in_ptr), - reinterpret_cast<float *>(out_ptr)); + reinterpret_cast<const float *>(w->ptr_to_element(Coordinates())), + reinterpret_cast<float *>(in->ptr_to_element(Coordinates())), + reinterpret_cast<float *>(out->ptr_to_element(Coordinates())), + weight_col_stride, weight_row_stride, + input_col_stride, input_row_stride, input_batch_stride, + output_col_stride, output_row_stride, output_batch_stride); default: return nullptr; } |