diff options
Diffstat (limited to 'src')
-rw-r--r--  src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp   | 11
-rw-r--r--  src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h     |  1
-rw-r--r--  src/core/cpu/kernels/pooling/neon/fp16.cpp                 |  4
-rw-r--r--  src/core/cpu/kernels/pooling/neon/fp32.cpp                 |  4
-rw-r--r--  src/core/cpu/kernels/pooling/neon/list.h                   |  4
-rw-r--r--  src/core/cpu/kernels/pooling/neon/nchw/all.cpp             |  2
-rw-r--r--  src/core/cpu/kernels/scale/sve/qasymm8.cpp                 |  7
-rw-r--r--  src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp          |  7
-rw-r--r--  src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp   |  6
-rw-r--r--  src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp  | 24
10 files changed, 34 insertions, 36 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 09f99748bf..98b76c7db3 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1217,7 +1217,7 @@ void NEDirectConvolutionLayerKernel::convolve_nhwc(const Window &window) NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel() : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0), - _num_elems_written_per_iteration(0) + _num_elems_written_per_iteration(0), _data_layout() { } @@ -1234,13 +1234,14 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens _weights = weights; _output = output; _conv_info = conv_info; - _kernel_size = weights->info()->dimension(get_data_layout_dimension_index(weights->info()->data_layout(), DataLayoutDimension::WIDTH)); + _data_layout = _input->info()->data_layout(); + _kernel_size = weights->info()->dimension(get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH)); const unsigned int conv_pad_left = conv_info.pad_left(); const unsigned int conv_pad_top = conv_info.pad_top(); const unsigned int conv_pad_right = conv_info.pad_right(); const unsigned int conv_pad_bottom = conv_info.pad_bottom(); - if(_input->info()->data_layout() == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { _border_size = BorderSize(conv_pad_top, conv_pad_right, conv_pad_bottom, conv_pad_left); } @@ -1294,9 +1295,9 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - const int kernel_size = _weights->info()->dimension(get_data_layout_dimension_index(_weights->info()->data_layout(), DataLayoutDimension::WIDTH)); + const int 
kernel_size = _weights->info()->dimension(get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH)); - if(_input->info()->data_layout() == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { switch(kernel_size) { diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h index 258def77a3..259eb683f6 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -103,6 +103,7 @@ private: unsigned int _num_weight_elems_read_per_row; unsigned int _num_elems_read_per_iteration; unsigned int _num_elems_written_per_iteration; + DataLayout _data_layout; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/src/core/cpu/kernels/pooling/neon/fp16.cpp b/src/core/cpu/kernels/pooling/neon/fp16.cpp index 314be3704e..1ecceafe86 100644 --- a/src/core/cpu/kernels/pooling/neon/fp16.cpp +++ b/src/core/cpu/kernels/pooling/neon/fp16.cpp @@ -93,7 +93,7 @@ void pooling2_f16_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds // Store result vst1q_f16(reinterpret_cast<float16_t *>(out.ptr()) + x_off, vres); - const uint32_t offset_base = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y); + const uint32_t offset_base = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC); const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float16_t) + x_off; const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float16_t) - pad_right; const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float16_t) - pad_right * src->info()->tensor_shape()[1]; @@ -132,7 +132,7 @@ void pooling2_f16_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds // Store result *(reinterpret_cast<float16_t *>(out.ptr()) + x_off) = res; - const uint32_t offset_base = 
offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y); + const uint32_t offset_base = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC); const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float16_t) + x_off; const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float16_t) - pad_right; const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float16_t) - pad_right * src->info()->tensor_shape()[1]; diff --git a/src/core/cpu/kernels/pooling/neon/fp32.cpp b/src/core/cpu/kernels/pooling/neon/fp32.cpp index e319047d76..a2bd4a6bb3 100644 --- a/src/core/cpu/kernels/pooling/neon/fp32.cpp +++ b/src/core/cpu/kernels/pooling/neon/fp32.cpp @@ -95,7 +95,7 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds // Store result vst1q_f32(reinterpret_cast<float *>(out.ptr()) + x_off, vres); - const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y); + const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC); const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off; const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_right; const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_right * src->info()->tensor_shape()[1]; @@ -124,7 +124,7 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds // Store result *(reinterpret_cast<float *>(out.ptr()) + x_off) = res; - const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y); + const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC); const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off; const uint32_t 
offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_right; const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_right * src->info()->tensor_shape()[1]; diff --git a/src/core/cpu/kernels/pooling/neon/list.h b/src/core/cpu/kernels/pooling/neon/list.h index 3435ee6724..bec1536f61 100644 --- a/src/core/cpu/kernels/pooling/neon/list.h +++ b/src/core/cpu/kernels/pooling/neon/list.h @@ -59,7 +59,7 @@ DECLARE_POOLING_KERNEL(poolingMxN_fp32_neon_nchw); #undef DECLARE_POOLING_KERNEL template <typename T> -inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const ITensorInfo &info, int pool_stride_x, int pool_stride_y) +inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const ITensorInfo &info, int pool_stride_x, int pool_stride_y, DataLayout data_layout) { const int pad_left = info.padding().left; const int pad_right = info.padding().right; @@ -70,7 +70,7 @@ inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const int pad_horiz = pad_left + pad_right; const int pad_vert = pad_top + pad_bottom; - if(info.data_layout() == DataLayout::NCHW) + if(data_layout == DataLayout::NCHW) { const uint32_t offset_base = padded_offset - sizeof(T) * pad_horiz * id.y() * pool_stride_y /* subtract padding elems per row */ diff --git a/src/core/cpu/kernels/pooling/neon/nchw/all.cpp b/src/core/cpu/kernels/pooling/neon/nchw/all.cpp index 47ac7b4f7f..80eac684aa 100644 --- a/src/core/cpu/kernels/pooling/neon/nchw/all.cpp +++ b/src/core/cpu/kernels/pooling/neon/nchw/all.cpp @@ -150,7 +150,7 @@ void pooling2_nchw_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *d *(reinterpret_cast<T *>(out.ptr())) = static_cast<T>(vget_lane_f32(max_data, 0)); // Calculate max data indice, which will be used in max unpool. 
- const uint32_t offset_base = offset_no_padding<T>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y); + const uint32_t offset_base = offset_no_padding<T>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NCHW); const uint32_t offset_top = (uint32_t)(offset_base / sizeof(T)); const uint32_t offset_bottom = offset_top + in_stride_y / sizeof(T) - pad_right - pad_left; const uint32x2_t voffset_top = { offset_top, offset_top + 1u }; diff --git a/src/core/cpu/kernels/scale/sve/qasymm8.cpp b/src/core/cpu/kernels/scale/sve/qasymm8.cpp index c475ad615c..c041f14b22 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8.cpp @@ -89,10 +89,9 @@ void qasymm8_sve_scale_bilinear(const ITensor *src, ITensor *dst, const ITensor BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window) { - // Get data layout and width/height indices - const DataLayout data_layout = src->info()->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + // Data layout is NHWC + const int idx_width = 1; + const int idx_height = 2; // Compute the ratio between source height and destination height const auto hr = scale_utils::calculate_resize_ratio(src->info()->dimension(idx_height), dst->info()->dimension(idx_height), align_corners); diff --git a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp index b39b75abba..9df4301fe3 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp @@ -89,10 +89,9 @@ void qasymm8_signed_sve_scale_bilinear(const ITensor *src, ITensor *dst, const I BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window 
&window) { - // Get data layout and width/height indices - const DataLayout data_layout = src->info()->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + // Data layout is NHWC + const int idx_width = 1; + const int idx_height = 2; // Compute the ratio between source height and destination height const auto hr = scale_utils::calculate_resize_ratio(src->info()->dimension(idx_height), dst->info()->dimension(idx_height), align_corners); diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index 8d2c81bc15..5ed8aa98c9 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -324,7 +324,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare() { _output_multipliers.map(); _output_shifts.map(); - const unsigned int idx_ofms = get_data_layout_dimension_index(_output->info()->data_layout(), DataLayoutDimension::CHANNEL); + const unsigned int idx_ofms = _needs_permute ? 2 : 0; quantization::compute_quantized_multipliers_and_shifts(_input->info(), _original_weights->info(), _output->info(), @@ -529,7 +529,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepar { _output_multipliers.map(); _output_shifts.map(); - const unsigned int idx_ofms = get_data_layout_dimension_index(_output->info()->data_layout(), DataLayoutDimension::CHANNEL); + const unsigned int idx_ofms = _needs_permute ? 
2 : 0; quantization::compute_quantized_multipliers_and_shifts(_input->info(), _original_weights->info(), _output->info(), diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index dc3bbbe562..941cb21e5e 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -303,7 +303,7 @@ arm_gemm::Activation arm_gemm_activation_from_acl_activation(const ActivationLay NEWinogradConvolutionLayer::NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager) : _memory_group(memory_manager), _gemm_function(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(), _permute_input(), _permute_weights(), _permute_output(), _input_transformed(), _output_transformed(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), - _weights_hwio(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false) + _weights_hwio(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false), _data_layout() { } @@ -314,10 +314,10 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? 
biases->info() : nullptr, output->info(), conv_info)); // Get indices for the width and height - const DataLayout data_layout = input->info()->data_layout(); - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + _data_layout = input->info()->data_layout(); + const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); const Size2D input_dims = Size2D(input->info()->dimension(width_idx), input->info()->dimension(height_idx)); const Size2D kernel_size = Size2D(weights->info()->dimension(width_idx), weights->info()->dimension(height_idx)); @@ -537,7 +537,7 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * const unsigned int max_num_threads = NEScheduler::get().num_threads(); // Configure the kernel to transform the input tensor from NCHW -> NHWC - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { _memory_group.manage(&_input_nhwc); _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U)); @@ -554,7 +554,7 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * TensorInfo input_workspace_info(TensorShape(input_workspace_size), 1, _input->info()->data_type()); _input_workspace.allocator()->init(input_workspace_info); _input_workspace.allocator()->allocate(); - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { _input_nhwc.allocator()->allocate(); } @@ -570,7 +570,7 @@ void 
NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * // Configure output transform function // The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { _memory_group.manage(&_output_nhwc); output_to_use = &_output_nhwc; @@ -595,7 +595,7 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * _output_transformed.allocator()->allocate(); // Reorder the convoluted output to ACL's ordering NCHW - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U)); _output_nhwc.allocator()->allocate(); @@ -615,13 +615,11 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * void NEWinogradConvolutionLayer::run() { - const DataLayout data_layout = _input->info()->data_layout(); - prepare(); MemoryGroupResourceScope scope_mg(_memory_group); - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { //Bring channels to the front as Winograd code expects the tensor to be in the format NHWC _permute_input.run(); @@ -636,7 +634,7 @@ void NEWinogradConvolutionLayer::run() // Transform output tensor to the spatial domain NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX); - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { // Reorder the convoluted output to ACL's ordering NCHW _permute_output.run(); |