Diffstat (limited to 'src/cpu/kernels/CpuIm2ColKernel.cpp')
-rw-r--r--  src/cpu/kernels/CpuIm2ColKernel.cpp  288
1 file changed, 149 insertions(+), 139 deletions(-)
diff --git a/src/cpu/kernels/CpuIm2ColKernel.cpp b/src/cpu/kernels/CpuIm2ColKernel.cpp
index 9ac291549b..55ac7c5192 100644
--- a/src/cpu/kernels/CpuIm2ColKernel.cpp
+++ b/src/cpu/kernels/CpuIm2ColKernel.cpp
@@ -29,13 +29,13 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CPP/Validate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
#include <arm_neon.h>
#include <cstddef>
#include <cstdint>
@@ -51,26 +51,34 @@ namespace kernels
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation, unsigned int num_groups, unsigned int input_pad_right)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Size2D &kernel_dims,
+ const PadStrideInfo &conv_info,
+ bool has_bias,
+ const Size2D &dilation,
+ unsigned int num_groups,
+ unsigned int input_pad_right)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(input->data_type()) && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Number of groups greater than one are not supported on Neon");
// Since there's no implicit padding added, check the total input spatial dimensions (with conv paddings) are big enough for the kernel dimensions
- const unsigned int width_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- const unsigned total_width = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right();
+ const unsigned int width_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+ const unsigned total_width = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right();
const unsigned total_height = input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom();
ARM_COMPUTE_RETURN_ERROR_ON((total_width < kernel_dims.width) || (total_height < kernel_dims.height));
- if(output->total_size() > 0)
+ if (output->total_size() > 0)
{
- TensorInfo expected_output = output->clone()->set_tensor_shape(compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, false, num_groups, input_pad_right));
+ TensorInfo expected_output = output->clone()->set_tensor_shape(compute_im2col_conv_shape(
+ input, kernel_dims, conv_info, has_bias, dilation, false, num_groups, input_pad_right));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -106,14 +114,14 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
// This for loop linearize a volume with 3 slices. This allows:
// 1) to reduce the iterations of the outer for loop "d"
// 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs
- for(; d <= (kernel_depth - 3); d += 3)
+ for (; d <= (kernel_depth - 3); d += 3)
{
- for(int y = top_left_y; y < y_e; y += dilation_y)
+ for (int y = top_left_y; y < y_e; y += dilation_y)
{
- if((y < 0 || y >= input_h) && has_pads)
+ if ((y < 0 || y >= input_h) && has_pads)
{
// All the values will be the offset (will be zeros when not quantized)
- for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
+ for (int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
*(out_ptr + 0 * kernel_size2) = pad_value;
*(out_ptr + 1 * kernel_size2) = pad_value;
@@ -122,9 +130,9 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
}
else
{
- for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
+ for (int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
- if((x < 0 || x >= input_w) && has_pads)
+ if ((x < 0 || x >= input_w) && has_pads)
{
*(out_ptr + 0 * kernel_size2) = pad_value;
*(out_ptr + 1 * kernel_size2) = pad_value;
@@ -132,9 +140,12 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
}
else
{
- *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
- *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
- *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
+ *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(
+ in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
+ *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(
+ in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
+ *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(
+ in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
}
}
}
@@ -143,11 +154,11 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
}
// Left over
- for(; d < kernel_depth; d++)
+ for (; d < kernel_depth; d++)
{
- for(int y = top_left_y; y < y_e; y += dilation_y)
+ for (int y = top_left_y; y < y_e; y += dilation_y)
{
- if((y < 0 || y >= input_h) && has_pads)
+ if ((y < 0 || y >= input_h) && has_pads)
{
// All the values will be the offset (will be zeros when not quantized)
memset(static_cast<void *>(out_ptr), pad_value, kernel_width * sizeof(T));
@@ -155,15 +166,16 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
}
else
{
- for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
+ for (int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
- if((x < 0 || x >= input_w) && has_pads)
+ if ((x < 0 || x >= input_w) && has_pads)
{
*out_ptr = pad_value;
}
else
{
- *out_ptr = *(reinterpret_cast<const T *>(in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
+ *out_ptr = *(reinterpret_cast<const T *>(
+ in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
}
}
}
@@ -171,7 +183,7 @@ inline void linearize_volume_nchw(const uint8_t *const in_ptr,
}
// Append 1 if the convolution layer has biases
- if(has_bias)
+ if (has_bias)
{
*out_ptr = static_cast<T>(1);
}
@@ -198,36 +210,39 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
const int end_y = start_y + kernel_height * dilation_y;
const int pad_quant = kernel_width * input_c;
const int element_size = static_cast<int>(sizeof(T));
- if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size))
+ if ((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) &&
+ (input_stride_y == input_c * element_size))
{
- for(int y = start_y; y < end_y; y += dilation_y)
+ for (int y = start_y; y < end_y; y += dilation_y)
{
//optimized for no dilation and no boundary pixels
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)),
+ input_c * kernel_width * element_size);
out_ptr += input_c * kernel_width;
}
}
else
{
- for(int y = start_y; y < end_y; y += dilation_y)
+ for (int y = start_y; y < end_y; y += dilation_y)
{
- if(y < 0 || y >= input_h)
+ if (y < 0 || y >= input_h)
{
memset(static_cast<void *>(out_ptr), pad_value, pad_quant * element_size);
out_ptr += pad_quant;
}
- else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size)
+ else if (dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size)
{
- for(int x = start_x; x < end_x; x += dilation_x)
+ for (int x = start_x; x < end_x; x += dilation_x)
{
- if(x < 0 || x >= input_w)
+ if (x < 0 || x >= input_w)
{
memset(static_cast<void *>(out_ptr), pad_value, input_c * element_size);
out_ptr += input_c;
}
else
{
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size);
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)),
+ input_c * element_size);
out_ptr += input_c;
}
}
@@ -235,13 +250,14 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
else
{
//optimized for no dilation and no boundary pixels
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)),
+ input_c * kernel_width * element_size);
out_ptr += input_c * kernel_width;
}
}
}
// Append 1 if the convolution layer has biases
- if(has_bias)
+ if (has_bias)
{
*out_ptr = static_cast<T>(1);
}
@@ -271,12 +287,13 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
const int element_size = static_cast<int>(sizeof(T));
const int channel_chunk_size = input_c * element_size;
- if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == channel_chunk_size))
+ if ((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) &&
+ (input_stride_y == channel_chunk_size))
{
- for(int y = start_y; y < end_y; y += dilation_y)
+ for (int y = start_y; y < end_y; y += dilation_y)
{
const uint8_t *offset_ptr = in_ptr + (y * input_stride_z + start_x * input_stride_y);
- for(int e = 0; e < kernel_width; e++)
+ for (int e = 0; e < kernel_width; e++)
{
memcpy(out_ptr, reinterpret_cast<const T *>(offset_ptr + e * channel_chunk_size), channel_chunk_size);
out_ptr += input_c + pad_right;
@@ -285,25 +302,26 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
}
else
{
- for(int y = start_y; y < end_y; y += dilation_y)
+ for (int y = start_y; y < end_y; y += dilation_y)
{
- if(y < 0 || y >= input_h)
+ if (y < 0 || y >= input_h)
{
memset(static_cast<void *>(out_ptr), pad_value, pad_quant * element_size);
out_ptr += pad_quant;
}
- else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != channel_chunk_size)
+ else if (dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != channel_chunk_size)
{
- for(int x = start_x; x < end_x; x += dilation_x)
+ for (int x = start_x; x < end_x; x += dilation_x)
{
- if(x < 0 || x >= input_w)
+ if (x < 0 || x >= input_w)
{
memset(static_cast<void *>(out_ptr), pad_value, (input_c + pad_right) * element_size);
out_ptr += input_c + pad_right;
}
else
{
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), channel_chunk_size);
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)),
+ channel_chunk_size);
out_ptr += input_c + pad_right;
}
}
@@ -311,16 +329,17 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
else
{
const uint8_t *offset_ptr = in_ptr + (y * input_stride_z + start_x * input_stride_y);
- for(int e = 0; e < kernel_width; e++)
+ for (int e = 0; e < kernel_width; e++)
{
- memcpy(out_ptr, reinterpret_cast<const T *>(offset_ptr + e * channel_chunk_size), channel_chunk_size);
+ memcpy(out_ptr, reinterpret_cast<const T *>(offset_ptr + e * channel_chunk_size),
+ channel_chunk_size);
out_ptr += input_c + pad_right;
}
}
}
}
// Append 1 if the convolution layer has biases
- if(has_bias)
+ if (has_bias)
{
*out_ptr = static_cast<T>(1);
}
@@ -348,7 +367,8 @@ void CpuIm2ColKernel::run_im2col(const ITensor *src, ITensor *dst, const Window
const int pad_top = _conv_info.pad_top();
const int stride_x = _conv_info.stride().first;
const int stride_y = _conv_info.stride().second;
- const int pad_value = is_data_type_quantized(src->info()->data_type()) ? src->info()->quantization_info().uniform().offset : 0;
+ const int pad_value =
+ is_data_type_quantized(src->info()->data_type()) ? src->info()->quantization_info().uniform().offset : 0;
Window window_in_out(window);
// The first three dimensions of the input and output are increased by the inner loops
@@ -361,84 +381,57 @@ void CpuIm2ColKernel::run_im2col(const ITensor *src, ITensor *dst, const Window
Iterator out(dst, window_in_out);
execute_window_loop(
- window, [&](const Coordinates & id)
- {
- const int start_w = id[width_idx] * stride_x - pad_left;
- const int start_h = id[height_idx] * stride_y - pad_top;
+ window,
+ [&](const Coordinates &id)
+ {
+ const int start_w = id[width_idx] * stride_x - pad_left;
+ const int start_h = id[height_idx] * stride_y - pad_top;
- // Get pointers
- const uint8_t *const input_ptr = in.ptr();
- auto output_ptr = reinterpret_cast<T *>(out.ptr() + (id[width_idx] + id[height_idx] * _convolved_dims.first) * dst->info()->strides_in_bytes().y());
+ // Get pointers
+ const uint8_t *const input_ptr = in.ptr();
+ auto output_ptr =
+ reinterpret_cast<T *>(out.ptr() + (id[width_idx] + id[height_idx] * _convolved_dims.first) *
+ dst->info()->strides_in_bytes().y());
- // Linearize volume
- if(is_nchw)
- {
- linearize_volume_nchw<T, has_pads>(input_ptr,
- output_ptr,
- _has_bias,
- start_w,
- start_h,
- _kernel_width,
- _kernel_height,
- input_c,
- input_w,
- input_h,
- input_stride_x,
- input_stride_y,
- input_stride_z,
- pad_value,
- _dilation.x(),
- _dilation.y());
- }
- else
- {
- if(_input_pad_right > 0)
+ // Linearize volume
+ if (is_nchw)
{
- linearize_volume_nhwc<T, has_pads>(input_ptr,
- output_ptr,
- _has_bias,
- start_w,
- start_h,
- _kernel_width,
- _kernel_height,
- input_w,
- input_h,
- input_c,
- input_stride_y,
- input_stride_z,
- pad_value,
- _dilation.x(),
- _dilation.y(),
- _input_pad_right);
+ linearize_volume_nchw<T, has_pads>(
+ input_ptr, output_ptr, _has_bias, start_w, start_h, _kernel_width, _kernel_height, input_c, input_w,
+ input_h, input_stride_x, input_stride_y, input_stride_z, pad_value, _dilation.x(), _dilation.y());
}
else
{
- linearize_volume_nhwc<T, has_pads>(input_ptr,
- output_ptr,
- _has_bias,
- start_w,
- start_h,
- _kernel_width,
- _kernel_height,
- input_w,
- input_h,
- input_c,
- input_stride_y,
- input_stride_z,
- pad_value,
- _dilation.x(),
- _dilation.y());
+ if (_input_pad_right > 0)
+ {
+ linearize_volume_nhwc<T, has_pads>(input_ptr, output_ptr, _has_bias, start_w, start_h,
+ _kernel_width, _kernel_height, input_w, input_h, input_c,
+ input_stride_y, input_stride_z, pad_value, _dilation.x(),
+ _dilation.y(), _input_pad_right);
+ }
+ else
+ {
+ linearize_volume_nhwc<T, has_pads>(
+ input_ptr, output_ptr, _has_bias, start_w, start_h, _kernel_width, _kernel_height, input_w,
+ input_h, input_c, input_stride_y, input_stride_z, pad_value, _dilation.x(), _dilation.y());
+ }
}
- }
- },
- in, out);
+ },
+ in, out);
}
-void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation, unsigned int num_groups, unsigned int input_pad_right)
+void CpuIm2ColKernel::configure(const ITensorInfo *src,
+ ITensorInfo *dst,
+ const Size2D &kernel_dims,
+ const PadStrideInfo &conv_info,
+ bool has_bias,
+ const Size2D &dilation,
+ unsigned int num_groups,
+ unsigned int input_pad_right)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, kernel_dims, conv_info, has_bias, dilation, num_groups, input_pad_right));
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(src, dst, kernel_dims, conv_info, has_bias, dilation, num_groups, input_pad_right));
ARM_COMPUTE_UNUSED(num_groups);
_data_layout = src->data_layout();
@@ -451,31 +444,34 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
_kernel_height = kernel_dims.height;
_input_pad_right = input_pad_right;
_dilation = dilation;
- _convolved_dims = scaled_dimensions(src->dimension(width_idx), dst->dimension(height_idx),
- _kernel_width, _kernel_height,
- _conv_info, _dilation);
+ _convolved_dims = scaled_dimensions(src->dimension(width_idx), dst->dimension(height_idx), _kernel_width,
+ _kernel_height, _conv_info, _dilation);
_has_bias = has_bias;
- if(_data_layout == DataLayout::NCHW)
+ if (_data_layout == DataLayout::NCHW)
{
- switch(src->data_type())
+ switch (src->data_type())
{
case DataType::F32:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, true> : &CpuIm2ColKernel::run_im2col<float, true, true>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, true>
+ : &CpuIm2ColKernel::run_im2col<float, true, true>;
break;
#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, true> : &CpuIm2ColKernel::run_im2col<bfloat16, true, true>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, true>
+ : &CpuIm2ColKernel::run_im2col<bfloat16, true, true>;
break;
#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, true> : &CpuIm2ColKernel::run_im2col<float16_t, true, true>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, true>
+ : &CpuIm2ColKernel::run_im2col<float16_t, true, true>;
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QASYMM8_SIGNED:
case DataType::QASYMM8:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<qasymm8_t, false, true> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, true>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<qasymm8_t, false, true>
+ : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, true>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
@@ -484,26 +480,31 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
}
else
{
- switch(src->data_type())
+ switch (src->data_type())
{
case DataType::F32:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, false> : &CpuIm2ColKernel::run_im2col<float, true, false>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, false>
+ : &CpuIm2ColKernel::run_im2col<float, true, false>;
break;
#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, false> : &CpuIm2ColKernel::run_im2col<bfloat16, true, false>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, false>
+ : &CpuIm2ColKernel::run_im2col<bfloat16, true, false>;
break;
#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, false> : &CpuIm2ColKernel::run_im2col<float16_t, true, false>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, false>
+ : &CpuIm2ColKernel::run_im2col<float16_t, true, false>;
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QASYMM8:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<uint8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<uint8_t, false, false>
+ : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
break;
case DataType::QASYMM8_SIGNED:
- _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<int8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
+ _func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<int8_t, false, false>
+ : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
@@ -512,11 +513,13 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
}
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_im2col_conv_shape(src, kernel_dims, conv_info, has_bias, dilation, false, num_groups, _input_pad_right)));
+ auto_init_if_empty(
+ *dst, src->clone()->set_tensor_shape(compute_im2col_conv_shape(src, kernel_dims, conv_info, has_bias, dilation,
+ false, num_groups, _input_pad_right)));
- std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(src->dimension(width_idx), src->dimension(height_idx),
- kernel_dims.width, kernel_dims.height,
- conv_info, dilation);
+ std::pair<unsigned int, unsigned int> convolved_dims =
+ scaled_dimensions(src->dimension(width_idx), src->dimension(height_idx), kernel_dims.width, kernel_dims.height,
+ conv_info, dilation);
Window win = calculate_max_window(*src, Steps());
win.set(width_idx, Window::Dimension(0, convolved_dims.first, 1));
@@ -526,10 +529,17 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
ICpuKernel::configure(win);
}
-Status CpuIm2ColKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation, unsigned int num_groups, unsigned int input_pad_right)
+Status CpuIm2ColKernel::validate(const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const Size2D &kernel_dims,
+ const PadStrideInfo &conv_info,
+ bool has_bias,
+ const Size2D &dilation,
+ unsigned int num_groups,
+ unsigned int input_pad_right)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, kernel_dims, conv_info, has_bias, dilation, num_groups, input_pad_right));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(src, dst, kernel_dims, conv_info, has_bias, dilation, num_groups, input_pad_right));
return Status{};
}
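
For context, a minimal sketch of the im2col linearization that CpuIm2ColKernel performs (assumptions: this is not part of the patch above; single NCHW image, unit stride, no dilation, zero pad value, and the helper name is hypothetical). Each convolution window is copied out as one contiguous patch of channels x kernel_h x kernel_w elements, with out-of-bounds positions filled with the pad value, which is what linearize_volume_nchw does per window in the hunks above.

// Illustrative sketch only, not taken from the patch; layout and helper name are assumptions.
#include <cstddef>
#include <vector>

std::vector<float> im2col_nchw_sketch(const std::vector<float> &in,
                                      int channels, int in_h, int in_w,
                                      int kernel_h, int kernel_w,
                                      int pad_h, int pad_w)
{
    // Output spatial size for stride 1 and no dilation.
    const int out_h      = in_h + 2 * pad_h - kernel_h + 1;
    const int out_w      = in_w + 2 * pad_w - kernel_w + 1;
    const int patch_size = channels * kernel_h * kernel_w;
    std::vector<float> out(static_cast<std::size_t>(out_h) * out_w * patch_size, 0.f); // 0 == pad value

    std::size_t window = 0;
    for (int oy = 0; oy < out_h; ++oy)
    {
        for (int ox = 0; ox < out_w; ++ox, ++window)
        {
            std::size_t elem = 0;
            for (int c = 0; c < channels; ++c)
            {
                for (int ky = 0; ky < kernel_h; ++ky)
                {
                    for (int kx = 0; kx < kernel_w; ++kx, ++elem)
                    {
                        const int y = oy - pad_h + ky;
                        const int x = ox - pad_w + kx;
                        if (y >= 0 && y < in_h && x >= 0 && x < in_w)
                        {
                            // Copy the input element for this window position.
                            out[window * patch_size + elem] =
                                in[(static_cast<std::size_t>(c) * in_h + y) * in_w + x];
                        }
                        // Out-of-bounds positions keep the pad value (zero here).
                    }
                }
            }
        }
    }
    return out;
}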