path: root/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
author    Michalis Spyrou <michalis.spyrou@arm.com>    2017-11-30 14:25:57 +0000
committer Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:42:17 +0000
commit    afa5d817b1d083837cd7ea30d32f845d82620c12 (patch)
tree      1ca2a27ab7108b7137b96fc1547a8b5ac5d9c8f7    /src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
parent    631c41a4e3645a948b0f597caa77e8fa91ca0efc (diff)
download  ComputeLibrary-afa5d817b1d083837cd7ea30d32f845d82620c12.tar.gz
COMPMID-617 Add validation methods to Kernels
- NEActivationLayer
- NESoftmax
- NEDirectConvolutionLayer
- NENormalizationLayer
- NEPoolingLayer

Change-Id: Ib279f1c1b7f9247679b0d6593aed7393da8fe87b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111335
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
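For reference, a minimal caller-side sketch of how the static validate() entry point added by this patch can be used before configure(). Everything around the two calls (the header path, the example stride/pad values, the helper function name, and the assumption that Status converts to bool) is illustrative only and not part of the commit:

// Hypothetical usage sketch, not part of this patch. Assumes the kernel header
// is available as arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
// and that Status converts to true when no error was reported.
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void configure_direct_conv(const ITensor *input, const ITensor *weights, ITensor *output)
{
    const PadStrideInfo conv_info(1, 1, 0, 0); // example values: stride 1x1, no padding

    // New in COMPMID-617: check the configuration on ITensorInfo objects only,
    // without touching tensor memory and without asserting.
    const Status status = NEDirectConvolutionLayerKernel::validate(input->info(), weights->info(),
                                                                   output->info(), conv_info);
    if(bool(status))
    {
        NEDirectConvolutionLayerKernel kernel;
        kernel.configure(input, weights, output, conv_info);
    }
}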
Diffstat (limited to 'src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp  249
1 file changed, 155 insertions(+), 94 deletions(-)
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 78afbc2c20..1ca213b04a 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -251,10 +251,15 @@ inline qint32x4x2_t internal_vmlal(const qint32x4x2_t &x, const qint16x8_t &y, c
return r;
}
-constexpr int SmallTensorSizeOptim = 8;
+constexpr int small_tensor_size_optim = 8;
+inline bool run_optim_small_tensor_info(const ITensorInfo *t)
+{
+ return t->dimension(Window::DimX) <= small_tensor_size_optim && t->dimension(Window::DimY) <= small_tensor_size_optim;
+}
+
inline bool run_optim_small_tensor(const ITensor *t)
{
- return t->info()->dimension(Window::DimX) <= SmallTensorSizeOptim && t->info()->dimension(Window::DimY) <= SmallTensorSizeOptim;
+ return run_optim_small_tensor_info(t->info());
}
// Optimized convolver for 1x1 kernels used only where input width and height are both <= 8
@@ -266,8 +271,8 @@ class convolver_w1x1_i8x8_f32
public:
static void convolve(const Window &window, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
{
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) > SmallTensorSizeOptim);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) > SmallTensorSizeOptim);
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) > small_tensor_size_optim);
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) > small_tensor_size_optim);
const int input_stride_y = input->info()->strides_in_bytes().y();
const int input_stride_z = input->info()->strides_in_bytes().z();
@@ -302,12 +307,12 @@ public:
execute_window_loop(window_out, [&](const Coordinates & id)
{
- const uint8_t *input_ptr = in.ptr();
- uint8_t *out_ptr = out.ptr();
- int ih = 0;
- int oh = 0;
- float32x4_t accum0[SmallTensorSizeOptim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
- float32x4_t accum1[SmallTensorSizeOptim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+ const uint8_t *input_ptr = in.ptr();
+ uint8_t *out_ptr = out.ptr();
+ int ih = 0;
+ int oh = 0;
+ float32x4_t accum0[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+ float32x4_t accum1[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
for(int oz = 0; oz < range_z; ++oz)
{
accum0[0] = accum0[1] = accum0[2] = accum0[3] = accum0[4] = accum0[5] = accum0[6] = accum0[7] = vdupq_n_f32(0.f);
@@ -992,121 +997,118 @@ inline void convolve_5x5(const Window &window, unsigned int num_elems_read_per_i
}
}
-} // namespace
-
-NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel()
- : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0),
- _num_elems_written_per_iteration(0)
-{
-}
-
-BorderSize NEDirectConvolutionLayerKernel::border_size() const
+inline TensorShape get_convolved_dimensions(const ITensorInfo *input, const ITensorInfo *weights, const int kernel_size, const PadStrideInfo &conv_info)
{
- return _border_size;
-}
-
-void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::QS16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 1 && (std::get<0>(conv_info.pad()) || std::get<1>(conv_info.pad())),
- "Pad > 0 not supported for 1x1 weights");
- ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 3 && (std::get<0>(conv_info.pad()) > 1 || std::get<1>(conv_info.pad()) > 1),
- "Pad > 1 not supported for 3x3 weights");
- ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(0) == 5 && (std::get<0>(conv_info.pad()) > 2 || std::get<1>(conv_info.pad()) > 2),
- "Pad > 2 not supported for 5x5 weights");
-
- ARM_COMPUTE_ERROR_ON_MSG(std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported.");
- ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
- ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
- ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
-
- const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
- const unsigned int conv_pad_x = std::get<0>(conv_info.pad());
- const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
-
- _input = input;
- _weights = weights;
- _output = output;
- _conv_info = conv_info;
- _kernel_size = weights->info()->dimension(0);
- _border_size = BorderSize(conv_pad_y, conv_pad_x);
-
- const unsigned int kernel_size = weights->info()->dimension(0);
-
- // Get convolved dimensions
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
+ std::tie(output_width, output_height) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_size, kernel_size, conv_info);
- TensorShape output_shape = input->info()->tensor_shape();
+ TensorShape output_shape = input->tensor_shape();
output_shape.set(0, output_width);
output_shape.set(1, output_height);
- output_shape.set(2, weights->info()->dimension(3));
+ output_shape.set(2, weights->dimension(3));
- DataType data_type = input->info()->data_type();
+ return output_shape;
+}
- if(is_data_type_fixed_point(data_type))
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 1 && (std::get<0>(conv_info.pad()) || std::get<1>(conv_info.pad())),
+ "Pad > 0 not supported for 1x1 weights");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 3 && (std::get<0>(conv_info.pad()) > 1 || std::get<1>(conv_info.pad()) > 1),
+ "Pad > 1 not supported for 3x3 weights");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) == 5 && (std::get<0>(conv_info.pad()) > 2 || std::get<1>(conv_info.pad()) > 2),
+ "Pad > 2 not supported for 5x5 weights");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
+
+ // Checks performed when output is configured
+ if(output->total_size() != 0)
{
- // Promote data type in case of fixed point
- data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+ TensorShape output_shape = get_convolved_dimensions(input, weights, weights->dimension(0), conv_info);
+
+ DataType data_type = input->data_type();
+ if(is_data_type_fixed_point(data_type))
+ {
+ // Promote data type in case of fixed point
+ data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->data_type() != data_type);
}
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, data_type, input->info()->fixed_point_position());
+ return Status{};
+}
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, output->info()->data_type());
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row,
+ unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration)
+{
+ // Calculate right and bottom border
+ unsigned int kernel_size = weights->dimension(0);
+ const unsigned int conv_pad_x = std::get<0>(conv_info.pad());
+ const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
+ const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
+ const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
+ BorderSize border_size = BorderSize(conv_pad_y, conv_pad_x);
+ const int input_width = input->dimension(0);
+ const int input_height = input->dimension(1);
- switch(_kernel_size)
+ switch(kernel_size)
{
case 1:
{
- switch(input->info()->data_type())
+ switch(input->data_type())
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
case DataType::QS16:
- _num_elems_written_per_iteration = 8;
+ num_elems_written_per_iteration = 8;
break;
case DataType::F32:
- if(run_optim_small_tensor(input))
+ if(run_optim_small_tensor_info(input))
{
- _num_elems_written_per_iteration = 8;
+ num_elems_written_per_iteration = 8;
}
else
{
- _num_elems_written_per_iteration = 4;
+ num_elems_written_per_iteration = 4;
}
break;
default:
ARM_COMPUTE_ERROR("Data type not supported.");
break;
}
- _num_weight_elems_read_per_row = kernel_size;
- _num_elems_read_per_iteration = conv_stride_x * _num_elems_written_per_iteration;
+ num_weight_elems_read_per_row = kernel_size;
+ num_elems_read_per_iteration = conv_stride_x * num_elems_written_per_iteration;
break;
}
case 3:
case 5:
{
- switch(input->info()->data_type())
+ switch(input->data_type())
{
case DataType::F32:
- _num_weight_elems_read_per_row = 4 + _kernel_size - 1;
- _num_elems_read_per_iteration = 12;
- _num_elems_written_per_iteration = 16 >> conv_stride_x;
+ num_weight_elems_read_per_row = 4 + kernel_size - 1;
+ num_elems_read_per_iteration = 12;
+ num_elems_written_per_iteration = 16 >> conv_stride_x;
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
case DataType::QS16:
- _num_weight_elems_read_per_row = 8 + _kernel_size - 1;
- _num_elems_read_per_iteration = 24;
- _num_elems_written_per_iteration = 32 >> conv_stride_x;
+ num_weight_elems_read_per_row = 8 + kernel_size - 1;
+ num_elems_read_per_iteration = 24;
+ num_elems_written_per_iteration = 32 >> conv_stride_x;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported.");
@@ -1121,22 +1123,81 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
}
}
- // Calculate right and bottom border
- const unsigned int conv_stride_y = std::get<1>(_conv_info.stride());
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- const int upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width;
- const int upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height;
- _border_size.right = std::max(upper_bound_w, static_cast<int>(_kernel_size));
- _border_size.bottom = std::max(upper_bound_h, static_cast<int>(_kernel_size));
- Window win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration));
- AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
- AccessWindowStatic weights_access(weights->info(), 0, 0, _num_weight_elems_read_per_row, _kernel_size);
- AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration);
- update_window_and_padding(win, input_access, weights_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- INEKernel::configure(win);
+ const int upper_bound_w = ceil_to_multiple(((output->dimension(0) - 1) * conv_stride_x + kernel_size), num_elems_read_per_iteration) - conv_pad_x - input_width;
+ const int upper_bound_h = ((output->dimension(1) - 1) * conv_stride_y - conv_pad_y + kernel_size) - input_height;
+ border_size.right = std::max(upper_bound_w, static_cast<int>(kernel_size));
+ border_size.bottom = std::max(upper_bound_h, static_cast<int>(kernel_size));
+ Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
+ AccessWindowStatic input_access(input, -conv_pad_x, -conv_pad_y, input_width + border_size.right, input_height + border_size.bottom);
+ AccessWindowStatic weights_access(weights, 0, 0, num_weight_elems_read_per_row, kernel_size);
+ AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration);
+ bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel()
+ : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0),
+ _num_elems_written_per_iteration(0)
+{
+}
+
+BorderSize NEDirectConvolutionLayerKernel::border_size() const
+{
+ return _border_size;
+}
+
+void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ const unsigned int conv_pad_x = std::get<0>(conv_info.pad());
+ const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
+
+ _input = input;
+ _weights = weights;
+ _output = output;
+ _conv_info = conv_info;
+ _kernel_size = weights->info()->dimension(0);
+ _border_size = BorderSize(conv_pad_y, conv_pad_x);
+
+ // Get convolved dimensions
+ TensorShape output_shape = get_convolved_dimensions(input->info(), weights->info(), _kernel_size, conv_info);
+
+ DataType data_type = input->info()->data_type();
+
+ if(is_data_type_fixed_point(data_type))
+ {
+ // Promote data type in case of fixed point
+ data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
+ }
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(), output_shape, 1, data_type, input->info()->fixed_point_position());
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), output->info(), conv_info));
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row,
+ _num_elems_read_per_iteration, _num_elems_written_per_iteration);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
+
+Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info)
+{
+ unsigned int num_weight_elems_read_per_row = 0;
+ unsigned int num_elems_read_per_iteration = 0;
+ unsigned int num_elems_written_per_iteration = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration,
+ num_elems_written_per_iteration)
+ .first);
+
+ return Status{};
}
void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo &info)