aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON
diff options
context:
space:
mode:
authorIoan-Cristian Szabo <ioan-cristian.szabo@arm.com>2017-11-30 17:17:17 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:47:40 +0000
commitb4e3e1c371d8091e86ee1c6e704057559bbe1554 (patch)
treed072c9f9d7471e4df9ef5aa6b50cb09c35b0c361 /src/core/NEON
parentc1b6e37233e0ebd21cb44bf8863a09c0ba5feeb1 (diff)
downloadComputeLibrary-b4e3e1c371d8091e86ee1c6e704057559bbe1554.tar.gz
COMPMID-617: Add validate support for NEON FullyConnectedLayer
Change-Id: I08987022c8d4cc335c00b8af27bd3edb8fe64d3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111596 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Alexander Gilday <alexander.gilday@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp69
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp85
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp40
3 files changed, 154 insertions, 40 deletions
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index 3dd59bddd6..cab3c7a58f 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,42 @@
using namespace arm_compute;
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
+
+ return Status{};
+}
+
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ bool window_changed = update_window_and_padding(win,
+ AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration),
+ AccessWindowStatic(biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration), biases->tensor_shape().y()));
+
+ AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration);
+
+ // Set the valid region for the accum tensor
+ Coordinates coord;
+ coord.set_num_dimensions(accum->num_dimensions());
+ output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape()));
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
: _accum(nullptr), _biases(nullptr)
{
@@ -46,31 +82,26 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
- ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
_biases = biases;
_accum = accum;
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
// Configure kernel window
- Window win = calculate_max_window(*accum->info(), Steps(num_elems_processed_per_iteration));
-
- update_window_and_padding(win,
- AccessWindowHorizontal(accum->info(), 0, num_elems_processed_per_iteration),
- AccessWindowStatic(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration), biases->info()->tensor_shape().y()));
-
- AccessWindowHorizontal output_access(accum->info(), 0, num_elems_processed_per_iteration);
+ auto win_config = validate_and_configure_window(accum->info(), biases->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
- // Set the valid region for the accum tensor
- Coordinates coord;
- coord.set_num_dimensions(accum->info()->num_dimensions());
- output_access.set_valid_region(win, ValidRegion(coord, accum->info()->tensor_shape()));
+Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(), biases->clone().get()).first);
- INEKernel::configure(win);
+ return Status{};
}
void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index aa5e2dd0dd..69b052a9bd 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include <arm_neon.h>
#include <cstddef>
#include <cstdint>
@@ -1409,27 +1411,73 @@ void matrix_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I
ina, inb, out);
}
-Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
{
+ ARM_COMPUTE_UNUSED(alpha);
+
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32, DataType::QS8, DataType::QS16);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
- ARM_COMPUTE_UNUSED(input0);
- ARM_COMPUTE_UNUSED(input1);
- ARM_COMPUTE_UNUSED(output);
- if(output->dimension(1) == 1)
+ if(!is_interleaved)
{
ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
+
+ if(output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
+ }
+ }
+ else
+ {
+ const int m = reshape_info.m();
+ const int n = reshape_info.n();
+ const int k = reshape_info.k();
+ const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width();
+ const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
+
+ /* Interleave */
+ TensorShape tensor_shape0{ input0->tensor_shape() };
+ tensor_shape0.set(0, k);
+ tensor_shape0.set(1, m);
+
+ const TensorInfo tensor_info0 = input0->clone()->set_tensor_shape(tensor_shape0);
+ const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(misc::shape_calculator::compute_interleaved_shape(tensor_info0, mult_interleave4x4_height));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0);
+
+ if(n != 0) /* Transpose */
+ {
+ TensorShape tensor_shape1{ input1->tensor_shape() };
+ tensor_shape1.set(0, n);
+ tensor_shape1.set(1, k);
+
+ const TensorInfo tensor_info1 = input1->clone()->set_tensor_shape(tensor_shape1);
+ const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(misc::shape_calculator::compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1);
+ }
+
+ if(output->total_size() != 0)
+ {
+ if(n != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
+ }
}
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
{
- Window win = Window();
- bool window_changed = false;
+ bool window_changed{};
+ Window win{};
unsigned int num_elems_processed_per_iteration_x = 0;
const unsigned int num_elems_processed_per_iteration_y = 4;
@@ -1538,11 +1586,19 @@ NEGEMMMatrixMultiplyKernel::NEGEMMMatrixMultiplyKernel()
{
}
-void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha)
+void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
{
- // Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
+
+ // Output tensor auto inizialitation if not yet initialized
+ TensorShape tensor_shape{ input0->info()->tensor_shape() };
+ tensor_shape.set(0, is_interleaved ? reshape_info.n() : input1->info()->dimension(0));
+ tensor_shape.set(1, is_interleaved ? reshape_info.m() : input0->info()->dimension(1));
+
+ auto_init_if_empty(*output->info(), input0->info()->clone()->set_tensor_shape(tensor_shape));
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), alpha, is_interleaved, reshape_info));
_input0 = input0;
_input1 = input1;
@@ -1555,9 +1611,10 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
INEKernel::configure(win_config.second);
}
-Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved,
+ const GEMMReshapeInfo &reshape_info)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, alpha, is_interleaved, reshape_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()).first);
return Status{};
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 633f78de4b..4fa329bf44 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include <arm_neon.h>
#include <cstddef>
#include <cstdint>
@@ -42,14 +44,34 @@ using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+ bool has_bias, bool is_fully_connected, bool is_flatten)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
- ARM_COMPUTE_UNUSED(kernel_dims);
- ARM_COMPUTE_UNUSED(conv_info);
+
+ if(is_flatten) /* Called by FlattenLayer */
+ {
+ size_t flatten_shape = input->tensor_shape().x() * input->tensor_shape().y() * input->tensor_shape().z();
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != flatten_shape);
+ }
+ else if(!is_fully_connected) /* Called by ConvolutionLayer */
+ {
+ std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1);
+ }
+ else /* Called by FullyConnectedLayer */
+ {
+ const int num_batch_dimensions = std::max(0, static_cast<int>(output->tensor_shape().num_dimensions()) - 1);
+ const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions;
+
+ TensorInfo expected_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_im2col_shape(input, num_input_dimensions));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output);
+ }
return Status{};
}
@@ -291,12 +313,15 @@ NEIm2ColKernel::NEIm2ColKernel()
{
}
-void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+ bool has_bias, bool is_fully_connected, bool is_flatten)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias));
+ ARM_COMPUTE_UNUSED(is_fully_connected);
+ ARM_COMPUTE_UNUSED(is_flatten);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
_input = input;
_output = output;
@@ -382,9 +407,10 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
IKernel::configure(window);
}
-Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+ bool has_bias, bool is_fully_connected, bool is_flatten)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
return Status{};
}