about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h           5
-rw-r--r--  src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp  1
-rw-r--r--  src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp                16
-rw-r--r--  src/core/NEON/kernels/NEFillBorderKernel.cpp                            4
-rw-r--r--  src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp                 72
5 files changed, 54 insertions, 44 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 6d8ce1f6f4..09a54968bb 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -62,7 +62,7 @@ public:
* Supported sizes: 1x1, 3x3 and 5x5.
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported: Same as @p input.
- * @param[in] bias Set of biases. Data type supported: Same as @p input.
+ * @param[in] bias Set of biases. Can be nullptr. Data type supported: Same as @p input.
* @param[out] output Output tensor.
* The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -80,7 +80,7 @@ public:
* Supported sizes: 1x1, 3x3 and 5x5.
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported: Same as @p input.
- * @param[in] bias Set of biases. Data type supported: Same as @p input.
+ * @param[in] bias Set of biases. Can be nullptr. Data type supported: Same as @p input.
* @param[in] output Output tensor.
* The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -98,6 +98,7 @@ private:
NEDirectConvolutionLayerKernel _conv_kernel;
NEFillBorderKernel _input_border_handler;
Tensor _accumulator;
+ bool _has_bias;
};
}
#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index a6585ade12..65b7087d7e 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -244,6 +244,7 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, bias);
+ // Auto-initialize output if required
if(output != nullptr)
{
// Output tensor auto initialization if not yet initialized
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 1ca213b04a..2ba0ef2e69 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1048,7 +1048,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row,
- unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration)
+ unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration, BorderSize &border_size)
{
// Calculate right and bottom border
unsigned int kernel_size = weights->dimension(0);
@@ -1056,7 +1056,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
- BorderSize border_size = BorderSize(conv_pad_y, conv_pad_x);
const int input_width = input->dimension(0);
const int input_height = input->dimension(1);
@@ -1182,7 +1181,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row,
- _num_elems_read_per_iteration, _num_elems_written_per_iteration);
+ _num_elems_read_per_iteration, _num_elems_written_per_iteration, _border_size);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -1192,9 +1191,16 @@ Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const
unsigned int num_weight_elems_read_per_row = 0;
unsigned int num_elems_read_per_iteration = 0;
unsigned int num_elems_written_per_iteration = 0;
+ BorderSize border_size(conv_info.pad().second, conv_info.pad().first);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration,
- num_elems_written_per_iteration)
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
+ weights->clone().get(),
+ output->clone().get(),
+ conv_info,
+ num_weight_elems_read_per_row,
+ num_elems_read_per_iteration,
+ num_elems_written_per_iteration,
+ border_size)
.first);
return Status{};
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index c66e057f23..af04955608 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -47,8 +47,8 @@ inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *t
float border_value;
constant_border_value.get(border_value);
uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
- const size_t &width = tensor->info()->valid_region().shape[0];
- const size_t &height = tensor->info()->valid_region().shape[1];
+ const size_t width = tensor->info()->valid_region().shape[0];
+ const size_t height = tensor->info()->valid_region().shape[1];
const int stridey = tensor->info()->strides_in_bytes()[1];
// Left and right border
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 2eabe459a5..ef5d987832 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -34,7 +34,7 @@
using namespace arm_compute;
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator()
+ : _memory_group(std::move(memory_manager)), _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator(), _has_bias(false)
{
}
@@ -46,38 +46,29 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
_accumulator.allocator()->free();
}
+ // Check if bias should be added in the convolution result
+ _has_bias = (bias != nullptr);
+
// Allocate the intermediate accumulator tensor in case of fixed point input
- switch(output->info()->data_type())
+ if(is_data_type_fixed_point(input->info()->data_type()))
{
- case DataType::QS8:
- {
- _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::QS16, output->info()->fixed_point_position()));
- _memory_group.manage(&_accumulator);
- _conv_kernel.configure(input, weights, &_accumulator, conv_info);
- _accumulate_bias_kernel.configure(&_accumulator, bias, output);
- _accumulator.allocator()->allocate();
- break;
- }
- case DataType::QS16:
+ const DataType promoted_dt = (input->info()->data_type() == DataType::QS8) ? DataType::QS16 : DataType::QS32;
+ _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, promoted_dt, output->info()->fixed_point_position()));
+ _memory_group.manage(&_accumulator);
+ _conv_kernel.configure(input, weights, &_accumulator, conv_info);
+ // TODO (COMPMID-746): Fix accumulate biases to just down-cast when no bias is provided
+ if(_has_bias)
{
- _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::QS32, output->info()->fixed_point_position()));
- _memory_group.manage(&_accumulator);
- _conv_kernel.configure(input, weights, &_accumulator, conv_info);
_accumulate_bias_kernel.configure(&_accumulator, bias, output);
- _accumulator.allocator()->allocate();
- break;
}
- case DataType::F16:
- case DataType::F32:
+ _accumulator.allocator()->allocate();
+ }
+ else
+ {
+ _conv_kernel.configure(input, weights, output, conv_info);
+ if(_has_bias)
{
- _conv_kernel.configure(input, weights, output, conv_info);
_accumulate_bias_kernel.configure(output, bias);
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Data type not supported");
- break;
}
}
@@ -87,7 +78,7 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, bias, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
DataType data_type = output->data_type();
if(is_data_type_fixed_point(data_type))
@@ -97,14 +88,22 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
}
TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->dimension(0) != weights->dimension(3),
- "Biases size and number of input feature maps should match");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1,
- "Biases should be one dimensional");
-
+ // Validate Convolution kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerKernel::validate(input, weights, &accumulator, conv_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerBiasAccumulateKernel::validate(&accumulator, bias, output));
+
+ // Validate bias
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((bias == nullptr) && is_data_type_fixed_point(data_type),
+ "Biases should be provided for fixed point inputs");
+ if(bias != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->dimension(0) != weights->dimension(3),
+ "Biases size and number of input feature maps should match");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1, "Biases should be one dimensional");
+
+ // Validate bias kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerBiasAccumulateKernel::validate(&accumulator, bias, output));
+ }
return Status{};
}
@@ -116,7 +115,10 @@ void NEDirectConvolutionLayer::run()
_memory_group.acquire();
NEScheduler::get().schedule(&_conv_kernel, Window::DimZ);
- NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY);
+ if(_has_bias)
+ {
+ NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY);
+ }
_memory_group.release();
}