author     Giorgio Arena <giorgio.arena@arm.com>    2018-03-16 14:02:34 +0000
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:50:48 +0000
commit     c0f54434383f945d95f95549c1c4b0d5f5d2caff (patch)
tree       c4dadc7d83fa9dccef8cd7e85b31223266946093 /src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
parent     3c520c5a6ca9352560828fdf389d31e38b85afeb (diff)
download   ComputeLibrary-c0f54434383f945d95f95549c1c4b0d5f5d2caff.tar.gz
COMPMID-808 Add NHWC data format support for NEON direct convolution
Change-Id: I5d4cc3d5b0d25f3fe4ed998c0f15b1b8e260a43a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125697
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp')
-rw-r--r--  src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp  33
1 file changed, 8 insertions, 25 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 13ee9a1d14..e1fa650e81 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -34,6 +34,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "support/ToolchainSupport.h"
@@ -41,26 +42,6 @@ using namespace arm_compute;
namespace
{
-/** Calculates expected output shape dimension
- *
- * @param[in] Input shape
- *
- * @return Expected output shape
- */
-TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info)
-{
- unsigned int output_width = 0;
- unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info);
-
- TensorShape output_shape = input_shape;
- output_shape.set(0, output_width);
- output_shape.set(1, output_height);
- output_shape.set(2, weights_shape[3]);
-
- return output_shape;
-}
-
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
@@ -100,7 +81,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- get_output_shape(input->tensor_shape(), weights->tensor_shape(), conv_info));
+ misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
@@ -114,7 +95,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const DataType data_type = input->data_type();
// Get convolved dimensions
- TensorShape output_shape = get_output_shape(input->tensor_shape(), weights->tensor_shape(), conv_info);
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info);
// Output auto inizialitation if not yet initialized
// FIXME: input->clone()->set_tensor_shape(output_shape) doesn't work with subtensors for grouped direct convolutions (AlexNet).
@@ -134,7 +115,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
unsigned int num_elems_written_per_iteration_x = 0;
unsigned int num_elems_written_per_iteration_y = 0;
- if(gpu_target_is_in(target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX) && (kernel_size <= 5) && (conv_stride_x == 1) && (conv_stride_y == 1) && (data_type == DataType::F32))
+ if(gpu_target_is_in(target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX) && (kernel_size <= 5) && (conv_stride_x == 1)
+ && (conv_stride_y == 1) && (data_type == DataType::F32))
{
// Configure kernel window
@@ -274,7 +256,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
const DataType data_type = input->info()->data_type();
// Get convolved dimensions
- TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info);
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*input->info(), *weights->info(), conv_info);
// Output auto inizialitation if not yet initialized
// FIXME: input->clone()->set_tensor_shape(output_shape) doesn't work with subtensors for grouped direct convolutions (AlexNet).
@@ -309,7 +291,8 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
CLBuildOptions build_options;
build_options.add_option_if(_biases != nullptr, std::string("-DHAS_BIAS"));
- if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX) && (kernel_size <= 5) && (_conv_stride_x == 1) && (_conv_stride_y == 1) && (data_type == DataType::F32))
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX) && (kernel_size <= 5) && (_conv_stride_x == 1)
+ && (_conv_stride_y == 1) && (data_type == DataType::F32))
{
build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(2))));
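
For reference, the helper that this patch deletes computed the direct convolution output shape as shown in the sketch below. This is a hypothetical standalone rewrite of the removed get_output_shape() (the name example_direct_conv_output_shape is invented for illustration), assuming an NCHW layout where dimension 0 is W, dimension 1 is H and dimension 2 is the feature-map axis. The shared misc::shape_calculator::compute_deep_convolution_shape() helper pulled in via ShapeCalculator.h is expected to produce the equivalent shape directly from the ITensorInfo objects.

    // Minimal sketch mirroring the removed get_output_shape() helper (illustrative only).
    #include <tuple>
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/Utils.h"

    arm_compute::TensorShape example_direct_conv_output_shape(const arm_compute::TensorShape &input_shape,
                                                              const arm_compute::TensorShape &weights_shape,
                                                              const arm_compute::PadStrideInfo &conv_info)
    {
        unsigned int output_width  = 0;
        unsigned int output_height = 0;
        // scaled_dimensions() applies kernel size, stride and padding to the spatial dimensions.
        std::tie(output_width, output_height) = arm_compute::scaled_dimensions(input_shape.x(), input_shape.y(),
                                                                               weights_shape.x(), weights_shape.y(), conv_info);

        arm_compute::TensorShape output_shape = input_shape;
        output_shape.set(0, output_width);     // W
        output_shape.set(1, output_height);    // H
        output_shape.set(2, weights_shape[3]); // one output feature map per kernel
        return output_shape;
    }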