From ec8b45e7333006ae06d834e300c11a4fd1b4c067 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 22 Jun 2017 13:00:39 +0100 Subject: COMPMID-345 - Auto-inizialization for NECol2ImKernel and NEGEMMInterleave4x4Kernel Added support for 8 bit fixed point in CLTransposeKernel Change-Id: I8257fa30e90e70825c97c16b0a11af73c426319c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78563 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen Reviewed-by: Georgios Pinitas --- arm_compute/core/CL/kernels/CLTransposeKernel.h | 2 +- arm_compute/runtime/CL/functions/CLTranspose.h | 2 +- src/core/CL/kernels/CLTransposeKernel.cpp | 5 +++-- src/core/NEON/kernels/NECol2ImKernel.cpp | 10 +++++----- src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 14 +++++++++++--- src/core/NEON/kernels/NETransposeKernel.cpp | 5 +++-- 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h index 9ad183f8f1..79596f34a1 100644 --- a/arm_compute/core/CL/kernels/CLTransposeKernel.h +++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h @@ -40,7 +40,7 @@ class CLTransposeKernel : public ICLSimple2DKernel public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h index 9b57fe00a8..9ac5458a93 100644 --- a/arm_compute/runtime/CL/functions/CLTranspose.h +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -40,7 +40,7 @@ class CLTranspose : public ICLSimpleFunction public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index 2ee6fcb9dc..c1a75f7dff 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -41,7 +41,7 @@ using namespace arm_compute; void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON(output == nullptr); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); TensorShape output_shape{ input->info()->tensor_shape() }; const size_t w_out = input->info()->dimension(1); @@ -52,8 +52,9 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) // Output tensor auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); _input = input; _output = output; diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index 6d370acff1..e9a73607e6 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -69,20 +69,20 @@ NECol2ImKernel::NECol2ImKernel() void NECol2ImKernel::configure(const ITensor *input, ITensor *output, std::pair convolved_dims) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - set_data_type_if_unknown(*output->info(), input->info()->data_type()); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); TensorShape output_shape = input->info()->tensor_shape(); output_shape.set(0, convolved_dims.first); output_shape.set(1, convolved_dims.second); output_shape.set(2, input->info()->tensor_shape()[0]); - set_shape_if_empty(*output->info(), output_shape); + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); _input = input; _output = output; diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 3ff8b7b201..4505dcb363 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -133,10 +133,18 @@ NEGEMMInterleave4x4Kernel::NEGEMMInterleave4x4Kernel() void NEGEMMInterleave4x4Kernel::configure(const ITensor *input, ITensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape = input->info()->tensor_shape(); + output_shape.set(0, input->info()->dimension(0) * 4); + output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f)); + + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != input->info()->dimension(0) * 4); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != std::ceil(input->info()->dimension(1) / 4.0f)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); _input = input; _output = output; diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 492de8a6ee..a990e9068e 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -180,7 +180,7 @@ NETransposeKernel::NETransposeKernel() void NETransposeKernel::configure(const ITensor *input, ITensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON(output == nullptr); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); TensorShape output_shape{ input->info()->tensor_shape() }; const size_t w_out = input->info()->dimension(1); @@ -191,8 +191,9 @@ void NETransposeKernel::configure(const ITensor *input, ITensor *output) // Output tensor auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); _input = input; _output = output; -- cgit v1.2.1