diff options
18 files changed, 314 insertions, 145 deletions
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h index 9d445e3004..d391cac889 100644 --- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -66,7 +66,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. Data types supported: F16, F32 + * @param[in] input The input tensor to convert. Data types supported: F16/F32 * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h index d2224b53e1..b3b5cd8e80 100644 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { class ICLTensor; +class Size2D; /** Interface for the im2col reshape kernel. * @@ -67,15 +68,15 @@ public: CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] convolved_dims The convolved output dimensions. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. */ - void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -101,8 +102,6 @@ private: const ICLTensor *_input; ICLTensor *_output; std::pair<unsigned int, unsigned int> _convolved_dims; - PadStrideInfo _conv_info; - int _kernel_size; unsigned int _num_elems_processed_per_iteration; Im2ColFunction _run_func; }; diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h index 0d00f0e00e..8732c6094b 100644 --- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -63,4 +63,4 @@ private: ICLTensor *_output; }; } -#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */
\ No newline at end of file diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index e219ce2e0e..9b8b98b388 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { class ITensor; +class Size2D; /** Interface for the im2col reshape kernel. * @@ -71,14 +72,14 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F16/F32 - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] convolved_dims The convolved output dimensions. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F16/F32 + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. */ - void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); // Inherited methods overridden: void run(const Window &window) override; @@ -107,7 +108,8 @@ private: ITensor *_output; std::pair<unsigned int, unsigned int> _convolved_dims; PadStrideInfo _conv_info; - unsigned int _kernel_size; + unsigned int _kernel_width; + unsigned int _kernel_height; bool _has_bias; }; } diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl index bd5dfaff68..837fdd70fe 100644 --- a/src/core/CL/cl_kernels/convolution_layer.cl +++ b/src/core/CL/cl_kernels/convolution_layer.cl @@ -27,7 +27,7 @@ * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -35,13 +35,13 @@ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as input + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] bias_ptr Pointer to the bias tensor. Same as input + * @param[in] bias_ptr Pointer to the bias tensor. Same as @p src_ptr * @param[in] bias_stride_x Stride of the bias tensor in X dimension (in bytes) * @param[in] bias_step_x bias_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] bias_offset_first_element_in_bytes The offset of the first element in the source tensor @@ -93,12 +93,13 @@ __kernel void reshape_to_columns( } } +#if(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) /** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -106,48 +107,36 @@ __kernel void reshape_to_columns( * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32 + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] kernel_size The convolution kernel size - * @param[in] kernel_depth The kernel depth - * @param[in] width The output tensor width - * @param[in] input_dims The input tensor dimensions - * @param[in] strides The strides of the im2col operation - * @param[in] paddings The input tensor paddings */ __kernel void im2col_generic( TENSOR3D_DECLARATION(src), - IMAGE_DECLARATION(dst), - int kernel_size, - int kernel_depth, - int width, - int2 input_dims, - int2 strides, - int2 paddings) + IMAGE_DECLARATION(dst)) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Image dst = CONVERT_TO_IMAGE_STRUCT_NO_STEP(dst); // Determine output index - uint idx = (get_global_id(1) * width + get_global_id(0)) * dst.stride_y; + uint idx = (get_global_id(1) * CONVOLVED_WIDTH + get_global_id(0)) * dst.stride_y; __global uchar *output_ptr = dst.ptr + idx; // Determine current input index - const int top_left_x = get_global_id(0) * strides.x - paddings.x; - const int top_left_y = get_global_id(1) * strides.y - paddings.y; + const int top_left_x = get_global_id(0) * STRIDE_X - PAD_X; + const int top_left_y = get_global_id(1) * STRIDE_Y - PAD_Y; // Linearize convolution elements - for(int d = 0; d < kernel_depth; ++d) + for(int d = 0; d < KERNEL_DEPTH; ++d) { - for(int y = top_left_y, y_e = top_left_y + kernel_size; y < y_e; ++y) + for(int y = top_left_y, y_e = top_left_y + KERNEL_HEIGHT; y < y_e; ++y) { - for(int x = top_left_x, x_e = top_left_x + kernel_size; x < x_e; ++x, output_ptr += dst.stride_x) + for(int x = top_left_x, x_e = top_left_x + KERNEL_WIDTH; x < x_e; ++x, output_ptr += dst.stride_x) { - if(x < 0 || x >= input_dims.x || y < 0 || y >= input_dims.y) + if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT) { *((__global DATA_TYPE *)output_ptr) = 0; } @@ -160,21 +149,22 @@ __kernel void im2col_generic( } #if defined HAS_BIAS - *((__global DATA_TYPE *)output_ptr) = 1; + *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)1; #endif } +#endif //(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) /** This kernel performs a reshaping of the output of the convolution layer. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32 + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) @@ -202,7 +192,7 @@ __kernel void col2im( * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float * @note In case biases will be added in late stage, -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -210,7 +200,7 @@ __kernel void col2im( * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as input. + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index ad66c39483..679943ba3e 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -61,8 +61,12 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p // Configure window Window win = calculate_max_window(*input->info(), Steps()); + // The CLCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + ICLKernel::configure(win); } diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 8c0fe26666..092f495f92 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -29,8 +29,10 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" #include <cmath> #include <tuple> @@ -38,14 +40,14 @@ using namespace arm_compute; CLIm2ColKernel::CLIm2ColKernel() - : _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _num_elems_processed_per_iteration(1), _run_func(nullptr) + : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) { } -void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); _input = input; _output = output; @@ -70,44 +72,23 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::p if(!run_img2col_reduced) { - _convolved_dims = convolved_dims; - _conv_info = conv_info; - _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2)); + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + kernel_dims.width, kernel_dims.height, + conv_info); _num_elems_processed_per_iteration = output->info()->dimension(0); - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts)); + build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); + build_opts.emplace("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); + build_opts.emplace("-DKERNEL_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2))); + build_opts.emplace("-DCONVOLVED_WIDTH=" + support::cpp11::to_string(_convolved_dims.first)); + build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first)); + build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second)); + build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first)); + build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); - // Create static kernel arguments - const cl_int2 input_dims = - { - { - static_cast<cl_int>(input->info()->dimension(0)), - static_cast<cl_int>(input->info()->dimension(1)), - } - }; - const cl_int2 strides = - { - { - stride_x, - stride_y, - } - }; - const cl_int2 paddings = - { - { - pad_x, - pad_y, - } - }; - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor() + num_arguments_per_3D_tensor(); - _kernel.setArg<cl_int>(idx++, _kernel_size); - _kernel.setArg<cl_int>(idx++, input->info()->dimension(2) /* depth */); - _kernel.setArg<cl_int>(idx++, _convolved_dims.first /* output width */); - _kernel.setArg<cl_int2>(idx++, input_dims); - _kernel.setArg<cl_int2>(idx++, strides); - _kernel.setArg<cl_int2>(idx++, paddings); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts)); _run_func = &CLIm2ColKernel::run_generic; } @@ -136,13 +117,6 @@ void CLIm2ColKernel::run_generic(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - int pad_x = 0; - int pad_y = 0; - int stride_x = 0; - int stride_y = 0; - std::tie(pad_x, pad_y) = _conv_info.pad(); - std::tie(stride_x, stride_y) = _conv_info.stride(); - // Get initial windows Window slice = window.first_slice_window_3D(); Window slice_in = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 845bd3799d..82634164de 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -42,14 +42,7 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel() void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3])); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4])); ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1)); const DataType dt = input->info()->data_type(); const int fixed_point_position = input->info()->fixed_point_position(); @@ -67,6 +60,16 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1)); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2)); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3])); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4])); + } + _biases = biases; _output = output; _input = input; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 875c08ed42..99daa2e5e7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" @@ -47,7 +48,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, bool has_bias, int top_left_x, int top_left_y, - int kernel_size, + int kernel_width, + int kernel_height, int kernel_depth, int input_w, int input_h, @@ -56,9 +58,9 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_z, int fixed_point_position) { - const int kernel_size2 = kernel_size * kernel_size; - const int x_e = top_left_x + kernel_size; - const int y_e = top_left_y + kernel_size; + const int kernel_size2 = kernel_width * kernel_height; + const int x_e = top_left_x + kernel_width; + const int y_e = top_left_y + kernel_height; // Linearize volume int d = 0; @@ -109,8 +111,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, if((y < 0 || y >= input_h) && has_pads) { // All the values will be zeros - memset(out_ptr, 0, kernel_size * sizeof(T)); - out_ptr += kernel_size; + memset(out_ptr, 0, kernel_width * sizeof(T)); + out_ptr += kernel_width; } else { @@ -199,7 +201,8 @@ void NEIm2ColKernel::run_generic(const Window &window) _has_bias, top_left_x, top_left_y, - static_cast<int>(_kernel_size), + static_cast<int>(_kernel_width), + static_cast<int>(_kernel_height), kernel_depth, input_w, input_h, @@ -260,22 +263,24 @@ void NEIm2ColKernel::run_reduced(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _has_bias(false) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false) { } -void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias) +void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, DataType::QS8); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); _input = input; _output = output; - _convolved_dims = convolved_dims; _conv_info = conv_info; - _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2)); - _has_bias = has_bias; + _kernel_width = kernel_dims.width; + _kernel_height = kernel_dims.height, + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + _kernel_width, _kernel_height, + _conv_info); + _has_bias = has_bias; unsigned int pad_x, pad_y, stride_x, stride_y = 0; std::tie(pad_x, pad_y) = conv_info.pad(); diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index e9b76e7967..ac688e1381 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -97,13 +97,13 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1)); - const DataType dt = input->info()->data_type(); - const int fixed_point_position = input->info()->fixed_point_position(); - - TensorShape output_shape{ input->info()->tensor_shape() }; + const int fixed_point_position = input->info()->fixed_point_position(); + const DataType dt = input->info()->data_type(); + const TensorShape &input_shape = input->info()->tensor_shape(); + TensorShape output_shape{ input_shape }; output_shape.collapse(3); + const size_t tmp_dim = output_shape[0]; output_shape.set(0, output_shape[1]); output_shape.set(1, tmp_dim + (bias != nullptr ? 1 : 0)); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 933d92bef7..b29bf8f136 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -40,16 +41,13 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); if(biases != nullptr) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); @@ -98,8 +96,6 @@ CLConvolutionLayer::CLConvolutionLayer() void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); @@ -191,7 +187,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); if(_is_fully_connected_convolution) diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 57d57d517f..b51e709927 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -126,7 +127,7 @@ void CLFullyConnectedLayer::configure_conv_fc_wb(const ICLTensor *input, const I _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure interleave4x4 kernel _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); @@ -176,7 +177,7 @@ void CLFullyConnectedLayer::configure_conv_fc_nb(const ICLTensor *input, const I _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp index 0e6d23e0d8..ef6fb50bbf 100644 --- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp @@ -100,7 +100,7 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(conv_w, conv_h), conv_info, _has_bias); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index b38d6617d5..dc8652747f 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -41,7 +42,6 @@ NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); @@ -97,8 +97,6 @@ NEConvolutionLayer::NEConvolutionLayer() void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); @@ -106,7 +104,6 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, if(biases != nullptr) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3)); @@ -197,7 +194,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position)); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); if(_is_fully_connected_convolution) { _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index abb41e9f70..6e27ed344a 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -126,7 +127,7 @@ void NEFullyConnectedLayer::configure_conv_fc_wb(const ITensor *input, const ITe _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure interleave4x4 kernel _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); @@ -176,7 +177,7 @@ void NEFullyConnectedLayer::configure_conv_fc_nb(const ITensor *input, const ITe _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp index 3b095b4688..e7c71e04d1 100644 --- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -65,10 +65,13 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei std::tie(stride_x, stride_y) = conv_info.stride(); std::tie(pad_x, pad_y) = conv_info.pad(); + const unsigned int kernel_width = weights->info()->dimension(0); + const unsigned int kernel_height = weights->info()->dimension(1); + // Get convolved dimensions unsigned int conv_w = 0; unsigned int conv_h = 0; - std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1), + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, conv_info); ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); @@ -100,7 +103,7 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); diff --git a/tests/dataset/ConvolutionLayerDataset.h b/tests/dataset/ConvolutionLayerDataset.h index 85f46cceb5..402fae31ad 100644 --- a/tests/dataset/ConvolutionLayerDataset.h +++ b/tests/dataset/ConvolutionLayerDataset.h @@ -73,7 +73,7 @@ template <unsigned int Size> using ConvolutionLayerDataset = GenericDataset<ConvolutionLayerDataObject, Size>; /** Data set containing small convolution layer shapes */ -class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset<3> +class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset<6> { public: SmallConvolutionLayerDataset() @@ -81,7 +81,10 @@ public: { ConvolutionLayerDataObject{ TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(11U, 25U, 21U), PadStrideInfo(2, 1, 0, 0) }, ConvolutionLayerDataObject{ TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0) }, - ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(15U, 15U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) } + ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(15U, 15U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) }, + ConvolutionLayerDataObject{ TensorShape(23U, 27U, 5U), TensorShape(3U, 1U, 5U, 21U), TensorShape(21U), TensorShape(11U, 27U, 21U), PadStrideInfo(2, 1, 0, 0) }, + ConvolutionLayerDataObject{ TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 11U, 16U), PadStrideInfo(3, 2, 1, 0) }, + ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 3U, 2U, 19U), TensorShape(19U), TensorShape(15U, 16U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) } } { } diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp new file mode 100644 index 0000000000..60e8754193 --- /dev/null +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" + +#include "TypePrinter.h" +#include "dataset/ConvolutionLayerDataset.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include <random> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ + +CLTensor compute_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt, + const PadStrideInfo &conv_info, int fixed_point_position) +{ + // Create tensors + CLTensor src = create_tensor<CLTensor>(input_shape, dt, 1, fixed_point_position); + CLTensor weights = create_tensor<CLTensor>(weights_shape, dt, 1, fixed_point_position); + CLTensor bias = create_tensor<CLTensor>(bias_shape, dt, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(output_shape, dt, 1, fixed_point_position); + + // Create and configure function + CLConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, conv_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!weights.info()->is_resizable()); + BOOST_TEST(!bias.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(CLAccessor(src), distribution, 0); + library->fill(CLAccessor(weights), distribution, 1); + library->fill(CLAccessor(bias), distribution, 2); + } + else + { + library->fill_tensor_uniform(CLAccessor(src), 0); + library->fill_tensor_uniform(CLAccessor(weights), 1); + library->fill_tensor_uniform(CLAccessor(bias), 2); + } + + // Compute CLConvolutionLayer function + conv.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(ConvolutionLayer) +BOOST_AUTO_TEST_SUITE(GEMM) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32 }), + conv_set, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (dt == DataType::F32) ? 0 : 3; + + // Create tensors + CLTensor src = create_tensor<CLTensor>(conv_set.src_shape, dt, 1, fixed_point_position); + CLTensor weights = create_tensor<CLTensor>(conv_set.weights_shape, dt, 1, fixed_point_position); + CLTensor bias = create_tensor<CLTensor>(conv_set.bias_shape, dt, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(conv_set.dst_shape, dt, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(weights.info()->is_resizable()); + BOOST_TEST(bias.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + CLConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, conv_set.info); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(conv_set.src_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(conv_set.weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(conv_set.bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(conv_set.dst_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); +} + +#ifdef ARM_COMPUTE_ENABLE_FP16 +BOOST_AUTO_TEST_SUITE(Float16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallConvolutionLayer, + SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F16), + conv_set, dt) +{ + // Compute function + CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_f32); +} +BOOST_AUTO_TEST_SUITE_END() +#endif + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallConvolutionLayer, + SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32), + conv_set, dt) +{ + // Compute function + CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_f32); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(LargeConvolutionLayer, + AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32), + conv_set, dt) +{ + // Compute function + CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_f32); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif |