aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/core/CL/kernels/CLCol2ImKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLIm2ColKernel.h19
-rw-r--r--arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NEIm2ColKernel.h18
-rw-r--r--src/core/CL/cl_kernels/convolution_layer.cl50
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp66
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.cpp17
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp31
-rw-r--r--src/core/NEON/kernels/NEWeightsReshapeKernel.cpp10
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp14
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp5
-rw-r--r--src/runtime/CL/functions/CLLocallyConnectedLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp7
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp5
-rw-r--r--src/runtime/NEON/functions/NELocallyConnectedLayer.cpp7
-rw-r--r--tests/dataset/ConvolutionLayerDataset.h7
-rw-r--r--tests/validation/CL/ConvolutionLayer.cpp191
18 files changed, 314 insertions, 145 deletions
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index 9d445e3004..d391cac889 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -66,7 +66,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. Data types supported: F16, F32
+ * @param[in] input The input tensor to convert. Data types supported: F16/F32
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index d2224b53e1..b3b5cd8e80 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
class ICLTensor;
+class Size2D;
/** Interface for the im2col reshape kernel.
*
@@ -67,15 +68,15 @@ public:
CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] convolved_dims The convolved output dimensions.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32
+ * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
*/
- void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias);
+ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -101,8 +102,6 @@ private:
const ICLTensor *_input;
ICLTensor *_output;
std::pair<unsigned int, unsigned int> _convolved_dims;
- PadStrideInfo _conv_info;
- int _kernel_size;
unsigned int _num_elems_processed_per_iteration;
Im2ColFunction _run_func;
};
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index 0d00f0e00e..8732c6094b 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -63,4 +63,4 @@ private:
ICLTensor *_output;
};
}
-#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */
+#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */ \ No newline at end of file
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index e219ce2e0e..9b8b98b388 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
class ITensor;
+class Size2D;
/** Interface for the im2col reshape kernel.
*
@@ -71,14 +72,14 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F16/F32
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] convolved_dims The convolved output dimensions.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F16/F32
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
*/
- void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias);
+ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
// Inherited methods overridden:
void run(const Window &window) override;
@@ -107,7 +108,8 @@ private:
ITensor *_output;
std::pair<unsigned int, unsigned int> _convolved_dims;
PadStrideInfo _conv_info;
- unsigned int _kernel_size;
+ unsigned int _kernel_width;
+ unsigned int _kernel_height;
bool _has_bias;
};
}
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index bd5dfaff68..837fdd70fe 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -27,7 +27,7 @@
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -35,13 +35,13 @@
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Same as input
+ * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] bias_ptr Pointer to the bias tensor. Same as input
+ * @param[in] bias_ptr Pointer to the bias tensor. Same as @p src_ptr
* @param[in] bias_stride_x Stride of the bias tensor in X dimension (in bytes)
* @param[in] bias_step_x bias_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] bias_offset_first_element_in_bytes The offset of the first element in the source tensor
@@ -93,12 +93,13 @@ __kernel void reshape_to_columns(
}
}
+#if(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT)
/** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -106,48 +107,36 @@ __kernel void reshape_to_columns(
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] kernel_size The convolution kernel size
- * @param[in] kernel_depth The kernel depth
- * @param[in] width The output tensor width
- * @param[in] input_dims The input tensor dimensions
- * @param[in] strides The strides of the im2col operation
- * @param[in] paddings The input tensor paddings
*/
__kernel void im2col_generic(
TENSOR3D_DECLARATION(src),
- IMAGE_DECLARATION(dst),
- int kernel_size,
- int kernel_depth,
- int width,
- int2 input_dims,
- int2 strides,
- int2 paddings)
+ IMAGE_DECLARATION(dst))
{
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Image dst = CONVERT_TO_IMAGE_STRUCT_NO_STEP(dst);
// Determine output index
- uint idx = (get_global_id(1) * width + get_global_id(0)) * dst.stride_y;
+ uint idx = (get_global_id(1) * CONVOLVED_WIDTH + get_global_id(0)) * dst.stride_y;
__global uchar *output_ptr = dst.ptr + idx;
// Determine current input index
- const int top_left_x = get_global_id(0) * strides.x - paddings.x;
- const int top_left_y = get_global_id(1) * strides.y - paddings.y;
+ const int top_left_x = get_global_id(0) * STRIDE_X - PAD_X;
+ const int top_left_y = get_global_id(1) * STRIDE_Y - PAD_Y;
// Linearize convolution elements
- for(int d = 0; d < kernel_depth; ++d)
+ for(int d = 0; d < KERNEL_DEPTH; ++d)
{
- for(int y = top_left_y, y_e = top_left_y + kernel_size; y < y_e; ++y)
+ for(int y = top_left_y, y_e = top_left_y + KERNEL_HEIGHT; y < y_e; ++y)
{
- for(int x = top_left_x, x_e = top_left_x + kernel_size; x < x_e; ++x, output_ptr += dst.stride_x)
+ for(int x = top_left_x, x_e = top_left_x + KERNEL_WIDTH; x < x_e; ++x, output_ptr += dst.stride_x)
{
- if(x < 0 || x >= input_dims.x || y < 0 || y >= input_dims.y)
+ if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
{
*((__global DATA_TYPE *)output_ptr) = 0;
}
@@ -160,21 +149,22 @@ __kernel void im2col_generic(
}
#if defined HAS_BIAS
- *((__global DATA_TYPE *)output_ptr) = 1;
+ *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)1;
#endif
}
+#endif //(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT)
/** This kernel performs a reshaping of the output of the convolution layer.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -202,7 +192,7 @@ __kernel void col2im(
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
* @note In case biases will be added in late stage, -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -210,7 +200,7 @@ __kernel void col2im(
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Same as input.
+ * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index ad66c39483..679943ba3e 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -61,8 +61,12 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
// Configure window
Window win = calculate_max_window(*input->info(), Steps());
+
// The CLCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+ Coordinates coord;
+ coord.set_num_dimensions(output->info()->num_dimensions());
+ output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+
ICLKernel::configure(win);
}
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 8c0fe26666..092f495f92 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -29,8 +29,10 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "support/ToolchainSupport.h"
#include <cmath>
#include <tuple>
@@ -38,14 +40,14 @@
using namespace arm_compute;
CLIm2ColKernel::CLIm2ColKernel()
- : _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _num_elems_processed_per_iteration(1), _run_func(nullptr)
+ : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr)
{
}
-void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias)
+void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
_input = input;
_output = output;
@@ -70,44 +72,23 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
if(!run_img2col_reduced)
{
- _convolved_dims = convolved_dims;
- _conv_info = conv_info;
- _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2));
+ _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
+ kernel_dims.width, kernel_dims.height,
+ conv_info);
_num_elems_processed_per_iteration = output->info()->dimension(0);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts));
+ build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
+ build_opts.emplace("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height));
+ build_opts.emplace("-DKERNEL_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
+ build_opts.emplace("-DCONVOLVED_WIDTH=" + support::cpp11::to_string(_convolved_dims.first));
+ build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first));
+ build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second));
+ build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first));
+ build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second));
+ build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
+ build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
- // Create static kernel arguments
- const cl_int2 input_dims =
- {
- {
- static_cast<cl_int>(input->info()->dimension(0)),
- static_cast<cl_int>(input->info()->dimension(1)),
- }
- };
- const cl_int2 strides =
- {
- {
- stride_x,
- stride_y,
- }
- };
- const cl_int2 paddings =
- {
- {
- pad_x,
- pad_y,
- }
- };
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_2D_tensor() + num_arguments_per_3D_tensor();
- _kernel.setArg<cl_int>(idx++, _kernel_size);
- _kernel.setArg<cl_int>(idx++, input->info()->dimension(2) /* depth */);
- _kernel.setArg<cl_int>(idx++, _convolved_dims.first /* output width */);
- _kernel.setArg<cl_int2>(idx++, input_dims);
- _kernel.setArg<cl_int2>(idx++, strides);
- _kernel.setArg<cl_int2>(idx++, paddings);
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts));
_run_func = &CLIm2ColKernel::run_generic;
}
@@ -136,13 +117,6 @@ void CLIm2ColKernel::run_generic(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
- int pad_x = 0;
- int pad_y = 0;
- int stride_x = 0;
- int stride_y = 0;
- std::tie(pad_x, pad_y) = _conv_info.pad();
- std::tie(stride_x, stride_y) = _conv_info.stride();
-
// Get initial windows
Window slice = window.first_slice_window_3D();
Window slice_in = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 845bd3799d..82634164de 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -42,14 +42,7 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel()
void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3]));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4]));
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1));
const DataType dt = input->info()->data_type();
const int fixed_point_position = input->info()->fixed_point_position();
@@ -67,6 +60,16 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ if(biases != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
+ ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1));
+ ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2));
+ ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3]));
+ ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4]));
+ }
+
_biases = biases;
_output = output;
_input = input;
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 875c08ed42..99daa2e5e7 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
@@ -47,7 +48,8 @@ inline void linearize_volume(const uint8_t *const in_ptr,
bool has_bias,
int top_left_x,
int top_left_y,
- int kernel_size,
+ int kernel_width,
+ int kernel_height,
int kernel_depth,
int input_w,
int input_h,
@@ -56,9 +58,9 @@ inline void linearize_volume(const uint8_t *const in_ptr,
int input_stride_z,
int fixed_point_position)
{
- const int kernel_size2 = kernel_size * kernel_size;
- const int x_e = top_left_x + kernel_size;
- const int y_e = top_left_y + kernel_size;
+ const int kernel_size2 = kernel_width * kernel_height;
+ const int x_e = top_left_x + kernel_width;
+ const int y_e = top_left_y + kernel_height;
// Linearize volume
int d = 0;
@@ -109,8 +111,8 @@ inline void linearize_volume(const uint8_t *const in_ptr,
if((y < 0 || y >= input_h) && has_pads)
{
// All the values will be zeros
- memset(out_ptr, 0, kernel_size * sizeof(T));
- out_ptr += kernel_size;
+ memset(out_ptr, 0, kernel_width * sizeof(T));
+ out_ptr += kernel_width;
}
else
{
@@ -199,7 +201,8 @@ void NEIm2ColKernel::run_generic(const Window &window)
_has_bias,
top_left_x,
top_left_y,
- static_cast<int>(_kernel_size),
+ static_cast<int>(_kernel_width),
+ static_cast<int>(_kernel_height),
kernel_depth,
input_w,
input_h,
@@ -260,22 +263,24 @@ void NEIm2ColKernel::run_reduced(const Window &window)
}
NEIm2ColKernel::NEIm2ColKernel()
- : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _has_bias(false)
+ : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false)
{
}
-void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias)
+void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, DataType::QS8);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
_input = input;
_output = output;
- _convolved_dims = convolved_dims;
_conv_info = conv_info;
- _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2));
- _has_bias = has_bias;
+ _kernel_width = kernel_dims.width;
+ _kernel_height = kernel_dims.height,
+ _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
+ _kernel_width, _kernel_height,
+ _conv_info);
+ _has_bias = has_bias;
unsigned int pad_x, pad_y, stride_x, stride_y = 0;
std::tie(pad_x, pad_y) = conv_info.pad();
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index e9b76e7967..ac688e1381 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -97,13 +97,13 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1));
- const DataType dt = input->info()->data_type();
- const int fixed_point_position = input->info()->fixed_point_position();
-
- TensorShape output_shape{ input->info()->tensor_shape() };
+ const int fixed_point_position = input->info()->fixed_point_position();
+ const DataType dt = input->info()->data_type();
+ const TensorShape &input_shape = input->info()->tensor_shape();
+ TensorShape output_shape{ input_shape };
output_shape.collapse(3);
+
const size_t tmp_dim = output_shape[0];
output_shape.set(0, output_shape[1]);
output_shape.set(1, tmp_dim + (bias != nullptr ? 1 : 0));
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 933d92bef7..b29bf8f136 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -40,16 +41,13 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases, output);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
if(biases != nullptr)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
@@ -98,8 +96,6 @@ CLConvolutionLayer::CLConvolutionLayer()
void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
@@ -191,7 +187,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
_output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
if(_is_fully_connected_convolution)
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 57d57d517f..b51e709927 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -126,7 +127,7 @@ void CLFullyConnectedLayer::configure_conv_fc_wb(const ICLTensor *input, const I
_interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position));
// Configure im2col kernel
- _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
// Configure interleave4x4 kernel
_interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output);
@@ -176,7 +177,7 @@ void CLFullyConnectedLayer::configure_conv_fc_nb(const ICLTensor *input, const I
_im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position));
// Configure im2col kernel
- _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
// Configure matrix multiply kernel
_mm_kernel.configure(&_im2col_output, weights, output, 1.0f);
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index 0e6d23e0d8..ef6fb50bbf 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -100,7 +100,7 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(conv_w, conv_h), conv_info, _has_bias);
_weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
_mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
_output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index b38d6617d5..dc8652747f 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -41,7 +42,6 @@ NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()
void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
@@ -97,8 +97,6 @@ NEConvolutionLayer::NEConvolutionLayer()
void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output);
ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2));
@@ -106,7 +104,6 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
if(biases != nullptr)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3));
@@ -197,7 +194,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position));
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
if(_is_fully_connected_convolution)
{
_mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f);
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index abb41e9f70..6e27ed344a 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -126,7 +127,7 @@ void NEFullyConnectedLayer::configure_conv_fc_wb(const ITensor *input, const ITe
_interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position));
// Configure im2col kernel
- _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
// Configure interleave4x4 kernel
_interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output);
@@ -176,7 +177,7 @@ void NEFullyConnectedLayer::configure_conv_fc_nb(const ITensor *input, const ITe
_im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position));
// Configure im2col kernel
- _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
// Configure matrix multiply kernel
_mm_kernel.configure(&_im2col_output, weights, output, 1.0f);
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index 3b095b4688..e7c71e04d1 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -65,10 +65,13 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
std::tie(stride_x, stride_y) = conv_info.stride();
std::tie(pad_x, pad_y) = conv_info.pad();
+ const unsigned int kernel_width = weights->info()->dimension(0);
+ const unsigned int kernel_height = weights->info()->dimension(1);
+
// Get convolved dimensions
unsigned int conv_w = 0;
unsigned int conv_h = 0;
- std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
+ std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
conv_info);
ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
@@ -100,7 +103,7 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
_weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
_mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
_output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
diff --git a/tests/dataset/ConvolutionLayerDataset.h b/tests/dataset/ConvolutionLayerDataset.h
index 85f46cceb5..402fae31ad 100644
--- a/tests/dataset/ConvolutionLayerDataset.h
+++ b/tests/dataset/ConvolutionLayerDataset.h
@@ -73,7 +73,7 @@ template <unsigned int Size>
using ConvolutionLayerDataset = GenericDataset<ConvolutionLayerDataObject, Size>;
/** Data set containing small convolution layer shapes */
-class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset<3>
+class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset<6>
{
public:
SmallConvolutionLayerDataset()
@@ -81,7 +81,10 @@ public:
{
ConvolutionLayerDataObject{ TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(11U, 25U, 21U), PadStrideInfo(2, 1, 0, 0) },
ConvolutionLayerDataObject{ TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0) },
- ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(15U, 15U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) }
+ ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(15U, 15U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) },
+ ConvolutionLayerDataObject{ TensorShape(23U, 27U, 5U), TensorShape(3U, 1U, 5U, 21U), TensorShape(21U), TensorShape(11U, 27U, 21U), PadStrideInfo(2, 1, 0, 0) },
+ ConvolutionLayerDataObject{ TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 11U, 16U), PadStrideInfo(3, 2, 1, 0) },
+ ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 3U, 2U, 19U), TensorShape(19U), TensorShape(15U, 16U, 19U, 7U), PadStrideInfo(1, 2, 1, 1) }
}
{
}
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
new file mode 100644
index 0000000000..60e8754193
--- /dev/null
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+
+#include "TypePrinter.h"
+#include "dataset/ConvolutionLayerDataset.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+
+CLTensor compute_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
+ const PadStrideInfo &conv_info, int fixed_point_position)
+{
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(input_shape, dt, 1, fixed_point_position);
+ CLTensor weights = create_tensor<CLTensor>(weights_shape, dt, 1, fixed_point_position);
+ CLTensor bias = create_tensor<CLTensor>(bias_shape, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(output_shape, dt, 1, fixed_point_position);
+
+ // Create and configure function
+ CLConvolutionLayer conv;
+ conv.configure(&src, &weights, &bias, &dst, conv_info);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ weights.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ BOOST_TEST(!src.info()->is_resizable());
+ BOOST_TEST(!weights.info()->is_resizable());
+ BOOST_TEST(!bias.info()->is_resizable());
+ BOOST_TEST(!dst.info()->is_resizable());
+
+ // Fill tensors
+ if(dt == DataType::F32)
+ {
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(CLAccessor(src), distribution, 0);
+ library->fill(CLAccessor(weights), distribution, 1);
+ library->fill(CLAccessor(bias), distribution, 2);
+ }
+ else
+ {
+ library->fill_tensor_uniform(CLAccessor(src), 0);
+ library->fill_tensor_uniform(CLAccessor(weights), 1);
+ library->fill_tensor_uniform(CLAccessor(bias), 2);
+ }
+
+ // Compute CLConvolutionLayer function
+ conv.run();
+
+ return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(ConvolutionLayer)
+BOOST_AUTO_TEST_SUITE(GEMM)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration,
+ AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32 }),
+ conv_set, dt)
+{
+ // Set fixed point position data type allowed
+ int fixed_point_position = (dt == DataType::F32) ? 0 : 3;
+
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(conv_set.src_shape, dt, 1, fixed_point_position);
+ CLTensor weights = create_tensor<CLTensor>(conv_set.weights_shape, dt, 1, fixed_point_position);
+ CLTensor bias = create_tensor<CLTensor>(conv_set.bias_shape, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(conv_set.dst_shape, dt, 1, fixed_point_position);
+
+ BOOST_TEST(src.info()->is_resizable());
+ BOOST_TEST(weights.info()->is_resizable());
+ BOOST_TEST(bias.info()->is_resizable());
+ BOOST_TEST(dst.info()->is_resizable());
+
+ // Create and configure function
+ CLConvolutionLayer conv;
+ conv.configure(&src, &weights, &bias, &dst, conv_set.info);
+
+ // Validate valid region
+ const ValidRegion src_valid_region = shape_to_valid_region(conv_set.src_shape);
+ const ValidRegion weights_valid_region = shape_to_valid_region(conv_set.weights_shape);
+ const ValidRegion bias_valid_region = shape_to_valid_region(conv_set.bias_shape);
+ const ValidRegion dst_valid_region = shape_to_valid_region(conv_set.dst_shape);
+
+ validate(src.info()->valid_region(), src_valid_region);
+ validate(weights.info()->valid_region(), weights_valid_region);
+ validate(bias.info()->valid_region(), bias_valid_region);
+ validate(dst.info()->valid_region(), dst_valid_region);
+}
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(Float16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
+ SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F16),
+ conv_set, dt)
+{
+ // Compute function
+ CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f32);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
+ SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32),
+ conv_set, dt)
+{
+ // Compute function
+ CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f32);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(LargeConvolutionLayer,
+ AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32),
+ conv_set, dt)
+{
+ // Compute function
+ CLTensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f32);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif