aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex Gilday <alexander.gilday@arm.com>2018-03-23 14:16:00 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:16 +0000
commit7da29b6b12ff319ed2b6e2c46588dfa1991556fb (patch)
tree24e766d916ae8da32deb5cd4fac4d82207cbe6ea /src
parentf92cb23f06572fe73ec5ab9da0ec5713724c2dde (diff)
downloadComputeLibrary-7da29b6b12ff319ed2b6e2c46588dfa1991556fb.tar.gz
COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC
Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/im2col.cl7
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp136
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs5
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp12
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp45
-rw-r--r--src/core/Utils.cpp11
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp18
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp16
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp7
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp19
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp17
11 files changed, 161 insertions, 132 deletions
diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl
index 75d99bda85..1e85e1b303 100644
--- a/src/core/CL/cl_kernels/im2col.cl
+++ b/src/core/CL/cl_kernels/im2col.cl
@@ -680,6 +680,7 @@ __kernel void im2col_generic_padx0_pady0_dchw(
* @note The pad_left, pad_right, pad_top and pad_bottom must be passed at compile time using -DPAD_LEFT, -DPAD_RIGHT, -DPAD_TOP and -DPAD_BOTTOM: e.g. -DPAD_LEFT=1, -DPAD_RIGHT=2, -DPAD_TOP=3 and -DPAD_BOTTOM=2
* @note The zero value to store in case we load values out-of-bounds must be passed at compile time using -DPAD_VALUE: e.g. -DPAD_VALUE=0.0
* @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
+ * @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
@@ -722,10 +723,12 @@ __kernel void im2col_generic_dchw(
__global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + yo * dst_stride_y + batch * dst_stride_w)) + xo;
// Linearize convolution elements
- for(int y = yi, y_e = yi + KERNEL_HEIGHT; y < y_e; ++y)
+ for(int yk = 0; yk < KERNEL_HEIGHT; ++yk)
{
- for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr)
+ int y = yi + yk * DILATION_Y;
+ for(int xk = 0; xk < KERNEL_WIDTH; ++xk, ++output_ptr)
{
+ int x = xi + xk * DILATION_X;
#if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
*output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y));
#else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 9bc4787384..cc19d3c263 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -41,11 +41,12 @@ using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
// Checks performed when output is configured
if(output->total_size() != 0)
@@ -63,12 +64,12 @@ CLIm2ColKernel::CLIm2ColKernel()
{
}
-void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation));
_input = input;
_output = output;
@@ -107,7 +108,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
_convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
kernel_dims.width, kernel_dims.height,
- conv_info);
+ conv_info, dilation);
build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height));
@@ -122,77 +123,82 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.add_option("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom()));
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
+ build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
+ build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0");
const bool squared_im2col = kernel_dims.width == kernel_dims.height;
- if(squared_im2col && !is_data_type_fixed_point(data_type))
+ if(dilation == Size2D(1U, 1U))
{
- // Check if we can run an optimized im2col
- switch(kernel_dims.width)
+ if(squared_im2col && !is_data_type_fixed_point(data_type))
{
- case 1:
- // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false
- if(conv_info.stride().first == 1 && !conv_info.has_padding())
- {
- // Set hint for LWS
+ // Check if we can run an optimized im2col
+ switch(kernel_dims.width)
+ {
+ case 1:
+ // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false
+ if(conv_info.stride().first == 1 && !conv_info.has_padding())
+ {
+ // Set hint for LWS
+ _lws_hint = cl::NDRange(1, 1, 8);
+ _num_elems_processed_per_iteration = 4;
+ is_optimized_path = true;
+ kernel_name = "im2col1x1_stridex1_dchw";
+ }
+ break;
+ case 3:
_lws_hint = cl::NDRange(1, 1, 8);
- _num_elems_processed_per_iteration = 4;
+ _num_elems_processed_per_iteration = 1;
is_optimized_path = true;
- kernel_name = "im2col1x1_stridex1_dchw";
- }
- break;
- case 3:
- _lws_hint = cl::NDRange(1, 1, 8);
- _num_elems_processed_per_iteration = 1;
- is_optimized_path = true;
- kernel_name = "im2col3x3_dchw";
- break;
- case 5:
- _num_elems_processed_per_iteration = 1;
- is_optimized_path = true;
- kernel_name = "im2col5x5_dchw";
- break;
- case 11:
- // Optimized im2col11x11 if pad_x = pad_y = 0
- if(!conv_info.has_padding())
- {
+ kernel_name = "im2col3x3_dchw";
+ break;
+ case 5:
_num_elems_processed_per_iteration = 1;
is_optimized_path = true;
- kernel_name = "im2col11x11_padx0_pady0_dchw";
- }
- break;
- default:
- is_optimized_path = false;
- break;
- }
- }
- else if(kernel_dims.width > 1 && !conv_info.has_padding())
- {
- _num_elems_processed_per_iteration = 1;
- kernel_name = "im2col_generic_padx0_pady0_dchw";
-
- // Optimized im2col is performed using one or more vector operations with the specified vector size
- // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4
- // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3.
- // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3.
- // Using the vector size of 8, however, may be faster.
- size_t vector_size = 4;
- // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0
- // is used instead.)
- if(kernel_dims.width < vector_size)
- {
- vector_size = kernel_dims.width;
+ kernel_name = "im2col5x5_dchw";
+ break;
+ case 11:
+ // Optimized im2col11x11 if pad_x = pad_y = 0
+ if(!conv_info.has_padding())
+ {
+ _num_elems_processed_per_iteration = 1;
+ is_optimized_path = true;
+ kernel_name = "im2col11x11_padx0_pady0_dchw";
+ }
+ break;
+ default:
+ is_optimized_path = false;
+ break;
+ }
}
- // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost.
- if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11)
+ else if(kernel_dims.width > 1 && !conv_info.has_padding())
{
- _lws_hint = cl::NDRange(1, 1, 1);
- vector_size = 8;
+ _num_elems_processed_per_iteration = 1;
+ kernel_name = "im2col_generic_padx0_pady0_dchw";
+
+ // Optimized im2col is performed using one or more vector operations with the specified vector size
+ // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4
+ // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3.
+ // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3.
+ // Using the vector size of 8, however, may be faster.
+ size_t vector_size = 4;
+ // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0
+ // is used instead.)
+ if(kernel_dims.width < vector_size)
+ {
+ vector_size = kernel_dims.width;
+ }
+ // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost.
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11)
+ {
+ _lws_hint = cl::NDRange(1, 1, 1);
+ vector_size = 8;
+ }
+ const size_t width_mod_vector_size = kernel_dims.width % vector_size;
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+ build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size));
}
- const size_t width_mod_vector_size = kernel_dims.width % vector_size;
- build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
- build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size));
}
_run_func = &CLIm2ColKernel::run_generic;
}
@@ -206,7 +212,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
- // Configure kernel window
+ // Configure kernel window
Window win;
if(is_optimized_path)
{
@@ -250,12 +256,12 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
_config_id += support::cpp11::to_string(output->info()->dimension(1));
}
-Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(kernel_dims);
ARM_COMPUTE_UNUSED(conv_info);
ARM_COMPUTE_UNUSED(has_bias);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias, dilation));
return Status{};
}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs
index 2701f5b262..ad3f14d442 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs
@@ -164,6 +164,7 @@ void main()
* @note STRIDE_X/STRIDE_Y must be passed for stride info, e.g. "#define STRIDE_X xxx"
* @note CONVOLVED_WIDTH/CONVOLVED_HEIGHT must be passed for convolved dimension, e.g. "#define CONVOLVED_WIDTH xxx"
* @note SRC_WIDTH/SRC_HEIGHT must be passed for input dimension, e.g. "#define SRC_WIDTH xxx"
+ * @note DILATION_X/DILATION_Y must be passed for dilation sizes, e.g. "#define DILATION_X xxx"
* @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row.
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
@@ -210,9 +211,9 @@ void main(void)
uint src_pos = 0u;
// Linearize convolution elements
- for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y)
+ for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT) * uint(DILATION_Y); y < y_e; y += uint(DILATION_Y))
{
- for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x, TENSOR_OFFSET_ADVANCE(dst_iter, 1u))
+ for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH) * uint(DILATION_X); x < x_e; x += uint(DILATION_X), TENSOR_OFFSET_ADVANCE(dst_iter, 1u))
{
#if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
src_pos = TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, x * src_attrs.stride_x + y * src_attrs.stride_y);
diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
index 47bfebcc09..eb790471fb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
@@ -65,7 +65,7 @@ GCIm2ColKernel::GCIm2ColKernel()
{
}
-void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -98,7 +98,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
&& (std::equal(input->info()->tensor_shape().cbegin() + 3,
input->info()->tensor_shape().cend(),
output->info()->tensor_shape().cbegin() + 1))
- && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding());
+ && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding())
+ && (dilation == Size2D(1U, 1U));
std::string kernel_name = "im2col_generic";
if(!run_img2col_reduced)
@@ -111,7 +112,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
build_opts.emplace("#define IM2COL_GENERIC");
_convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
kernel_dims.width, kernel_dims.height,
- conv_info);
+ conv_info, dilation);
_num_elems_processed_per_iteration = 2;
build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width));
@@ -127,6 +128,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom()));
build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1)));
+ build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x()));
+ build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y()));
_run_func = &GCIm2ColKernel::run_generic;
}
@@ -206,11 +209,12 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const
IGCKernel::configure(win);
}
-Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(kernel_dims);
ARM_COMPUTE_UNUSED(conv_info);
ARM_COMPUTE_UNUSED(has_bias);
+ ARM_COMPUTE_UNUSED(dilation);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
return Status{};
}
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index dee1608c43..348722c55d 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -45,12 +45,13 @@ using namespace arm_compute;
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, bool is_fully_connected, bool is_flatten)
+ bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
+ ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
if(is_flatten) /* Called by FlattenLayer */
{
@@ -59,7 +60,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
else if(!is_fully_connected) /* Called by ConvolutionLayer */
{
- std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info);
+ std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info, dilation);
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0)));
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second));
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1);
@@ -91,11 +92,13 @@ inline void linearize_volume(const uint8_t *const in_ptr,
int input_stride_y,
int input_stride_z,
int fixed_point_position,
- int pad_value)
+ int pad_value,
+ int dilation_x,
+ int dilation_y)
{
const int kernel_size2 = kernel_width * kernel_height;
- const int x_e = top_left_x + kernel_width;
- const int y_e = top_left_y + kernel_height;
+ const int x_e = top_left_x + kernel_width * dilation_x;
+ const int y_e = top_left_y + kernel_height * dilation_y;
// Linearize volume
int d = 0;
@@ -104,12 +107,12 @@ inline void linearize_volume(const uint8_t *const in_ptr,
// 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs
for(; d <= (kernel_depth - 3); d += 3)
{
- for(int y = top_left_y; y < y_e; ++y)
+ for(int y = top_left_y; y < y_e; y += dilation_y)
{
if((y < 0 || y >= input_h) && has_pads)
{
// All the values will be the offset (will be zeros when not quantized)
- for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
+ for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
*(out_ptr + 0 * kernel_size2) = pad_value;
*(out_ptr + 1 * kernel_size2) = pad_value;
@@ -118,7 +121,7 @@ inline void linearize_volume(const uint8_t *const in_ptr,
}
else
{
- for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
+ for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
if((x < 0 || x >= input_w) && has_pads)
{
@@ -141,7 +144,7 @@ inline void linearize_volume(const uint8_t *const in_ptr,
// Left over
for(; d < kernel_depth; d++)
{
- for(int y = top_left_y; y < y_e; ++y)
+ for(int y = top_left_y; y < y_e; y += dilation_y)
{
if((y < 0 || y >= input_h) && has_pads)
{
@@ -151,7 +154,7 @@ inline void linearize_volume(const uint8_t *const in_ptr,
}
else
{
- for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
+ for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
{
if((x < 0 || x >= input_w) && has_pads)
{
@@ -251,7 +254,9 @@ void NEIm2ColKernel::run_generic(const Window &window)
input_stride_y,
input_stride_z,
_input->info()->fixed_point_position(),
- offset);
+ offset,
+ _dilation.x(),
+ _dilation.y());
},
in, out);
}
@@ -309,27 +314,28 @@ void NEIm2ColKernel::run_reduced(const Window &window)
}
NEIm2ColKernel::NEIm2ColKernel()
- : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false)
+ : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U)
{
}
void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, bool is_fully_connected, bool is_flatten)
+ bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Perform validation step
ARM_COMPUTE_UNUSED(is_fully_connected, is_flatten);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation));
_input = input;
_output = output;
_conv_info = conv_info;
_kernel_width = kernel_dims.width;
- _kernel_height = kernel_dims.height,
+ _kernel_height = kernel_dims.height;
+ _dilation = dilation;
_convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
_kernel_width, _kernel_height,
- _conv_info);
+ _conv_info, _dilation);
_has_bias = has_bias;
unsigned int stride_x = 0;
@@ -340,7 +346,8 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
&& (std::equal(input->info()->tensor_shape().cbegin() + 3,
input->info()->tensor_shape().cend(),
output->info()->tensor_shape().cbegin() + 1))
- && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding());
+ && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding())
+ && ((dilation.x() == 1) && (dilation.y() == 1));
Window window = calculate_max_window(*input->info(), Steps());
@@ -407,9 +414,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
}
Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, bool is_fully_connected, bool is_flatten)
+ bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation));
return Status{};
}
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index f4b45532cf..4a237f9daa 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -292,7 +292,8 @@ const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_di
const std::pair<unsigned int, unsigned int> arm_compute::scaled_dimensions(unsigned int width, unsigned int height,
unsigned int kernel_width, unsigned int kernel_height,
- const PadStrideInfo &pad_stride_info)
+ const PadStrideInfo &pad_stride_info,
+ const Size2D &dilation)
{
const unsigned int pad_left = pad_stride_info.pad_left();
const unsigned int pad_top = pad_stride_info.pad_top();
@@ -305,12 +306,12 @@ const std::pair<unsigned int, unsigned int> arm_compute::scaled_dimensions(unsig
switch(pad_stride_info.round())
{
case DimensionRoundingType::FLOOR:
- w = static_cast<unsigned int>(std::floor((static_cast<float>(width + pad_left + pad_right - kernel_width) / stride_x) + 1));
- h = static_cast<unsigned int>(std::floor((static_cast<float>(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1));
+ w = static_cast<unsigned int>(std::floor((static_cast<float>(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1));
+ h = static_cast<unsigned int>(std::floor((static_cast<float>(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1));
break;
case DimensionRoundingType::CEIL:
- w = static_cast<unsigned int>(std::ceil((static_cast<float>(width + pad_left + pad_right - kernel_width) / stride_x) + 1));
- h = static_cast<unsigned int>(std::ceil((static_cast<float>(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1));
+ w = static_cast<unsigned int>(std::ceil((static_cast<float>(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1));
+ h = static_cast<unsigned int>(std::ceil((static_cast<float>(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1));
break;
default:
ARM_COMPUTE_ERROR("Unsupported rounding type");
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 1a486ce5c7..64bda93ff0 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -42,13 +42,14 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
{
}
-void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info));
+ ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info, CLScheduler::get().target()))
+ weights_info, CLScheduler::get().target(), dilation))
{
case ConvolutionMethod::DIRECT:
{
@@ -60,7 +61,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
case ConvolutionMethod::GEMM:
{
auto f = arm_compute::support::cpp14::make_unique<CLGEMMConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info, weights_info);
+ f->configure(input, weights, biases, output, conv_info, weights_info, dilation);
_function = std::move(f);
break;
}
@@ -71,14 +72,14 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
}
Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info)
+ const WeightsInfo &weights_info, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
//Configure if the parameters match the direct convolution or the gemm-based
const GPUTarget gpu_target = CLScheduler::get().target();
- switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target))
+ switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation))
{
case ConvolutionMethod::DIRECT:
{
@@ -89,7 +90,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
case ConvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
- CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info);
+ CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
break;
}
default:
@@ -101,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const GPUTarget gpu_target)
+ const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(input);
ARM_COMPUTE_UNUSED(weights);
@@ -110,6 +111,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
ARM_COMPUTE_UNUSED(conv_info);
ARM_COMPUTE_UNUSED(weights_info);
ARM_COMPUTE_UNUSED(gpu_target);
+ ARM_COMPUTE_UNUSED(dilation);
return ConvolutionMethod::GEMM;
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index bc339f176f..e7ad62f5ff 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -151,7 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
return Status{};
}
-void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -160,7 +161,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
biases != nullptr ? biases->info() : nullptr,
output->info(),
conv_info,
- weights_info));
+ weights_info,
+ dilation));
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -187,7 +189,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
const unsigned int kernel_width = weights->info()->dimension(0);
const unsigned int kernel_height = weights->info()->dimension(1);
std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
- conv_info);
+ conv_info, dilation);
unsigned int mat_weights_cols = weights->info()->dimension(3);
unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element;
@@ -224,7 +226,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
_memory_group.manage(&_gemm_output);
// Configure im2col
- _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation);
// Configure GEMM
configure_mm(&_im2col_output, weights, &_gemm_output);
@@ -260,7 +262,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
}
Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info)
+ const WeightsInfo &weights_info, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
@@ -282,7 +284,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
const unsigned int kernel_width = weights->dimension(0);
const unsigned int kernel_height = weights->dimension(1);
- std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info);
+ std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info, dilation);
unsigned int mat_weights_cols = weights->dimension(3);
unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element;
@@ -298,7 +300,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
shape_im2col.set(2, 1);
TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->fixed_point_position());
im2col_reshaped_info.set_quantization_info(input->quantization_info());
- CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias);
+ CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation);
// Create GEMM output tensor
TensorShape shape_gemm = im2col_reshaped_info.tensor_shape();
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index f4c073668a..c2b7e02284 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -102,7 +102,8 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w
_mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed);
}
-void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -136,7 +137,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0);
const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1);
std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
- conv_info);
+ conv_info, dilation);
// Check if its a "fully connected" convolution
_is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1));
@@ -229,7 +230,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
input->info()->extend_padding(border_size);
_fill_border.configure(input, border_size, BorderMode::CONSTANT, PixelValue(0)); // for PAD of im2col fp16: consider it as border
}
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation);
// Configure matrix multiply
if(run_interleaved)
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index d4421e8429..e659495b7c 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -40,14 +40,15 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
{
}
-void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const Size2D &dilation)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info));
+ ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info))
+ weights_info, dilation))
{
case ConvolutionMethod::WINOGRAD:
{
@@ -59,7 +60,7 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const
case ConvolutionMethod::GEMM:
{
auto f = arm_compute::support::cpp14::make_unique<NEGEMMConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info, weights_info);
+ f->configure(input, weights, biases, output, conv_info, weights_info, dilation);
_function = std::move(f);
break;
}
@@ -77,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const
}
Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info)
+ const WeightsInfo &weights_info, const Size2D &dilation)
{
- switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info))
+ switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation))
{
case ConvolutionMethod::WINOGRAD:
//Validate Winograd
@@ -87,7 +88,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
break;
case ConvolutionMethod::GEMM:
//Validate Gemm-based Convolution
- NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info);
+ NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
break;
case ConvolutionMethod::DIRECT:
//Validate Gemm-based Convolution
@@ -101,12 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info)
+ const WeightsInfo &weights_info, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(weights_info);
if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1)
- && (conv_info.stride().second == 1) && (biases != nullptr))
+ && (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U)))
{
return ConvolutionMethod::WINOGRAD;
}
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 3b8b4243e5..d9707d95e0 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -170,7 +170,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height,
bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized,
unsigned int &mat_weights_cols, unsigned int &mat_weights_rows,
- unsigned int &conv_w, unsigned int &conv_h)
+ unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -205,7 +205,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (append_bias ? 1 : 0);
std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height,
- conv_info);
+ conv_info, dilation);
// Check if its a "fully connected" convolution
is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1));
@@ -246,7 +246,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
}
}
-void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const Size2D &dilation)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -262,7 +263,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped,
kernel_width, kernel_height,
_is_fully_connected_convolution, _is_interleaved, _is_quantized,
- mat_weights_cols, mat_weights_rows, conv_w, conv_h);
+ mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation);
ARM_COMPUTE_ERROR_THROW_ON(status);
@@ -362,7 +363,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
// Configure kernels
// Configure im2col
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias);
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation);
// Configure matrix multiply
if(run_optimised)
@@ -420,7 +421,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info)
+ const WeightsInfo &weights_info, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(output);
@@ -439,7 +440,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows,
- conv_w, conv_h);
+ conv_w, conv_h, dilation);
const Size2D kernel_weights = Size2D(kernel_width, kernel_height);
@@ -517,7 +518,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
shape_im2col.set(1, mat_input_rows);
shape_im2col.set(2, 1);
TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col);
- ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false, false, dilation));
// Create GEMM output tensor
TensorShape shape_gemm(im2_col_info.tensor_shape());