author     Alex Gilday <alexander.gilday@arm.com>       2018-03-23 14:16:00 +0000
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:49:16 +0000
commit     7da29b6b12ff319ed2b6e2c46588dfa1991556fb (patch)
tree       24e766d916ae8da32deb5cd4fac4d82207cbe6ea /src/core/CL/kernels/CLIm2ColKernel.cpp
parent     f92cb23f06572fe73ec5ab9da0ec5713724c2dde (diff)
download   ComputeLibrary-7da29b6b12ff319ed2b6e2c46588dfa1991556fb.tar.gz
COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC
Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLIm2ColKernel.cpp')
-rw-r--r--   src/core/CL/kernels/CLIm2ColKernel.cpp   136
1 file changed, 71 insertions(+), 65 deletions(-)
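
For context on what the new Size2D &dilation argument feeds into: a dilation of d stretches a k-tap kernel over an effective span of (k - 1) * d + 1 input elements, so the convolved output that im2col has to lay out as columns shrinks accordingly. The sketch below only illustrates that arithmetic under assumed FLOOR rounding and symmetric padding; it is not the library's scaled_dimensions() implementation and the helper name is made up.

    #include <cstddef>
    #include <utility>

    // Hypothetical helper: effective kernel span under dilation and the
    // resulting convolved output size (FLOOR rounding, symmetric padding assumed).
    std::pair<std::size_t, std::size_t> dilated_output_dims(std::size_t in_w, std::size_t in_h,
                                                            std::size_t k_w, std::size_t k_h,
                                                            std::size_t stride_x, std::size_t stride_y,
                                                            std::size_t pad_x, std::size_t pad_y,
                                                            std::size_t dil_x, std::size_t dil_y)
    {
        const std::size_t k_eff_w = (k_w - 1) * dil_x + 1; // e.g. 3x3 kernel, dilation 2 -> spans 5 inputs
        const std::size_t k_eff_h = (k_h - 1) * dil_y + 1;
        const std::size_t out_w   = (in_w + 2 * pad_x - k_eff_w) / stride_x + 1;
        const std::size_t out_h   = (in_h + 2 * pad_y - k_eff_h) / stride_y + 1;
        return { out_w, out_h };
    }

With dilation == Size2D(1U, 1U) the span reduces to the plain kernel size, which is why the configure() change below only takes the optimized im2col paths in that case.
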
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 9bc4787384..cc19d3c263 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -41,11 +41,12 @@ using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
// Checks performed when output is configured
if(output->total_size() != 0)
@@ -63,12 +64,12 @@ CLIm2ColKernel::CLIm2ColKernel()
{
}
-void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation));
_input = input;
_output = output;
@@ -107,7 +108,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
_convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
kernel_dims.width, kernel_dims.height,
- conv_info);
+ conv_info, dilation);
build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height));
@@ -122,77 +123,82 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.add_option("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom()));
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
+ build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
+ build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0");
const bool squared_im2col = kernel_dims.width == kernel_dims.height;
- if(squared_im2col && !is_data_type_fixed_point(data_type))
+ if(dilation == Size2D(1U, 1U))
{
- // Check if we can run an optimized im2col
- switch(kernel_dims.width)
+ if(squared_im2col && !is_data_type_fixed_point(data_type))
{
- case 1:
- // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false
- if(conv_info.stride().first == 1 && !conv_info.has_padding())
- {
- // Set hint for LWS
+ // Check if we can run an optimized im2col
+ switch(kernel_dims.width)
+ {
+ case 1:
+ // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false
+ if(conv_info.stride().first == 1 && !conv_info.has_padding())
+ {
+ // Set hint for LWS
+ _lws_hint = cl::NDRange(1, 1, 8);
+ _num_elems_processed_per_iteration = 4;
+ is_optimized_path = true;
+ kernel_name = "im2col1x1_stridex1_dchw";
+ }
+ break;
+ case 3:
_lws_hint = cl::NDRange(1, 1, 8);
- _num_elems_processed_per_iteration = 4;
+ _num_elems_processed_per_iteration = 1;
is_optimized_path = true;
- kernel_name = "im2col1x1_stridex1_dchw";
- }
- break;
- case 3:
- _lws_hint = cl::NDRange(1, 1, 8);
- _num_elems_processed_per_iteration = 1;
- is_optimized_path = true;
- kernel_name = "im2col3x3_dchw";
- break;
- case 5:
- _num_elems_processed_per_iteration = 1;
- is_optimized_path = true;
- kernel_name = "im2col5x5_dchw";
- break;
- case 11:
- // Optimized im2col11x11 if pad_x = pad_y = 0
- if(!conv_info.has_padding())
- {
+ kernel_name = "im2col3x3_dchw";
+ break;
+ case 5:
_num_elems_processed_per_iteration = 1;
is_optimized_path = true;
- kernel_name = "im2col11x11_padx0_pady0_dchw";
- }
- break;
- default:
- is_optimized_path = false;
- break;
- }
- }
- else if(kernel_dims.width > 1 && !conv_info.has_padding())
- {
- _num_elems_processed_per_iteration = 1;
- kernel_name = "im2col_generic_padx0_pady0_dchw";
-
- // Optimized im2col is performed using one or more vector operations with the specified vector size
- // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4
- // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3.
- // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3.
- // Using the vector size of 8, however, may be faster.
- size_t vector_size = 4;
- // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0
- // is used instead.)
- if(kernel_dims.width < vector_size)
- {
- vector_size = kernel_dims.width;
+ kernel_name = "im2col5x5_dchw";
+ break;
+ case 11:
+ // Optimized im2col11x11 if pad_x = pad_y = 0
+ if(!conv_info.has_padding())
+ {
+ _num_elems_processed_per_iteration = 1;
+ is_optimized_path = true;
+ kernel_name = "im2col11x11_padx0_pady0_dchw";
+ }
+ break;
+ default:
+ is_optimized_path = false;
+ break;
+ }
}
- // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost.
- if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11)
+ else if(kernel_dims.width > 1 && !conv_info.has_padding())
{
- _lws_hint = cl::NDRange(1, 1, 1);
- vector_size = 8;
+ _num_elems_processed_per_iteration = 1;
+ kernel_name = "im2col_generic_padx0_pady0_dchw";
+
+ // Optimized im2col is performed using one or more vector operations with the specified vector size
+ // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4
+ // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3.
+ // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3.
+ // Using the vector size of 8, however, may be faster.
+ size_t vector_size = 4;
+ // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0
+ // is used instead.)
+ if(kernel_dims.width < vector_size)
+ {
+ vector_size = kernel_dims.width;
+ }
+ // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost.
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11)
+ {
+ _lws_hint = cl::NDRange(1, 1, 1);
+ vector_size = 8;
+ }
+ const size_t width_mod_vector_size = kernel_dims.width % vector_size;
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+ build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size));
}
- const size_t width_mod_vector_size = kernel_dims.width % vector_size;
- build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
- build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size));
}
_run_func = &CLIm2ColKernel::run_generic;
}
@@ -206,7 +212,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
- // Configure kernel window
+ // Configure kernel window
Window win;
if(is_optimized_path)
{
@@ -250,12 +256,12 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
_config_id += support::cpp11::to_string(output->info()->dimension(1));
}
-Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(kernel_dims);
ARM_COMPUTE_UNUSED(conv_info);
ARM_COMPUTE_UNUSED(has_bias);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias, dilation));
return Status{};
}
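
A hedged usage sketch of the extended interface follows: checking support for a dilated 3x3 im2col before configuring the kernel. The tensor shapes, the default-constructed output info (a total_size() of 0 skips the output checks in validate_arguments(), as in the hunk above), the header path, and the Status-to-bool conversion are assumptions for illustration, not taken from the patch.

    #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" // header path assumed for this release
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    bool im2col_3x3_dilation2_supported()
    {
        const TensorInfo    src(TensorShape(32U, 32U, 16U), 1, DataType::F32); // illustrative input
        const TensorInfo    dst;                                               // empty: output checks skipped
        const PadStrideInfo conv_info(1, 1, 1, 1);                             // stride 1, pad 1
        const Size2D        kernel_dims(3U, 3U);
        const Size2D        dilation(2U, 2U);                                  // the new argument

        return bool(CLIm2ColKernel::validate(&src, &dst, kernel_dims, conv_info,
                                             false /* has_bias */, dilation));
    }

configure() takes the same trailing dilation argument, as shown in the hunk that extends its signature.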