aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJaroslaw Rzepecki <jaroslaw.rzepecki@arm.com>2017-10-13 11:13:58 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commita1ed41fe2427dfa2b5d0139444ceb77ad16a5a73 (patch)
treea57bc2369afea73c190d9bb595b0a229bf8da749 /src
parentb4276c5b76f6eda22d973bfa48ff9612e7f183e5 (diff)
downloadComputeLibrary-a1ed41fe2427dfa2b5d0139444ceb77ad16a5a73.tar.gz
IVGCVSW-601: support for asymetric padding in cl conv and depthwise conv
Change-Id: I5c6c95091ae77dba96459c0640f9f6167a988c8c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/91700 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/convolution_layer.cl14
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution.cl12
-rw-r--r--src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp14
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp24
-rw-r--r--src/core/Utils.cpp30
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp3
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp3
8 files changed, 51 insertions, 55 deletions
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index 9e9d0b0ccc..e3018461e3 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -97,7 +97,7 @@ __kernel void reshape_to_columns(
}
}
-#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
+#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
/** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
@@ -134,8 +134,8 @@ __kernel void im2col_generic(
const int batch = get_global_id(2) / filter_depth; // the batch
// Calculate input indeces
- const int xi = xc * STRIDE_X - PAD_X;
- const int yi = yc * STRIDE_Y - PAD_Y;
+ const int xi = xc * STRIDE_X - PAD_LEFT;
+ const int yi = yc * STRIDE_Y - PAD_TOP;
// Calculate output indeces
const int xo = ch * KERNEL_WIDTH * KERNEL_HEIGHT;
@@ -149,9 +149,9 @@ __kernel void im2col_generic(
{
for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr)
{
-#if PAD_X == 0 && PAD_Y == 0
+#if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
*output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y));
-#else // PAD_X == 0 && PAD_Y == 0
+#else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
{
*output_ptr = 0;
@@ -160,7 +160,7 @@ __kernel void im2col_generic(
{
*output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y));
}
-#endif // PAD_X == 0 && PAD_Y == 0
+#endif // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
}
}
@@ -245,7 +245,7 @@ __kernel void im2col_kernel3x3_padx0_pady0(
}
#endif // HAS_BIAS
}
-#endif //defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
+#endif //defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
#if defined(WIDTH_OUTPUT)
/** This kernel performs a reshaping of the output of the convolution layer.
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index 9c2c3a5b37..081a4e6c44 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -226,11 +226,11 @@ __kernel void depthwise_weights_reshape(TENSOR3D_DECLARATION(src), IMAGE_DECLARA
}
#endif //defined(SRC_WIDTH) && defined(DATA_TYPE)
-#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE)
+#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE)
/** This kernel performs a reshaping of the input tensor to a tensor used to perform depthwise convolution using vector to matrix multiplication.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
- * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_X, -DPAD_Y, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT
+ * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -255,11 +255,11 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d
Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
const int src_pixel_linear = get_global_id(1) * STRIDE_X;
- const int full_length = SRC_WIDTH + 2 * PAD_X;
+ const int full_length = SRC_WIDTH + PAD_LEFT + PAD_RIGHT;
const int max_initial_x = STRIDE_X * (((full_length - KERNEL_WIDTH) / STRIDE_X) + 1);
- const int src_x = -PAD_X + src_pixel_linear % max_initial_x;
- const int src_y = -PAD_Y + src_pixel_linear / max_initial_x * STRIDE_Y;
+ const int src_x = -PAD_LEFT + src_pixel_linear % max_initial_x;
+ const int src_y = -PAD_TOP + src_pixel_linear / max_initial_x * STRIDE_Y;
const int src_z = get_global_id(2);
__global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes + src_z * src_stride_z;
@@ -281,7 +281,7 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d
}
}
-#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE)
+#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE)
#if defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE)
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index 0eaadb80c6..5c7fe7e0b4 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -58,8 +58,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first));
build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second));
- build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first));
- build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second));
+ build_opts.emplace("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left()));
+ build_opts.emplace("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
+ build_opts.emplace("-DPAD_RIGHT=" + support::cpp11::to_string(conv_info.pad_right()));
+ build_opts.emplace("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom()));
build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 98a799f783..6cc45573d8 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -63,18 +63,16 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
}
- int pad_x = 0;
- int pad_y = 0;
int stride_x = 0;
int stride_y = 0;
- std::tie(pad_x, pad_y) = conv_info.pad();
+
std::tie(stride_x, stride_y) = conv_info.stride();
const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
&& (std::equal(input->info()->tensor_shape().cbegin() + 3,
input->info()->tensor_shape().cend(),
output->info()->tensor_shape().cbegin() + 1))
- && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0));
+ && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding());
if(!run_img2col_reduced)
{
@@ -90,12 +88,14 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.emplace("-DCONVOLVED_HEIGHT=" + support::cpp11::to_string(_convolved_dims.second));
build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first));
build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second));
- build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first));
- build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second));
+ build_opts.emplace("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left()));
+ build_opts.emplace("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
+ build_opts.emplace("-DPAD_RIGHT=" + support::cpp11::to_string(conv_info.pad_right()));
+ build_opts.emplace("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom()));
build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
- if(kernel_dims.width == 3 && kernel_dims.height == 3 && conv_info.pad().first == 0 && conv_info.pad().second == 0)
+ if(kernel_dims.width == 3 && kernel_dims.height == 3 && !conv_info.has_padding())
{
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("im2col_kernel3x3_padx0_pady0", build_opts));
}
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 71910e3a69..1c018b269b 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -163,16 +163,17 @@ void NEIm2ColKernel::run_generic(const Window &window)
const int input_stride_y = _input->info()->strides_in_bytes().y();
const int input_stride_z = _input->info()->strides_in_bytes().z();
- int pad_x = 0;
- int pad_y = 0;
+ int pad_left = 0;
+ int pad_top = 0;
int stride_x = 0;
int stride_y = 0;
- std::tie(pad_x, pad_y) = _conv_info.pad();
+ pad_left = _conv_info.pad_left();
+ pad_top = _conv_info.pad_top();
std::tie(stride_x, stride_y) = _conv_info.stride();
// Setup input window
- const int start_x = -pad_x;
- const int start_y = -pad_y;
+ const int start_x = -pad_left;
+ const int start_y = -pad_top;
Window window_in(window);
// The first three dimensions of the input are increased by the inner loops
@@ -291,18 +292,15 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
_conv_info);
_has_bias = has_bias;
- unsigned int pad_x = 0;
- unsigned int pad_y = 0;
unsigned int stride_x = 0;
unsigned int stride_y = 0;
- std::tie(pad_x, pad_y) = conv_info.pad();
std::tie(stride_x, stride_y) = conv_info.stride();
bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
&& (std::equal(input->info()->tensor_shape().cbegin() + 3,
input->info()->tensor_shape().cend(),
output->info()->tensor_shape().cbegin() + 1))
- && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0));
+ && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding());
Window window = calculate_max_window(*input->info(), Steps());
@@ -334,18 +332,18 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
switch(_input->info()->data_type())
{
case DataType::F32:
- _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
break;
#ifdef ARM_COMPUTE_ENABLE_FP16
case DataType::F16:
- _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
break;
#endif /* ARM_COMPUTE_ENABLE_FP16 */
case DataType::QS8:
- _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
break;
case DataType::QS16:
- _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<qint16_t, false> : &NEIm2ColKernel::run_generic<qint16_t, true>;
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint16_t, false> : &NEIm2ColKernel::run_generic<qint16_t, true>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index d5ce1ea027..0a35e07430 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -288,37 +288,39 @@ const std::pair<unsigned int, unsigned int> arm_compute::scaled_dimensions(unsig
unsigned int kernel_width, unsigned int kernel_height,
const PadStrideInfo &pad_stride_info)
{
- const unsigned int pad_x = pad_stride_info.pad().first;
- const unsigned int pad_y = pad_stride_info.pad().second;
- const unsigned int stride_x = pad_stride_info.stride().first;
- const unsigned int stride_y = pad_stride_info.stride().second;
- unsigned int w = 0;
- unsigned int h = 0;
+ const unsigned int pad_left = pad_stride_info.pad_left();
+ const unsigned int pad_top = pad_stride_info.pad_top();
+ const unsigned int pad_right = pad_stride_info.pad_right();
+ const unsigned int pad_bottom = pad_stride_info.pad_bottom();
+ const unsigned int stride_x = pad_stride_info.stride().first;
+ const unsigned int stride_y = pad_stride_info.stride().second;
+ unsigned int w = 0;
+ unsigned int h = 0;
switch(pad_stride_info.round())
{
case DimensionRoundingType::FLOOR:
- w = static_cast<unsigned int>(std::floor((static_cast<float>(width + 2 * pad_x - kernel_width) / stride_x) + 1));
- h = static_cast<unsigned int>(std::floor((static_cast<float>(height + 2 * pad_y - kernel_height) / stride_y) + 1));
+ w = static_cast<unsigned int>(std::floor((static_cast<float>(width + pad_left + pad_right - kernel_width) / stride_x) + 1));
+ h = static_cast<unsigned int>(std::floor((static_cast<float>(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1));
break;
case DimensionRoundingType::CEIL:
- w = static_cast<unsigned int>(std::ceil((static_cast<float>(width + 2 * pad_x - kernel_width) / stride_x) + 1));
- h = static_cast<unsigned int>(std::ceil((static_cast<float>(height + 2 * pad_y - kernel_height) / stride_y) + 1));
+ w = static_cast<unsigned int>(std::ceil((static_cast<float>(width + pad_left + pad_right - kernel_width) / stride_x) + 1));
+ h = static_cast<unsigned int>(std::ceil((static_cast<float>(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1));
break;
default:
ARM_COMPUTE_ERROR("Unsupported rounding type");
}
// Make sure that border operations will start from inside the input and not the padded area
- if(((w - 1) * stride_x) >= (width + pad_x))
+ if(((w - 1) * stride_x) >= (width + pad_left))
{
--w;
}
- if(((h - 1) * stride_y) >= (height + pad_y))
+ if(((h - 1) * stride_y) >= (height + pad_top))
{
--h;
}
- ARM_COMPUTE_ERROR_ON(((w - 1) * stride_x) >= (width + pad_x));
- ARM_COMPUTE_ERROR_ON(((h - 1) * stride_y) >= (height + pad_y));
+ ARM_COMPUTE_ERROR_ON(((w - 1) * stride_x) >= (width + pad_left));
+ ARM_COMPUTE_ERROR_ON(((h - 1) * stride_y) >= (height + pad_top));
return std::make_pair(w, h);
}
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 4b1bfd8b8f..a3be6f4144 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -128,10 +128,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
// Get parameters from conv_info
unsigned int stride_x = 0;
unsigned int stride_y = 0;
- unsigned int pad_x = 0;
- unsigned int pad_y = 0;
std::tie(stride_x, stride_y) = conv_info.stride();
- std::tie(pad_x, pad_y) = conv_info.pad();
// Get convolved dimensions
unsigned int conv_w = 0;
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index f34f497436..155f4e561a 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -136,10 +136,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
// Get parameters from conv_info
unsigned int stride_x = 0;
unsigned int stride_y = 0;
- unsigned int pad_x = 0;
- unsigned int pad_y = 0;
std::tie(stride_x, stride_y) = conv_info.stride();
- std::tie(pad_x, pad_y) = conv_info.pad();
// Get convolved dimensions
unsigned int conv_w = 0;