From cc6129c06af98616a0e4d68475cfa3d92aaf63b3 Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Fri, 14 Dec 2018 11:40:40 +0000 Subject: COMPMID-1812: CLSpaceToBatch paddings not calculated correctly Change-Id: I63fed6799c4ed2848ff80cd7458124692a52bb98 Reviewed-on: https://review.mlplatform.org/400 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou --- src/core/CL/cl_kernels/space_to_batch.cl | 60 ++++++++++++++--------- src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 16 +++++- 2 files changed, 50 insertions(+), 26 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/space_to_batch.cl b/src/core/CL/cl_kernels/space_to_batch.cl index d42a79d3ff..79343d49c7 100644 --- a/src/core/CL/cl_kernels/space_to_batch.cl +++ b/src/core/CL/cl_kernels/space_to_batch.cl @@ -23,7 +23,7 @@ */ #include "helpers.h" -#if defined(BATCH_SIZE) && defined(DATA_TYPE) +#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN) /** Calculate the space to batch conversion. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float @@ -83,12 +83,15 @@ __kernel void space_to_batch_nchw( const int out_y = get_global_id(1); const int z = get_global_id(2); - if((out_x >= pad_left_x && out_x < WIDTH_OUT - pad_right_x) && (out_y >= pad_left_y && out_y < HEIGHT_OUT - pad_right_y)) + const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x); + const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x); + + if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN))) { - const int r = (BATCH_SIZE / (block_x * block_y)); - const int w = batch_id % r; - const int in_x = (out_x - pad_left_x) * block_x + (batch_id / r) % block_x; - const int in_y = (out_y - pad_left_y) * block_y + (batch_id / r) / block_x; + const int w = batch_id % BATCH_IN; + const int in_x = pos_x - pad_left_x; + const int in_y = pos_y - pad_left_y; + *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w)); } } @@ -151,18 +154,21 @@ __kernel void space_to_batch_nhwc( const int out_y = get_global_id(2); const int z = get_global_id(0); - if((out_x >= pad_left_x && out_x < WIDTH_OUT - pad_right_x) && (out_y >= pad_left_y && out_y < HEIGHT_OUT - pad_right_y)) + const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x); + const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x); + + if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN))) { - const int r = (BATCH_SIZE / (block_x * block_y)); - const int w = batch_id % r; - const int in_x = (out_x - pad_left_x) * block_x + (batch_id / r) % block_x; - const int in_y = (out_y - pad_left_y) * block_y + (batch_id / r) / block_x; + const int w = batch_id % BATCH_IN; + const int in_x = pos_x - pad_left_x; + const int in_y = pos_y - pad_left_y; + *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w)); } } -#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) +#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN) -#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) +#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN) /** Calculate the space to batch conversion. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float @@ -207,12 +213,15 @@ __kernel void space_to_batch_static_nchw( const int out_y = get_global_id(1); const int z = get_global_id(2); - if((out_x >= PAD_LEFT_X && out_x < WIDTH_OUT - PAD_RIGHT_X) && (out_y >= PAD_LEFT_Y && out_y < HEIGHT_OUT - PAD_RIGHT_Y)) + const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x); + const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x); + + if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN) { - const int r = (BATCH_SIZE / (block_x * block_y)); - const int w = batch_id % r; - const int in_x = (out_x - PAD_LEFT_X) * block_x + (batch_id / r) % block_x; - const int in_y = (out_y - PAD_LEFT_Y) * block_y + (batch_id / r) / block_x; + const int w = batch_id % BATCH_IN; + const int in_x = pos_x - PAD_LEFT_X; + const int in_y = pos_y - PAD_LEFT_Y; + *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w)); } } @@ -260,13 +269,16 @@ __kernel void space_to_batch_static_nhwc( const int out_y = get_global_id(2); const int z = get_global_id(0); - if((out_x >= PAD_LEFT_X && out_x < WIDTH_OUT - PAD_RIGHT_X) && (out_y >= PAD_LEFT_Y && out_y < HEIGHT_OUT - PAD_RIGHT_Y)) + const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x); + const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x); + + if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN) { - const int r = (BATCH_SIZE / (block_x * block_y)); - const int w = batch_id % r; - const int in_x = (out_x - PAD_LEFT_X) * block_x + (batch_id / r) % block_x; - const int in_y = (out_y - PAD_LEFT_Y) * block_y + (batch_id / r) / block_x; + const int w = batch_id % BATCH_IN; + const int in_x = pos_x - PAD_LEFT_X; + const int in_y = pos_y - PAD_LEFT_Y; + *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w)); } } -#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) +#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN) diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index d488631ae9..f0391989a7 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -39,10 +39,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, padddings, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1); + ARM_COMPUTE_RETURN_ERROR_ON(padddings->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON(padddings->tensor_shape()[1] != block_info->tensor_shape()[0]); // Validate output if initialized if(output->total_size() != 0) { + const DataLayout data_layout = input->data_layout(); + const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] != output->tensor_shape()[idx_channel]); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } @@ -64,8 +70,8 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] < padding_left.x() + padding_right.y()); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_width] / block_shape_x != (output->tensor_shape()[idx_width] - padding_left.x() - padding_right.y())); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_height] / block_shape_y != (output->tensor_shape()[idx_height] - padding_left.x() - padding_right.y())); + ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_shape_x != 0); + ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_shape_y != 0); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] != output->tensor_shape()[idx_channel]); ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -101,6 +107,9 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTenso build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch))); + build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); + build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height))); + build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch))); _kernel = static_cast(CLKernelLibrary::get().create_kernel("space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); // Configure kernel window @@ -132,6 +141,9 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int bloc build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch))); + build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); + build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height))); + build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch))); build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x)); build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y)); build_opts.add_option("-DPAD_LEFT_X=" + support::cpp11::to_string(padding_left.x())); -- cgit v1.2.1