aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIsabella Gottardi <isabella.gottardi@arm.com>2018-12-14 11:40:40 +0000
committerIsabella Gottardi <isabella.gottardi@arm.com>2018-12-17 14:02:06 +0000
commitcc6129c06af98616a0e4d68475cfa3d92aaf63b3 (patch)
treeb44e5804492d45544a0f4034dfac35ddb86c528f /src
parent49b1015f3505b920f9e4495017c99f91eed68965 (diff)
downloadComputeLibrary-cc6129c06af98616a0e4d68475cfa3d92aaf63b3.tar.gz
COMPMID-1812: CLSpaceToBatch paddings not calculated correctly
Change-Id: I63fed6799c4ed2848ff80cd7458124692a52bb98 Reviewed-on: https://review.mlplatform.org/400 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/space_to_batch.cl60
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp16
-rw-r--r--src/runtime/CL/functions/CLSpaceToBatchLayer.cpp34
3 files changed, 63 insertions, 47 deletions
diff --git a/src/core/CL/cl_kernels/space_to_batch.cl b/src/core/CL/cl_kernels/space_to_batch.cl
index d42a79d3ff..79343d49c7 100644
--- a/src/core/CL/cl_kernels/space_to_batch.cl
+++ b/src/core/CL/cl_kernels/space_to_batch.cl
@@ -23,7 +23,7 @@
*/
#include "helpers.h"
-#if defined(BATCH_SIZE) && defined(DATA_TYPE)
+#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
/** Calculate the space to batch conversion.
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
@@ -83,12 +83,15 @@ __kernel void space_to_batch_nchw(
const int out_y = get_global_id(1);
const int z = get_global_id(2);
- if((out_x >= pad_left_x && out_x < WIDTH_OUT - pad_right_x) && (out_y >= pad_left_y && out_y < HEIGHT_OUT - pad_right_y))
+ const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
+ const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
+
+ if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN)))
{
- const int r = (BATCH_SIZE / (block_x * block_y));
- const int w = batch_id % r;
- const int in_x = (out_x - pad_left_x) * block_x + (batch_id / r) % block_x;
- const int in_y = (out_y - pad_left_y) * block_y + (batch_id / r) / block_x;
+ const int w = batch_id % BATCH_IN;
+ const int in_x = pos_x - pad_left_x;
+ const int in_y = pos_y - pad_left_y;
+
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
}
}
@@ -151,18 +154,21 @@ __kernel void space_to_batch_nhwc(
const int out_y = get_global_id(2);
const int z = get_global_id(0);
- if((out_x >= pad_left_x && out_x < WIDTH_OUT - pad_right_x) && (out_y >= pad_left_y && out_y < HEIGHT_OUT - pad_right_y))
+ const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
+ const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
+
+ if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN)))
{
- const int r = (BATCH_SIZE / (block_x * block_y));
- const int w = batch_id % r;
- const int in_x = (out_x - pad_left_x) * block_x + (batch_id / r) % block_x;
- const int in_y = (out_y - pad_left_y) * block_y + (batch_id / r) / block_x;
+ const int w = batch_id % BATCH_IN;
+ const int in_x = pos_x - pad_left_x;
+ const int in_y = pos_y - pad_left_y;
+
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w));
}
}
-#endif // defined(BATCH_SIZE) && defined(DATA_TYPE)
+#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
-#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y)
+#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
/** Calculate the space to batch conversion.
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
@@ -207,12 +213,15 @@ __kernel void space_to_batch_static_nchw(
const int out_y = get_global_id(1);
const int z = get_global_id(2);
- if((out_x >= PAD_LEFT_X && out_x < WIDTH_OUT - PAD_RIGHT_X) && (out_y >= PAD_LEFT_Y && out_y < HEIGHT_OUT - PAD_RIGHT_Y))
+ const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
+ const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
+
+ if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN)
{
- const int r = (BATCH_SIZE / (block_x * block_y));
- const int w = batch_id % r;
- const int in_x = (out_x - PAD_LEFT_X) * block_x + (batch_id / r) % block_x;
- const int in_y = (out_y - PAD_LEFT_Y) * block_y + (batch_id / r) / block_x;
+ const int w = batch_id % BATCH_IN;
+ const int in_x = pos_x - PAD_LEFT_X;
+ const int in_y = pos_y - PAD_LEFT_Y;
+
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
}
}
@@ -260,13 +269,16 @@ __kernel void space_to_batch_static_nhwc(
const int out_y = get_global_id(2);
const int z = get_global_id(0);
- if((out_x >= PAD_LEFT_X && out_x < WIDTH_OUT - PAD_RIGHT_X) && (out_y >= PAD_LEFT_Y && out_y < HEIGHT_OUT - PAD_RIGHT_Y))
+ const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
+ const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
+
+ if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN)
{
- const int r = (BATCH_SIZE / (block_x * block_y));
- const int w = batch_id % r;
- const int in_x = (out_x - PAD_LEFT_X) * block_x + (batch_id / r) % block_x;
- const int in_y = (out_y - PAD_LEFT_Y) * block_y + (batch_id / r) / block_x;
+ const int w = batch_id % BATCH_IN;
+ const int in_x = pos_x - PAD_LEFT_X;
+ const int in_y = pos_y - PAD_LEFT_Y;
+
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w));
}
}
-#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y)
+#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
index d488631ae9..f0391989a7 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
@@ -39,10 +39,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, padddings, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(padddings->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(padddings->tensor_shape()[1] != block_info->tensor_shape()[0]);
// Validate output if initialized
if(output->total_size() != 0)
{
+ const DataLayout data_layout = input->data_layout();
+ const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] != output->tensor_shape()[idx_channel]);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -64,8 +70,8 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] < padding_left.x() + padding_right.y());
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_width] / block_shape_x != (output->tensor_shape()[idx_width] - padding_left.x() - padding_right.y()));
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_height] / block_shape_y != (output->tensor_shape()[idx_height] - padding_left.x() - padding_right.y()));
+ ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_shape_x != 0);
+ ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_shape_y != 0);
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] != output->tensor_shape()[idx_channel]);
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -101,6 +107,9 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTenso
build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch)));
+ build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
+ build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height)));
+ build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch)));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
// Configure kernel window
@@ -132,6 +141,9 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int bloc
build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch)));
+ build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
+ build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height)));
+ build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch)));
build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x));
build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y));
build_opts.add_option("-DPAD_LEFT_X=" + support::cpp11::to_string(padding_left.x()));
diff --git a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
index 76c1e188e6..a24b72e461 100644
--- a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
+++ b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
@@ -33,20 +33,19 @@
namespace arm_compute
{
CLSpaceToBatchLayer::CLSpaceToBatchLayer()
- : _space_to_batch_kernel(), _output(nullptr), _has_padding(false)
+ : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
{
}
void CLSpaceToBatchLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
+ _memset_kernel.configure(output, PixelValue());
}
-
- _output = output;
_space_to_batch_kernel.configure(input, block_shape, paddings, output);
}
@@ -57,42 +56,35 @@ void CLSpaceToBatchLayer::configure(const ICLTensor *input, const int block_shap
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
+ _memset_kernel.configure(output, PixelValue());
}
-
- _output = output;
_space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
}
Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
{
- return CLSpaceToBatchLayerKernel::validate(input, block_shape, paddings, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLMemsetKernel::validate(output, PixelValue()));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLSpaceToBatchLayerKernel::validate(input, block_shape, paddings, output));
+
+ return Status{};
}
Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
const ITensorInfo *output)
{
- return CLSpaceToBatchLayerKernel::validate(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLMemsetKernel::validate(output, PixelValue()));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLSpaceToBatchLayerKernel::validate(input, block_shape_x, block_shape_y, padding_left, padding_right, output));
+
+ return Status{};
}
void CLSpaceToBatchLayer::run()
{
// Zero out output only if we have paddings
- // TODO(micspy01): replace with memset once ready
if(_has_padding)
{
- _output->map(CLScheduler::get().queue(), true);
- if(is_data_type_quantized_asymmetric(_output->info()->data_type()))
- {
- const uint8_t quantized_zero = _output->info()->quantization_info().offset;
- std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
- }
- else
- {
- memset(_output->buffer(), 0, _output->info()->total_size());
- }
- _output->unmap(CLScheduler::get().queue());
+ CLScheduler::get().enqueue(_memset_kernel, true);
}
-
CLScheduler::get().enqueue(_space_to_batch_kernel, true);
}
} // namespace arm_compute