aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Moritz Pflanzer <moritz.pflanzer@arm.com> 2017-09-23 10:47:54 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit 4726fdf9f7bb9638ffbb4e358cc788686b68a414 (patch)
tree 5f578776ae671a54c91d11c8b76de687dc7d02c3
parent 219c69108f72a0c01f0f14dda579fc0bce808d07 (diff)
download ComputeLibrary-4726fdf9f7bb9638ffbb4e358cc788686b68a414.tar.gz
COMPMID-541: Fix padding in CLMinMaxLocationKernel
Change-Id: Ie17e3f14c428553d433da2a564e016bfac7749a9
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88881
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/minmaxloc.cl 20
-rw-r--r-- src/core/CL/kernels/CLMinMaxLocationKernel.cpp 19
-rw-r--r-- src/runtime/CL/functions/CLMinMaxLocation.cpp 5
-rw-r--r-- tests/validation/CL/MinMaxLocation.cpp 6
4 files changed, 28 insertions, 22 deletions
diff --git a/src/core/CL/cl_kernels/minmaxloc.cl b/src/core/CL/cl_kernels/minmaxloc.cl
index 05fc78dcb5..0f557a499d 100644
--- a/src/core/CL/cl_kernels/minmaxloc.cl
+++ b/src/core/CL/cl_kernels/minmaxloc.cl
@@ -45,7 +45,7 @@ inline int FloatFlip(float val)
__constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MIN);
__constant VEC_DATA_TYPE(DATA_TYPE, 16) type_max = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MAX);
-__constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+__constant int16 idx16 = (int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/** This function identifies the min and maximum value of an input image.
*
@@ -65,7 +65,7 @@ __constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
__kernel void minmax(
IMAGE_DECLARATION(src),
__global int *min_max,
- uint width)
+ int width)
{
Image src = CONVERT_TO_IMAGE_STRUCT(src);
@@ -76,11 +76,11 @@ __kernel void minmax(
local_max = type_min;
// Calculate min/max of row
- uint width4 = width >> 4;
- for(uint i = 0; i < width4; i++)
+ int i = 0;
+ for(; i + 16 <= width; i += 16)
{
VEC_DATA_TYPE(DATA_TYPE, 16)
- data = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0));
+ data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
local_min = min(data, local_min);
local_max = max(data, local_max);
}
@@ -88,15 +88,15 @@ __kernel void minmax(
#ifdef NON_MULTIPLE_OF_16
// Handle non multiple of 16
VEC_DATA_TYPE(DATA_TYPE, 16)
- data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
+ data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
#ifdef IS_DATA_TYPE_FLOAT
- int16 widx = convert_int16(((uint16)(width4 << 4) + idx16) < width);
+ int16 valid_indices = (i + idx16) < width;
#else /* IS_DATA_TYPE_FLOAT */
VEC_DATA_TYPE(DATA_TYPE, 16)
- widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16));
+ valid_indices = CONVERT((i + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16));
#endif /* IS_DATA_TYPE_FLOAT */
- local_max = max(local_max, select(type_min, data, widx));
- local_min = min(local_min, select(type_max, data, widx));
+ local_max = max(local_max, select(type_min, data, valid_indices));
+ local_min = min(local_min, select(type_max, data, valid_indices));
#endif /* NON_MULTIPLE_OF_16 */
// Perform min/max reduction
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index be0034382c..5636592347 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -32,8 +32,8 @@
#include <climits>
-using namespace arm_compute;
-
+namespace arm_compute
+{
inline int32_t FloatFlip(float val)
{
static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");
@@ -88,9 +88,13 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
}
// Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace((0 != (num_elems_processed_per_iteration % max_cl_vector_width)) ? "-DNON_MULTIPLE_OF_16" : "");
+ std::set<std::string> build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) };
+
+ if(num_elems_processed_per_iteration % max_cl_vector_width != 0)
+ {
+ build_opts.emplace("-DNON_MULTIPLE_OF_16");
+ }
+
if(input->info()->data_type() == DataType::F32)
{
build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits<float>::max()));
@@ -109,11 +113,11 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
// Set fixed arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
_kernel.setArg(idx++, *_min_max);
- _kernel.setArg<cl_uint>(idx++, input->info()->dimension(0));
+ _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(input->info()->dimension(0)));
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
+ update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16)));
ICLKernel::configure(win);
}
@@ -226,3 +230,4 @@ void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
}
while(window.slide_window_slice_2D(slice));
}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp
index bc70ceb621..49dcbcb7df 100644
--- a/src/runtime/CL/functions/CLMinMaxLocation.cpp
+++ b/src/runtime/CL/functions/CLMinMaxLocation.cpp
@@ -25,8 +25,8 @@
#include "arm_compute/core/CL/CLHelpers.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLMinMaxLocation::CLMinMaxLocation()
: _min_max_kernel(),
_min_max_loc_kernel(),
@@ -96,3 +96,4 @@ void CLMinMaxLocation::run()
_max_loc->resize(max_corner_size);
}
}
+} // namespace arm_compute
diff --git a/tests/validation/CL/MinMaxLocation.cpp b/tests/validation/CL/MinMaxLocation.cpp
index 58a84bd649..acc4cbf1cb 100644
--- a/tests/validation/CL/MinMaxLocation.cpp
+++ b/tests/validation/CL/MinMaxLocation.cpp
@@ -47,8 +47,8 @@ void validate_configuration(const CLTensor &src, TensorShape shape)
ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
// Create output storage
- int32_t min{};
- int32_t max{};
+ int32_t min = 0;
+ int32_t max = 0;
CLCoordinates2DArray min_loc(shape.total_size());
CLCoordinates2DArray max_loc(shape.total_size());
@@ -57,7 +57,7 @@ void validate_configuration(const CLTensor &src, TensorShape shape)
min_max_loc.configure(&src, &min, &max, &min_loc, &max_loc);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), src.info()->dimension(0)).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
validate(src.info()->padding(), padding);
}