diff options
author | Moritz Pflanzer <moritz.pflanzer@arm.com> | 2017-09-23 10:47:54 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 4726fdf9f7bb9638ffbb4e358cc788686b68a414 (patch) | |
tree | 5f578776ae671a54c91d11c8b76de687dc7d02c3 | |
parent | 219c69108f72a0c01f0f14dda579fc0bce808d07 (diff) | |
download | ComputeLibrary-4726fdf9f7bb9638ffbb4e358cc788686b68a414.tar.gz |
COMPMID-541: Fix padding in CLMinMaxLocationKernel
Change-Id: Ie17e3f14c428553d433da2a564e016bfac7749a9
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88881
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r-- | src/core/CL/cl_kernels/minmaxloc.cl | 20 | ||||
-rw-r--r-- | src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 19 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLMinMaxLocation.cpp | 5 | ||||
-rw-r--r-- | tests/validation/CL/MinMaxLocation.cpp | 6 |
4 files changed, 28 insertions, 22 deletions
diff --git a/src/core/CL/cl_kernels/minmaxloc.cl b/src/core/CL/cl_kernels/minmaxloc.cl index 05fc78dcb5..0f557a499d 100644 --- a/src/core/CL/cl_kernels/minmaxloc.cl +++ b/src/core/CL/cl_kernels/minmaxloc.cl @@ -45,7 +45,7 @@ inline int FloatFlip(float val) __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MIN); __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_max = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MAX); -__constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +__constant int16 idx16 = (int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); /** This function identifies the min and maximum value of an input image. * @@ -65,7 +65,7 @@ __constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, __kernel void minmax( IMAGE_DECLARATION(src), __global int *min_max, - uint width) + int width) { Image src = CONVERT_TO_IMAGE_STRUCT(src); @@ -76,11 +76,11 @@ __kernel void minmax( local_max = type_min; // Calculate min/max of row - uint width4 = width >> 4; - for(uint i = 0; i < width4; i++) + int i = 0; + for(; i + 16 <= width; i += 16) { VEC_DATA_TYPE(DATA_TYPE, 16) - data = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0)); + data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0)); local_min = min(data, local_min); local_max = max(data, local_max); } @@ -88,15 +88,15 @@ __kernel void minmax( #ifdef NON_MULTIPLE_OF_16 // Handle non multiple of 16 VEC_DATA_TYPE(DATA_TYPE, 16) - data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0)); + data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0)); #ifdef IS_DATA_TYPE_FLOAT - int16 widx = convert_int16(((uint16)(width4 << 4) + idx16) < width); + int16 valid_indices = (i + idx16) < width; #else /* IS_DATA_TYPE_FLOAT */ VEC_DATA_TYPE(DATA_TYPE, 16) - widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16)); + valid_indices = CONVERT((i + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16)); #endif /* IS_DATA_TYPE_FLOAT */ - local_max = max(local_max, select(type_min, data, widx)); - local_min = min(local_min, select(type_max, data, widx)); + local_max = max(local_max, select(type_min, data, valid_indices)); + local_min = min(local_min, select(type_max, data, valid_indices)); #endif /* NON_MULTIPLE_OF_16 */ // Perform min/max reduction diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index be0034382c..5636592347 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -32,8 +32,8 @@ #include <climits> -using namespace arm_compute; - +namespace arm_compute +{ inline int32_t FloatFlip(float val) { static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t"); @@ -88,9 +88,13 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) } // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace((0 != (num_elems_processed_per_iteration % max_cl_vector_width)) ? "-DNON_MULTIPLE_OF_16" : ""); + std::set<std::string> build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) }; + + if(num_elems_processed_per_iteration % max_cl_vector_width != 0) + { + build_opts.emplace("-DNON_MULTIPLE_OF_16"); + } + if(input->info()->data_type() == DataType::F32) { build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits<float>::max())); @@ -109,11 +113,11 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) // Set fixed arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters _kernel.setArg(idx++, *_min_max); - _kernel.setArg<cl_uint>(idx++, input->info()->dimension(0)); + _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(input->info()->dimension(0))); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); + update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16))); ICLKernel::configure(win); } @@ -226,3 +230,4 @@ void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue) } while(window.slide_window_slice_2D(slice)); } +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp index bc70ceb621..49dcbcb7df 100644 --- a/src/runtime/CL/functions/CLMinMaxLocation.cpp +++ b/src/runtime/CL/functions/CLMinMaxLocation.cpp @@ -25,8 +25,8 @@ #include "arm_compute/core/CL/CLHelpers.h" -using namespace arm_compute; - +namespace arm_compute +{ CLMinMaxLocation::CLMinMaxLocation() : _min_max_kernel(), _min_max_loc_kernel(), @@ -96,3 +96,4 @@ void CLMinMaxLocation::run() _max_loc->resize(max_corner_size); } } +} // namespace arm_compute diff --git a/tests/validation/CL/MinMaxLocation.cpp b/tests/validation/CL/MinMaxLocation.cpp index 58a84bd649..acc4cbf1cb 100644 --- a/tests/validation/CL/MinMaxLocation.cpp +++ b/tests/validation/CL/MinMaxLocation.cpp @@ -47,8 +47,8 @@ void validate_configuration(const CLTensor &src, TensorShape shape) ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); // Create output storage - int32_t min{}; - int32_t max{}; + int32_t min = 0; + int32_t max = 0; CLCoordinates2DArray min_loc(shape.total_size()); CLCoordinates2DArray max_loc(shape.total_size()); @@ -57,7 +57,7 @@ void validate_configuration(const CLTensor &src, TensorShape shape) min_max_loc.configure(&src, &min, &max, &min_loc, &max_loc); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), src.info()->dimension(0)).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); validate(src.info()->padding(), padding); } |