From cfb6553e6114fd84e1972b1a5c5cc42784996333 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Tue, 12 Sep 2017 19:06:28 +0100 Subject: COMPMID-417 Fix ROIPooling * Fix ROIPooling in NEON, CL and Reference. Change-Id: Id5066625e5073e0bfebe69391f7941e993003296 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87435 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen Reviewed-by: Georgios Pinitas Reviewed-by: Anthony Barbier --- src/core/CL/cl_kernels/roi_pooling_layer.cl | 17 ++++++++--------- src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 12 ++++-------- tests/benchmark/CL/ROIPoolingLayer.cpp | 9 ++++----- tests/validation_old/CL/ROIPoolingLayer.cpp | 3 +-- tests/validation_old/TensorOperations.h | 12 ++++-------- 5 files changed, 21 insertions(+), 32 deletions(-) diff --git a/src/core/CL/cl_kernels/roi_pooling_layer.cl b/src/core/CL/cl_kernels/roi_pooling_layer.cl index 35a9c0a21f..042b102a15 100644 --- a/src/core/CL/cl_kernels/roi_pooling_layer.cl +++ b/src/core/CL/cl_kernels/roi_pooling_layer.cl @@ -138,24 +138,23 @@ __kernel void roi_pooling_layer( // Load roi parameters // roi is laid out as follows: // { x, y, width, height, batch_index } - const ushort8 roi = vload8(0, (__global ushort *)vector_offset(&rois, pw)); - const int2 roi_anchor = convert_int2_sat(round(convert_float2(roi.s01) * (float)SPATIAL_SCALE)); - const int2 roi_dims = convert_int2_sat(fmax(round(convert_float2(roi.s23) * (float)SPATIAL_SCALE), 1.f)); - - // Determine pooled region in input image to pooled region in output image ratio - const float2 pool_region_ratio = convert_float2(roi_dims) / (float2)(POOLED_DIM_X, POOLED_DIM_Y); + const ushort4 roi = vload4(0, (__global ushort *)vector_offset(&rois, pw)); + const ushort roi_batch = *((__global ushort *)vector_offset(&rois, pw) + 4); + const int2 roi_anchor = convert_int2_sat(round(convert_float2(roi.s01) * (float)SPATIAL_SCALE)); + const int2 roi_dims = convert_int2_sat(fmax(round(convert_float2(roi.s23) * (float)SPATIAL_SCALE), 1.f)); // Calculate pooled region start and end const float2 spatial_indx = (float2)(px, py); + const float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y); const int2 max_spatial_dims = (int2)(MAX_DIM_X, MAX_DIM_Y); - int2 region_start = convert_int2_sat(floor(spatial_indx * pool_region_ratio)) + roi_anchor; - int2 region_end = convert_int2_sat(ceil((spatial_indx + 1) * pool_region_ratio)) + roi_anchor; + int2 region_start = convert_int2_sat(floor(spatial_indx / pooled_dims * convert_float2(roi_dims))) + roi_anchor; + int2 region_end = convert_int2_sat(floor((spatial_indx + 1) / pooled_dims * convert_float2(roi_dims))) + roi_anchor; region_start = clamp(region_start, 0, max_spatial_dims); region_end = clamp(region_end, 0, max_spatial_dims); // Move input and output pointer across the fourth dimension - input.ptr += roi.s4 * input_stride_w; + input.ptr += roi_batch * input_stride_w; output.ptr += pw * output_stride_w; for(int pz = 0; pz < MAX_DIM_Z; ++pz) diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index 83ab611b67..a209a523d3 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -105,10 +105,6 @@ void NEROIPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) const int roi_width = std::max(support::cpp11::round(curr_roi.rect.width * spatial_scale), 1.f); const int roi_height = std::max(support::cpp11::round(curr_roi.rect.height * spatial_scale), 1.f); - // Determine pooling regions - const auto pool_region_size_x = static_cast(roi_width) / pooled_w; - const auto pool_region_size_y = static_cast(roi_height) / pooled_h; - // Iterate through all feature maps for(int fm = 0; fm < fms; ++fm) { @@ -117,10 +113,10 @@ void NEROIPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) { for(int px = 0; px < pooled_w; ++px) { - auto region_start_x = static_cast(std::floor(px * pool_region_size_x)); - auto region_end_x = static_cast(std::ceil((px + 1) * pool_region_size_x)); - auto region_start_y = static_cast(std::floor(py * pool_region_size_y)); - auto region_end_y = static_cast(std::ceil((py + 1) * pool_region_size_y)); + auto region_start_x = static_cast(std::floor((static_cast(px) / pooled_w) * roi_width)); + auto region_end_x = static_cast(std::floor((static_cast(px + 1) / pooled_w) * roi_width)); + auto region_start_y = static_cast(std::floor((static_cast(py) / pooled_h) * roi_height)); + auto region_end_y = static_cast(std::floor((static_cast(py + 1) / pooled_h) * roi_height)); region_start_x = std::min(std::max(region_start_x + roi_anchor_x, 0), width); region_end_x = std::min(std::max(region_end_x + roi_anchor_x, 0), width); diff --git a/tests/benchmark/CL/ROIPoolingLayer.cpp b/tests/benchmark/CL/ROIPoolingLayer.cpp index 186e42c0af..a10134fb04 100644 --- a/tests/benchmark/CL/ROIPoolingLayer.cpp +++ b/tests/benchmark/CL/ROIPoolingLayer.cpp @@ -43,11 +43,10 @@ using CLROIPoolingLayerFixture = ROIPoolingLayerFixture &in, Tensor &out, const std::vector(roi_width) / pool_w; - float pool_region_size_y = static_cast(roi_height) / pool_h; - // Iterate through all channel for(int fm = 0; fm < fms; ++fm) { @@ -1049,10 +1045,10 @@ void roi_pooling_layer(const Tensor &in, Tensor &out, const std::vector(std::floor(px * pool_region_size_x)); - int region_end_x = static_cast(std::ceil((px + 1) * pool_region_size_x)); - int region_start_y = static_cast(std::floor(py * pool_region_size_y)); - int region_end_y = static_cast(std::ceil((py + 1) * pool_region_size_y)); + int region_start_x = static_cast(std::floor((static_cast(px) / pool_w) * roi_width)); + int region_end_x = static_cast(std::floor((static_cast(px + 1) / pool_w) * roi_width)); + int region_start_y = static_cast(std::floor((static_cast(py) / pool_h) * roi_height)); + int region_end_y = static_cast(std::floor((static_cast(py + 1) / pool_h) * roi_height)); region_start_x = std::min(std::max(region_start_x + roi_start_x, 0), width_in); region_end_x = std::min(std::max(region_end_x + roi_start_x, 0), width_in); -- cgit v1.2.1