aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEScaleKernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-05-15 17:38:40 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:52:35 +0000
commitfa7ad56c4fc9e63a2f9e9a16e97ac9c275a5e3d8 (patch)
treef827d1603b5711a2c452a0d8d70af44a7d3d77ff /src/core/NEON/kernels/NEScaleKernel.cpp
parentefac7c63c045ea8fec17df54231f0d44611c7c78 (diff)
downloadComputeLibrary-fa7ad56c4fc9e63a2f9e9a16e97ac9c275a5e3d8.tar.gz
COMPMID-1163: NEON Scale NHWC failures
Change-Id: Ice620385ce787b568b38fcbdddc94ef385396141 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131355 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEScaleKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp94
1 files changed, 56 insertions, 38 deletions
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 3f57ffba5c..71116447f4 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -190,8 +190,8 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const size_t stride_w_elems = stride_w / sizeof(T);
const size_t stride_h_elems = stride_h / sizeof(T);
- const size_t input_width = input->info()->dimension(1);
- const size_t input_height = input->info()->dimension(2);
+ const int input_width = input->info()->dimension(1);
+ const int input_height = input->info()->dimension(2);
const T *border_area = reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
@@ -200,55 +200,73 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
return !(x < low_x || x > high_x || y < low_y || y > high_y);
};
+ int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
+
execute_window_loop(window, [&](const Coordinates & id)
{
- const auto offset = (*reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / sizeof(T);
+ const auto offset = (*reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / static_cast<int>(sizeof(T));
const auto dx_scale = *reinterpret_cast<const float *>(dx->ptr_to_element(Coordinates(id.y(), id.z())));
const auto dy_scale = *reinterpret_cast<const float *>(dy->ptr_to_element(Coordinates(id.y(), id.z())));
const int in_yi = std::floor((id.z() + 0.5f) * hr - 0.5f);
const int offset_row = in_yi * stride_h + id.x() * stride_c;
const T *in_ptr = reinterpret_cast<T *>(in.ptr() + offset * stride_w + offset_row);
- T a00 = 0, a01 = 0, a10 = 0, a11 = 0;
-
- if(border_mode == BorderMode::CONSTANT)
- {
- a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : *border_area;
- a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : *border_area;
- a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : *border_area;
- a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : *border_area;
- }
- else if(border_mode == BorderMode::REPLICATE)
+ if(is_valid(offset, -border_size, input_width - 1 + border_size, in_yi, -border_size, input_height - 1 + border_size))
{
- auto clamped_x = utility::clamp<int>(offset, 0, input_width - 1);
- auto clamped_x1 = utility::clamp<int>(offset + 1, 0, input_width - 1);
- auto clamped_y = utility::clamp<int>(in_yi, 0, input_height - 1);
- auto clamped_y1 = utility::clamp<int>(in_yi + 1, 0, input_height - 1);
-
- a00 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c);
- a01 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y * stride_h + id.x() * stride_c);
- a10 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
- a11 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
+ T a00 = 0, a01 = 0, a10 = 0, a11 = 0;
+
+ if(border_mode == BorderMode::CONSTANT)
+ {
+ a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : *border_area;
+ a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : *border_area;
+ a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : *border_area;
+ a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : *border_area;
+ }
+ else if(border_mode == BorderMode::REPLICATE)
+ {
+ auto clamped_x = utility::clamp<int>(offset, 0, input_width - 1);
+ auto clamped_x1 = utility::clamp<int>(offset + 1, 0, input_width - 1);
+ auto clamped_y = utility::clamp<int>(in_yi, 0, input_height - 1);
+ auto clamped_y1 = utility::clamp<int>(in_yi + 1, 0, input_height - 1);
+
+ a00 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c);
+ a01 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y * stride_h + id.x() * stride_c);
+ a10 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
+ a11 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
+ }
+ else
+ {
+ a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : 0;
+ a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : 0;
+ a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : 0;
+ a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : 0;
+ }
+
+ // Perform interpolation
+ const float dx1 = 1.0f - dx_scale;
+ const float dy1 = 1.0f - dy_scale;
+
+ const float w1 = dx1 * dy1;
+ const float w2 = dx_scale * dy1;
+ const float w3 = dx1 * dy_scale;
+ const float w4 = dx_scale * dy_scale;
+
+ // Store result
+ *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
}
else
{
- a00 = *in_ptr;
- a01 = *(in_ptr + stride_w_elems);
- a10 = *(in_ptr + stride_h_elems);
- a11 = *(in_ptr + stride_h_elems + stride_w_elems);
+ if(border_mode == BorderMode::CONSTANT)
+ {
+ *reinterpret_cast<T *>(out.ptr()) = *border_area;
+ }
+ else if(border_mode == BorderMode::REPLICATE)
+ {
+ auto clamped_x = utility::clamp<int>(offset, 0, input_width - 1);
+ auto clamped_y = utility::clamp<int>(in_yi, 0, input_height - 1);
+ *reinterpret_cast<T *>(out.ptr()) = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c);
+ }
}
-
- // Perform interpolation
- const float dx1 = 1.0f - dx_scale;
- const float dy1 = 1.0f - dy_scale;
-
- const float w1 = dx1 * dy1;
- const float w2 = dx_scale * dy1;
- const float w3 = dx1 * dy_scale;
- const float w4 = dx_scale * dy_scale;
-
- // Store result
- *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
},
in, out);
}