diff options
Diffstat (limited to 'src/core/NEON/kernels/NEWarpKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEWarpKernel.cpp | 107 |
1 file changed, 91 insertions, 16 deletions
diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index 62f4e5d057..ab8ab14ae5 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -143,7 +143,11 @@ void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window) const float start_y0 = M10 * window.x().start(); // Current row - int y_cur = window.y().start(); + int y_cur = window.y().start(); + int z_cur = window.z().start(); + int d3_cur = window[3].start(); + int d4_cur = window[4].start(); + int d5_cur = window[5].start(); // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing float const_x0 = M01 * y_cur + M02; @@ -155,10 +159,14 @@ void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { - // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0 - if(y_cur != id.y()) + // Check if we are processing a new row. 
If so, update the current processed row (y_cur), x0, y0 and z0 + if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) { - y_cur = id.y(); + y_cur = id.y(); + z_cur = id.z(); + d3_cur = id[3]; + d4_cur = id[4]; + d5_cur = id[5]; const_x0 = M01 * y_cur + M02; const_y0 = M11 * y_cur + M12; @@ -222,7 +230,11 @@ void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window) const float start_y0 = M10 * window.x().start(); // Current row - int y_cur = window.y().start(); + int y_cur = window.y().start(); + int z_cur = window.z().start(); + int d3_cur = window[3].start(); + int d4_cur = window[4].start(); + int d5_cur = window[5].start(); // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing float const_x0 = M01 * y_cur + M02; @@ -234,10 +246,14 @@ void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { - // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0 - if(y_cur != id.y()) + // Check if we are processing a new row. 
If so, update the current processed row (y_cur), x0, y0 and z0 + if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) { - y_cur = id.y(); + y_cur = id.y(); + z_cur = id.z(); + d3_cur = id[3]; + d4_cur = id[4]; + d5_cur = id[5]; const_x0 = M01 * y_cur + M02; const_y0 = M11 * y_cur + M12; @@ -264,7 +280,34 @@ void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window) } else { - *out.ptr() = _constant_border_value; + switch(interpolation) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + *out.ptr() = _constant_border_value; + break; + case InterpolationPolicy::BILINEAR: + { + const auto xi = clamp<int>(std::floor(x0), min_x - 1, max_x); + const auto yi = clamp<int>(std::floor(y0), min_y - 1, max_y); + const auto xi_1 = clamp<int>(std::floor(x0 + 1), min_x - 1, max_x); + const auto yi_1 = clamp<int>(std::floor(y0 + 1), min_y - 1, max_y); + + const float dx = x0 - std::floor(x0); + const float dy = y0 - std::floor(y0); + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = *(in.ptr() + xi + yi * stride); + const float a01 = *(in.ptr() + xi_1 + yi * stride); + const float a10 = *(in.ptr() + xi + yi_1 * stride); + const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); + + *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); + } + break; + default: + ARM_COMPUTE_ERROR("Interpolation not supported"); + } } x0 += M00; @@ -292,7 +335,11 @@ void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window) const size_t stride = _input->info()->strides_in_bytes()[1]; // Current row - int y_cur = window.y().start(); + int y_cur = window.y().start(); + int z_cur = window.z().start(); + int d3_cur = window[3].start(); + int d4_cur = window[4].start(); + int d5_cur = window[5].start(); const float M00 = _matrix[0]; const float M10 = _matrix[1]; @@ -314,10 +361,14 @@ void NEWarpAffineKernel<interpolation>::warp_replicate(const Window 
&window) execute_window_loop(window, [&](const Coordinates & id) { - // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0 - if(y_cur != id.y()) + // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 + if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) { - y_cur = id.y(); + y_cur = id.y(); + z_cur = id.z(); + d3_cur = id[3]; + d4_cur = id[4]; + d5_cur = id[5]; const_x0 = M01 * y_cur + M02; const_y0 = M11 * y_cur + M12; @@ -345,10 +396,34 @@ void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window) else { // Clamp coordinates - const auto xi = clamp<int>(x0, min_x, max_x - 1); - const auto yi = clamp<int>(y0, min_y, max_y - 1); + const auto xi = clamp<int>(std::floor(x0), min_x, max_x - 1); + const auto yi = clamp<int>(std::floor(y0), min_y, max_y - 1); + switch(interpolation) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + *out.ptr() = *(in.ptr() + xi + yi * stride); + break; + case InterpolationPolicy::BILINEAR: + { + const auto xi_1 = clamp<int>(std::floor(x0 + 1), min_x, max_x - 1); + const auto yi_1 = clamp<int>(std::floor(y0 + 1), min_y, max_y - 1); - *out.ptr() = *(in.ptr() + xi + yi * stride); + const float dx = x0 - std::floor(x0); + const float dy = y0 - std::floor(y0); + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = *(in.ptr() + xi + yi * stride); + const float a01 = *(in.ptr() + xi_1 + yi * stride); + const float a10 = *(in.ptr() + xi + yi_1 * stride); + const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); + + *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); + } + break; + default: + ARM_COMPUTE_ERROR("Interpolation not supported"); + } } x0 += M00; |