aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEWarpKernel.cpp
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2017-09-04 18:44:23 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 13:03:09 +0100
commit6ff3b19ee6120edf015fad8caab2991faa3070af (patch)
treea7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /src/core/NEON/kernels/NEWarpKernel.cpp
downloadComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'src/core/NEON/kernels/NEWarpKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEWarpKernel.cpp651
1 files changed, 651 insertions, 0 deletions
diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp
new file mode 100644
index 0000000000..6c90a334af
--- /dev/null
+++ b/src/core/NEON/kernels/NEWarpKernel.cpp
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <cstddef>
+
+using namespace arm_compute;
+
+namespace
+{
+inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
+{
+ return in_ptr[x + y * stride];
+}
+} // namespace
+
+INEWarpKernel::INEWarpKernel()
+ : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
+{
+}
+
+void INEWarpKernel::run(const Window &window)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+ (this->*_func)(window);
+}
+
+void INEWarpKernel::configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
+{
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON(nullptr == matrix);
+
+ _matrix = matrix;
+ _constant_border_value = constant_border_value;
+
+ switch(border_mode)
+ {
+ case BorderMode::UNDEFINED:
+ _func = &INEWarpKernel::warp_undefined;
+ break;
+ case BorderMode::CONSTANT:
+ _func = &INEWarpKernel::warp_constant;
+ break;
+ case BorderMode::REPLICATE:
+ _func = &INEWarpKernel::warp_replicate;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Border mode not supported");
+ break;
+ }
+
+ _input = input;
+ _output = output;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output->info(), Steps(1U));
+
+ const ValidRegion &input_valid_region = input->info()->valid_region();
+
+ // Reads can occur within the valid region of the input
+ AccessWindowStatic input_access(input->info(),
+ input_valid_region.anchor[0], input_valid_region.anchor[1],
+ input_valid_region.anchor[0] + input_valid_region.shape[0],
+ input_valid_region.anchor[1] + input_valid_region.shape[1]);
+ AccessWindowHorizontal output_access(output->info(), 0, 1);
+
+ update_window_and_padding(win, input_access, output_access);
+
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+ INEKernel::configure(win);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // x0 = M01 * x + M01 * y + M02
+ // y0 = M11 * x + M11 * y + M12
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M01 = _matrix[0 + 1 * 2];
+ const float M11 = _matrix[1 + 1 * 2];
+ const float M02 = _matrix[0 + 2 * 2];
+ const float M12 = _matrix[1 + 2 * 2];
+
+ // "M00 * x" and "M10 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+
+ // Current row
+ int y_cur = window.y().start();
+
+ // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+
+ // Affine warp coordinates
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ }
+
+ // Only write to output if x0 and y0 are within the valid region.
+ // Otherwise the read value would be undefined.
+ if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+
+ x0 += M00;
+ y0 += M10;
+ },
+ in, out);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // x0 = M01 * x + M01 * y + M02
+ // y0 = M11 * x + M11 * y + M12
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M01 = _matrix[0 + 1 * 2];
+ const float M11 = _matrix[1 + 1 * 2];
+ const float M02 = _matrix[0 + 2 * 2];
+ const float M12 = _matrix[1 + 2 * 2];
+
+ // "M00 * x" and "M10 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+
+ // Current row
+ int y_cur = window.y().start();
+
+ // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+
+ // Affine warp coordinates
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ }
+
+ // Only use input values if x0 and y0 are within the valid region.
+ // Otherwise write the constant border value.
+ if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+ else
+ {
+ *out.ptr() = _constant_border_value;
+ }
+
+ x0 += M00;
+ y0 += M10;
+ },
+ in, out);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // Current row
+ int y_cur = window.y().start();
+
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M01 = _matrix[0 + 1 * 2];
+ const float M11 = _matrix[1 + 1 * 2];
+ const float M02 = _matrix[0 + 2 * 2];
+ const float M12 = _matrix[1 + 2 * 2];
+
+ // "M00 * x" and "M10 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+
+ // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ }
+
+ // Only load from (x0, y0) if the point is within the valid region.
+ // Otherwise load from the edge of the valid region.
+ if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+ else
+ {
+ // Clamp coordinates
+ const auto xi = clamp<int>(x0, min_x, max_x - 1);
+ const auto yi = clamp<int>(y0, min_y, max_y - 1);
+
+ *out.ptr() = *(in.ptr() + xi + yi * stride);
+ }
+
+ x0 += M00;
+ y0 += M10;
+ },
+ in, out);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // x0 = M00 * x + M01 * y + M02
+ // y0 = M10 * x + M11 * y + M12
+ // z0 = M20 * x + M21 * y + M22
+ // xn = x0 / z0
+ // yn = y0 / z0
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M20 = _matrix[2];
+ const float M01 = _matrix[0 + 1 * 3];
+ const float M11 = _matrix[1 + 1 * 3];
+ const float M21 = _matrix[2 + 1 * 3];
+ const float M02 = _matrix[0 + 2 * 3];
+ const float M12 = _matrix[1 + 2 * 3];
+ const float M22 = _matrix[2 + 2 * 3];
+
+ // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+ const float start_z0 = M20 * window.x().start();
+
+ // Current row
+ int y_cur = window.y().start();
+
+ // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+ float const_z0 = M21 * y_cur + M22;
+
+ // Perspective warp coordinates
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+ float z0 = start_z0 + const_z0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+ const_z0 = M21 * y_cur + M22;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ z0 = start_z0 + const_z0;
+ }
+
+ const float xn = x0 / z0;
+ const float yn = y0 / z0;
+
+ // Only write to output if xn and yn are within the valid region.
+ // Otherwise the read value would be undefined.
+ if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+
+ x0 += M00;
+ y0 += M10;
+ z0 += M20;
+ },
+ in, out);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // x0 = M00 * x + M01 * y + M02
+ // y0 = M10 * x + M11 * y + M12
+ // z0 = M20 * x + M21 * y + M22
+ // xn = x0 / z0
+ // yn = y0 / z0
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M20 = _matrix[2];
+ const float M01 = _matrix[0 + 1 * 3];
+ const float M11 = _matrix[1 + 1 * 3];
+ const float M21 = _matrix[2 + 1 * 3];
+ const float M02 = _matrix[0 + 2 * 3];
+ const float M12 = _matrix[1 + 2 * 3];
+ const float M22 = _matrix[2 + 2 * 3];
+
+ // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+ const float start_z0 = M20 * window.x().start();
+
+ // Current row
+ int y_cur = window.y().start();
+
+ // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+ float const_z0 = M21 * y_cur + M22;
+
+ // Perspective warp coordinates
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+ float z0 = start_z0 + const_z0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+ const_z0 = M21 * y_cur + M22;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ z0 = start_z0 + const_z0;
+ }
+
+ const float xn = x0 / z0;
+ const float yn = y0 / z0;
+
+ // Only use input values if xn and yn are within the valid region.
+ // Otherwise write the constant border value.
+ if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+ else
+ {
+ *out.ptr() = _constant_border_value;
+ }
+
+ x0 += M00;
+ y0 += M10;
+ z0 += M20;
+ },
+ in, out);
+}
+
+template <InterpolationPolicy interpolation>
+void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int min_x = _input->info()->valid_region().anchor[0];
+ const int max_x = min_x + _input->info()->valid_region().shape[0];
+ const int min_y = _input->info()->valid_region().anchor[1];
+ const int max_y = min_y + _input->info()->valid_region().shape[1];
+ const size_t stride = _input->info()->strides_in_bytes()[1];
+
+ // Current row
+ int y_cur = window.y().start();
+
+ // x0 = M00 * x + M01 * y + M02
+ // y0 = M10 * x + M11 * y + M12
+ // z0 = M20 * x + M21 * y + M22
+ // xn = x0 / z0
+ // yn = y0 / z0
+ const float M00 = _matrix[0];
+ const float M10 = _matrix[1];
+ const float M20 = _matrix[2];
+ const float M01 = _matrix[0 + 1 * 3];
+ const float M11 = _matrix[1 + 1 * 3];
+ const float M21 = _matrix[2 + 1 * 3];
+ const float M02 = _matrix[0 + 2 * 3];
+ const float M12 = _matrix[1 + 2 * 3];
+ const float M22 = _matrix[2 + 2 * 3];
+
+ // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
+ const float start_x0 = M00 * window.x().start();
+ const float start_y0 = M10 * window.x().start();
+ const float start_z0 = M20 * window.x().start();
+
+ // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
+ float const_x0 = M01 * y_cur + M02;
+ float const_y0 = M11 * y_cur + M12;
+ float const_z0 = M21 * y_cur + M22;
+
+ // Perspective warp coordinates
+ float x0 = start_x0 + const_x0;
+ float y0 = start_y0 + const_y0;
+ float z0 = start_z0 + const_z0;
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
+ if(y_cur != id.y())
+ {
+ y_cur = id.y();
+
+ const_x0 = M01 * y_cur + M02;
+ const_y0 = M11 * y_cur + M12;
+ const_z0 = M21 * y_cur + M22;
+
+ x0 = start_x0 + const_x0;
+ y0 = start_y0 + const_y0;
+ z0 = start_z0 + const_z0;
+ }
+
+ const float xn = x0 / z0;
+ const float yn = y0 / z0;
+
+ // Only load from (x0, y0) if the point is within the valid region.
+ // Otherwise load from the edge of the valid region.
+ if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
+ {
+ switch(interpolation)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
+ break;
+ case InterpolationPolicy::BILINEAR:
+ *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Interpolation not supported");
+ }
+ }
+ else
+ {
+ // Clamp coordinates
+ const auto xi = clamp<int>(x0, min_x, max_x - 1);
+ const auto yi = clamp<int>(y0, min_y, max_y - 1);
+
+ *out.ptr() = *(in.ptr() + xi + yi * stride);
+ }
+
+ x0 += M00;
+ y0 += M10;
+ z0 += M20;
+ },
+ in, out);
+}
+
+template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
+template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
+template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
+template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;