From 50ce277853056b68465509f30ca212a4421a9935 Mon Sep 17 00:00:00 2001
From: Dana Zlotnik <dana.zlotnik@arm.com>
Date: Wed, 29 Dec 2021 13:55:56 +0200
Subject: Decouple NEGenerateProposalsLayerKernel

Resolves COMPMID-4621
Change-Id: I3d89fa6d8273cc5f61a5cc0470fd730919bcd432
Signed-off-by: Dana Zlotnik <dana.zlotnik@arm.com>
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/385363
Tested-by: bsgcomp <bsgcomp@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6867
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 Android.bp                                         |   4 +
 filelist.json                                      |   8 +-
 .../kernels/NEGenerateProposalsLayerKernel.cpp     | 154 +++++++++------------
 .../NEON/kernels/NEGenerateProposalsLayerKernel.h  |   5 +-
 src/cpu/kernels/genproposals/generic/neon/fp16.cpp |  36 +++++
 src/cpu/kernels/genproposals/generic/neon/fp32.cpp |  34 +++++
 src/cpu/kernels/genproposals/generic/neon/impl.cpp | 100 +++++++++++++
 src/cpu/kernels/genproposals/generic/neon/impl.h   |  41 ++++++
 .../kernels/genproposals/generic/neon/qsymm16.cpp  |  34 +++++
 src/cpu/kernels/genproposals/list.h                |  40 ++++++
 10 files changed, 360 insertions(+), 96 deletions(-)
 create mode 100644 src/cpu/kernels/genproposals/generic/neon/fp16.cpp
 create mode 100644 src/cpu/kernels/genproposals/generic/neon/fp32.cpp
 create mode 100644 src/cpu/kernels/genproposals/generic/neon/impl.cpp
 create mode 100644 src/cpu/kernels/genproposals/generic/neon/impl.h
 create mode 100644 src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp
 create mode 100644 src/cpu/kernels/genproposals/list.h

diff --git a/Android.bp b/Android.bp
index f5db63b35c..d472e6250f 100644
--- a/Android.bp
+++ b/Android.bp
@@ -461,6 +461,10 @@ cc_library_static {
         "src/cpu/kernels/elementwise/sve/elementwise_unary.cpp",
         "src/cpu/kernels/floor/neon/fp16.cpp",
         "src/cpu/kernels/floor/neon/fp32.cpp",
+        "src/cpu/kernels/genproposals/generic/neon/fp16.cpp",
+        "src/cpu/kernels/genproposals/generic/neon/fp32.cpp",
+        "src/cpu/kernels/genproposals/generic/neon/impl.cpp",
+        "src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp",
         "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp",
         "src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
         "src/cpu/kernels/maxunpool/generic/neon/fp16.cpp",
diff --git a/filelist.json b/filelist.json
index da05ad9563..359c0046b9 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1508,7 +1508,13 @@
           "common": [
             "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp",
             "src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp"
-          ]
+          ],
+          "neon":{
+            "common":["src/cpu/kernels/genproposals/generic/neon/impl.cpp"],
+            "fp16":["src/cpu/kernels/genproposals/generic/neon/fp16.cpp"],
+            "fp32":["src/cpu/kernels/genproposals/generic/neon/fp32.cpp"],
+            "qsymm16":["src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp"]
+          }
         }
       },
       "InstanceNormalize": {
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
index 56aed0ca25..7bba136e84 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,15 +28,72 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Window.h"
 #include "src/core/CPP/Validate.h"
+#include "src/core/common/Registrars.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
-
+#include "src/cpu/kernels/genproposals/list.h"
 #include <arm_neon.h>
 
 namespace arm_compute
 {
 namespace
 {
+struct ComputeAllAnchorsData
+{
+    DataType dt;
+};
+
+using ComputeAllAnchorsSelectorPtr = std::add_pointer<bool(const ComputeAllAnchorsData &data)>::type;
+using ComputeAllAnchorsUKernelPtr  = std::add_pointer<void(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)>::type;
+
+struct ComputeAllAnchorsKernel
+{
+    const char                        *name;
+    const ComputeAllAnchorsSelectorPtr is_selected;
+    ComputeAllAnchorsUKernelPtr        ukernel;
+};
+
+static const ComputeAllAnchorsKernel available_kernels[] =
+{
+#if defined(ARM_COMPUTE_ENABLE_NEON)
+    {
+        "neon_qu16_computeallanchors",
+        [](const ComputeAllAnchorsData & data) { return data.dt == DataType::QSYMM16; },
+        REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)
+    },
+#endif //defined(ARM_COMPUTE_ENABLE_NEON)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    {
+        "neon_fp16_computeallanchors",
+        [](const ComputeAllAnchorsData & data) { return data.dt == DataType::F16; },
+        REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)
+    },
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    {
+        "neon_fp32_computeallanchors",
+        [](const ComputeAllAnchorsData & data) { return data.dt == DataType::F32; },
+        REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)
+    },
+};
+
+/** Micro-kernel selector
+ *
+ * @param[in] data Selection data passed to help pick the appropriate micro-kernel
+ *
+ * @return A matching micro-kernel else nullptr
+ */
+const ComputeAllAnchorsKernel *get_implementation(const ComputeAllAnchorsData &data)
+{
+    for(const auto &uk : available_kernels)
+    {
+        if(uk.is_selected(data))
+        {
+            return &uk;
+        }
+    }
+    return nullptr;
+}
+
 Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(anchors, all_anchors);
@@ -100,100 +157,15 @@ Status NEComputeAllAnchorsKernel::validate(const ITensorInfo *anchors, const ITe
     return Status{};
 }
 
-template <>
-void NEComputeAllAnchorsKernel::internal_run<int16_t>(const Window &window)
-{
-    Iterator all_anchors_it(_all_anchors, window);
-    Iterator anchors_it(_all_anchors, window);
-
-    const size_t num_anchors = _anchors->info()->dimension(1);
-    const float  stride      = 1.f / _anchors_info.spatial_scale();
-    const size_t feat_width  = _anchors_info.feat_width();
-
-    const UniformQuantizationInfo qinfo = _anchors->info()->quantization_info().uniform();
-
-    execute_window_loop(window, [&](const Coordinates & id)
-    {
-        const size_t anchor_offset = id.y() % num_anchors;
-
-        const auto out_anchor_ptr = reinterpret_cast<int16_t *>(all_anchors_it.ptr());
-        const auto anchor_ptr     = reinterpret_cast<int16_t *>(_anchors->ptr_to_element(Coordinates(0, anchor_offset)));
-
-        const size_t shift_idy = id.y() / num_anchors;
-        const float  shiftx    = (shift_idy % feat_width) * stride;
-        const float  shifty    = (shift_idy / feat_width) * stride;
-
-        const float new_anchor_x1 = dequantize_qsymm16(*anchor_ptr, qinfo.scale) + shiftx;
-        const float new_anchor_y1 = dequantize_qsymm16(*(1 + anchor_ptr), qinfo.scale) + shifty;
-        const float new_anchor_x2 = dequantize_qsymm16(*(2 + anchor_ptr), qinfo.scale) + shiftx;
-        const float new_anchor_y2 = dequantize_qsymm16(*(3 + anchor_ptr), qinfo.scale) + shifty;
-
-        *out_anchor_ptr       = quantize_qsymm16(new_anchor_x1, qinfo.scale);
-        *(out_anchor_ptr + 1) = quantize_qsymm16(new_anchor_y1, qinfo.scale);
-        *(out_anchor_ptr + 2) = quantize_qsymm16(new_anchor_x2, qinfo.scale);
-        *(out_anchor_ptr + 3) = quantize_qsymm16(new_anchor_y2, qinfo.scale);
-    },
-    all_anchors_it);
-}
-
-template <typename T>
-void NEComputeAllAnchorsKernel::internal_run(const Window &window)
-{
-    Iterator all_anchors_it(_all_anchors, window);
-    Iterator anchors_it(_all_anchors, window);
-
-    const size_t num_anchors = _anchors->info()->dimension(1);
-    const T      stride      = 1.f / _anchors_info.spatial_scale();
-    const size_t feat_width  = _anchors_info.feat_width();
-
-    execute_window_loop(window, [&](const Coordinates & id)
-    {
-        const size_t anchor_offset = id.y() % num_anchors;
-
-        const auto out_anchor_ptr = reinterpret_cast<T *>(all_anchors_it.ptr());
-        const auto anchor_ptr     = reinterpret_cast<T *>(_anchors->ptr_to_element(Coordinates(0, anchor_offset)));
-
-        const size_t shift_idy = id.y() / num_anchors;
-        const T      shiftx    = (shift_idy % feat_width) * stride;
-        const T      shifty    = (shift_idy / feat_width) * stride;
-
-        *out_anchor_ptr       = *anchor_ptr + shiftx;
-        *(out_anchor_ptr + 1) = *(1 + anchor_ptr) + shifty;
-        *(out_anchor_ptr + 2) = *(2 + anchor_ptr) + shiftx;
-        *(out_anchor_ptr + 3) = *(3 + anchor_ptr) + shifty;
-    },
-    all_anchors_it);
-}
-
 void NEComputeAllAnchorsKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
 
-    switch(_anchors->info()->data_type())
-    {
-        case DataType::QSYMM16:
-        {
-            internal_run<int16_t>(window);
-            break;
-        }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-        case DataType::F16:
-        {
-            internal_run<float16_t>(window);
-            break;
-        }
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-        case DataType::F32:
-        {
-            internal_run<float>(window);
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_ERROR("Data type not supported");
-        }
-    }
+    const auto *uk = get_implementation(ComputeAllAnchorsData{ _anchors->info()->data_type() });
+    ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
+
+    uk->ukernel(_anchors, _all_anchors, _anchors_info, window);
 }
 } // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
index f6d39e50a7..297d6d4abe 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -74,9 +74,6 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    template <typename T>
-    void internal_run(const Window &window);
-
     const ITensor     *_anchors;
     ITensor           *_all_anchors;
     ComputeAnchorsInfo _anchors_info;
diff --git a/src/cpu/kernels/genproposals/generic/neon/fp16.cpp b/src/cpu/kernels/genproposals/generic/neon/fp16.cpp
new file mode 100644
index 0000000000..d4e469b691
--- /dev/null
+++ b/src/cpu/kernels/genproposals/generic/neon/fp16.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/cpu/kernels/genproposals/generic/neon/impl.h"
+namespace arm_compute
+{
+namespace cpu
+{
+void neon_fp16_computeallanchors(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+{
+    return compute_all_anchors<float16_t>(anchors, all_anchors, anchors_info, window);
+}
+}
+} // namespace arm_compute
+#endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
diff --git a/src/cpu/kernels/genproposals/generic/neon/fp32.cpp b/src/cpu/kernels/genproposals/generic/neon/fp32.cpp
new file mode 100644
index 0000000000..09aa6ecec4
--- /dev/null
+++ b/src/cpu/kernels/genproposals/generic/neon/fp32.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/kernels/genproposals/generic/neon/impl.h"
+namespace arm_compute
+{
+namespace cpu
+{
+void neon_fp32_computeallanchors(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+{
+    return compute_all_anchors<float>(anchors, all_anchors, anchors_info, window);
+}
+}
+} // namespace arm_compute
diff --git a/src/cpu/kernels/genproposals/generic/neon/impl.cpp b/src/cpu/kernels/genproposals/generic/neon/impl.cpp
new file mode 100644
index 0000000000..824e85adac
--- /dev/null
+++ b/src/cpu/kernels/genproposals/generic/neon/impl.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019-2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/kernels/genproposals/generic/neon/impl.h"
+namespace arm_compute
+{
+class ITensor;
+class Window;
+namespace cpu
+{
+template <typename T>
+void compute_all_anchors(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+{
+    Iterator all_anchors_it(all_anchors, window);
+    Iterator anchors_it(all_anchors, window);
+
+    const size_t num_anchors = anchors->info()->dimension(1);
+    const T      stride      = 1.f / anchors_info.spatial_scale();
+    const size_t feat_width  = anchors_info.feat_width();
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const size_t anchor_offset = id.y() % num_anchors;
+
+        const auto out_anchor_ptr = reinterpret_cast<T *>(all_anchors_it.ptr());
+        const auto anchor_ptr     = reinterpret_cast<T *>(anchors->ptr_to_element(Coordinates(0, anchor_offset)));
+
+        const size_t shift_idy = id.y() / num_anchors;
+        const T      shiftx    = (shift_idy % feat_width) * stride;
+        const T      shifty    = (shift_idy / feat_width) * stride;
+
+        *out_anchor_ptr       = *anchor_ptr + shiftx;
+        *(out_anchor_ptr + 1) = *(1 + anchor_ptr) + shifty;
+        *(out_anchor_ptr + 2) = *(2 + anchor_ptr) + shiftx;
+        *(out_anchor_ptr + 3) = *(3 + anchor_ptr) + shifty;
+    },
+    all_anchors_it);
+}
+
+template void compute_all_anchors<float>(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window);
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+template void compute_all_anchors<float16_t>(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window);
+#endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+
+void compute_all_anchors_qasymm16(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+{
+    Iterator all_anchors_it(all_anchors, window);
+    Iterator anchors_it(all_anchors, window);
+
+    const size_t num_anchors = anchors->info()->dimension(1);
+    const float  stride      = 1.f / anchors_info.spatial_scale();
+    const size_t feat_width  = anchors_info.feat_width();
+
+    const UniformQuantizationInfo qinfo = anchors->info()->quantization_info().uniform();
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const size_t anchor_offset = id.y() % num_anchors;
+
+        const auto out_anchor_ptr = reinterpret_cast<int16_t *>(all_anchors_it.ptr());
+        const auto anchor_ptr     = reinterpret_cast<int16_t *>(anchors->ptr_to_element(Coordinates(0, anchor_offset)));
+
+        const size_t shift_idy = id.y() / num_anchors;
+        const float  shiftx    = (shift_idy % feat_width) * stride;
+        const float  shifty    = (shift_idy / feat_width) * stride;
+
+        const float new_anchor_x1 = dequantize_qsymm16(*anchor_ptr, qinfo.scale) + shiftx;
+        const float new_anchor_y1 = dequantize_qsymm16(*(1 + anchor_ptr), qinfo.scale) + shifty;
+        const float new_anchor_x2 = dequantize_qsymm16(*(2 + anchor_ptr), qinfo.scale) + shiftx;
+        const float new_anchor_y2 = dequantize_qsymm16(*(3 + anchor_ptr), qinfo.scale) + shifty;
+
+        *out_anchor_ptr       = quantize_qsymm16(new_anchor_x1, qinfo.scale);
+        *(out_anchor_ptr + 1) = quantize_qsymm16(new_anchor_y1, qinfo.scale);
+        *(out_anchor_ptr + 2) = quantize_qsymm16(new_anchor_x2, qinfo.scale);
+        *(out_anchor_ptr + 3) = quantize_qsymm16(new_anchor_y2, qinfo.scale);
+    },
+    all_anchors_it);
+}
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/kernels/genproposals/generic/neon/impl.h b/src/cpu/kernels/genproposals/generic/neon/impl.h
new file mode 100644
index 0000000000..88f5e52020
--- /dev/null
+++ b/src/cpu/kernels/genproposals/generic/neon/impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_SVE_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_IMPL_H
+#define SRC_CORE_SVE_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_IMPL_H
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+namespace arm_compute
+{
+class ITensor;
+class Window;
+namespace cpu
+{
+template <typename T>
+void compute_all_anchors(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window);
+
+void compute_all_anchors_qasymm16(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window);
+} // namespace cpu
+} // namespace arm_compute
+#endif //define SRC_CORE_SVE_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_IMPL_H
diff --git a/src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp b/src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp
new file mode 100644
index 0000000000..cfb5a41d6e
--- /dev/null
+++ b/src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/kernels/genproposals/generic/neon/impl.h"
+namespace arm_compute
+{
+namespace cpu
+{
+void neon_qu16_computeallanchors(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+{
+    return compute_all_anchors_qasymm16(anchors, all_anchors, anchors_info, window);
+}
+}
+} // namespace arm_compute
diff --git a/src/cpu/kernels/genproposals/list.h b/src/cpu/kernels/genproposals/list.h
new file mode 100644
index 0000000000..570c686e89
--- /dev/null
+++ b/src/cpu/kernels/genproposals/list.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_NEON_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_LIST_H
+#define SRC_CORE_NEON_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_LIST_H
+namespace arm_compute
+{
+namespace cpu
+{
+#define DECLARE_NEGENERATEPROPOSALSLAYERKERNEL_KERNEL(func_name) \
+    void func_name(const ITensor *anchors, ITensor *all_anchors, ComputeAnchorsInfo anchors_info, const Window &window)
+
+DECLARE_NEGENERATEPROPOSALSLAYERKERNEL_KERNEL(neon_qu16_computeallanchors);
+DECLARE_NEGENERATEPROPOSALSLAYERKERNEL_KERNEL(neon_fp16_computeallanchors);
+DECLARE_NEGENERATEPROPOSALSLAYERKERNEL_KERNEL(neon_fp32_computeallanchors);
+
+#undef DECLARE_NEGENERATEPROPOSALSLAYERKERNEL_KERNEL
+} // namespace cpu
+} // namespace arm_compute
+#endif /* SRC_CORE_NEON_KERNELS_NEGENERATEPROPOSALSLAYERKERNEL_LIST_H */
-- 
cgit v1.2.1