From 5209be567a0a7df4d205d3dc2b971b8f03964593 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 13 Feb 2019 16:34:56 +0000 Subject: COMPMID-1999: Add support for GenerateProposals operator in CL Change-Id: Ie08a6874347085f96b00f25bdb605eee7d683c25 Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/719 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Michalis Spyrou --- arm_compute/core/CL/CLKernels.h | 1 + .../CL/kernels/CLGenerateProposalsLayerKernel.h | 76 ++++++++++ arm_compute/core/Types.h | 163 ++++++++++++++++++++- arm_compute/graph/GraphBuilder.h | 13 ++ arm_compute/graph/TypePrinter.h | 3 + arm_compute/graph/Types.h | 1 + arm_compute/graph/backends/FunctionHelpers.h | 49 +++++++ arm_compute/graph/backends/ValidateHelpers.h | 27 ++++ arm_compute/graph/frontend/Layers.h | 38 +++++ .../graph/nodes/GenerateProposalsLayerNode.h | 60 ++++++++ arm_compute/graph/nodes/Nodes.h | 1 + arm_compute/graph/nodes/NodesFwd.h | 1 + arm_compute/runtime/CL/CLFunctions.h | 2 + .../runtime/CL/functions/CLComputeAllAnchors.h | 62 ++++++++ .../CL/functions/CLGenerateProposalsLayer.h | 148 +++++++++++++++++++ 15 files changed, 643 insertions(+), 2 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h create mode 100644 arm_compute/graph/nodes/GenerateProposalsLayerNode.h create mode 100644 arm_compute/runtime/CL/functions/CLComputeAllAnchors.h create mode 100644 arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h (limited to 'arm_compute') diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index d8b9934313..cc4888c663 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -89,6 +89,7 @@ #include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" #include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" #include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" #include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h new file mode 100644 index 0000000000..5900d79821 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +namespace arm_compute +{ +class ICLTensor; + +/** Interface for Compute All Anchors kernel */ +class CLComputeAllAnchorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLComputeAllAnchorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default; + /** Default destructor */ + ~CLComputeAllAnchorsKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel + * + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + * @return a Status + */ + static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_anchors; + ICLTensor *_all_anchors; +}; +} // arm_compute +#endif // __ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H__ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index b0f792e92b..1ce44ee2e8 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -634,13 +634,17 @@ public: * @param[in] soft_nms_method (Optional) Soft NMS method * @param[in] soft_nms_sigma (Optional) Soft NMS sigma value * @param[in] soft_nms_min_score_thres (Optional) Soft NMS minimum score threshold + * @param[in] suppress_size (Optional) Filter out boxes based on their size. Defaults to false + * @param[in] min_size (Optional) Smaller boxes than min_size will be filtered out. 
Defaults to 1 + * @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1 + * @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1 */ BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f, int detections = 100, bool soft_nms_enabled = false, NMSType soft_nms_method = NMSType::LINEAR, - float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f) + float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f) : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma), - _soft_nms_min_score_thres(soft_nms_min_score_thres) + _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height) { } /** Get the score threshold */ @@ -678,6 +682,26 @@ public: { return _soft_nms_min_score_thres; } + /** Get if NMS will suppress boxes based on their size/position */ + bool suppress_size() const + { + return _suppress_size; + } + /** Get size suppression threshold */ + float min_size() const + { + return _min_size; + } + /** Get image width (NMS may suppress boxes whose center sits beyond the image width) */ + float im_width() const + { + return _im_width; + } + /** Get image height (NMS may suppress boxes whose center sits beyond the image height) */ + float im_height() const + { + return _im_height; + } private: float _score_thresh; @@ -687,6 +711,10 @@ private: NMSType _soft_nms_method; float _soft_nms_sigma; float _soft_nms_min_score_thres; + bool _suppress_size; + float _min_size; + float _im_width; + float _im_height; }; /** Padding and stride information class */ @@ -1217,6 +1245,137 @@ private: unsigned int _sampling_ratio; }; +/** Generate Proposals Information class */ +class GenerateProposalsInfo +{ +public: + /** Constructor + * + * @param[in] im_width Width of the original image + * @param[in] im_height Height of the original image + * @param[in] im_scale Scale applied to the original image + * @param[in] spatial_scale (Optional)Scale applied to the feature map. Defaults to 1.0 + * @param[in] pre_nms_topN (Optional)Number of the best scores to be selected from the transformations. Defaults to 6000. + * @param[in] post_nms_topN (Optional)Number of the best scores to be selected from the NMS operation. Defaults to 300. + * @param[in] nms_thres (Optional)NMS overlap threshold. Defaults to 0.7. + * @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16. + * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4. 
+ */ + GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0, + size_t values_per_roi = 4) + : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres), + _min_size(min_size), _values_per_roi(values_per_roi) + { + } + + /* Get the original height */ + float im_height() const + { + return _im_height; + } + /* Get the original width */ + float im_width() const + { + return _im_width; + } + /* Get the image scale */ + float im_scale() const + { + return _im_scale; + } + /* Get the value of how many best scores to select (before NMS) */ + int pre_nms_topN() const + { + return _pre_nms_topN; + } + /* Get the value of how many best scores to select (after NMS) */ + int post_nms_topN() const + { + return _post_nms_topN; + } + /* Get the NMS overlap threshold */ + float nms_thres() const + { + return _nms_thres; + } + /* Get the minimal size */ + float min_size() const + { + return _min_size; + } + /* Get the spatial scale to be applied to the feature maps */ + float spatial_scale() const + { + return _spatial_scale; + } + /* Get the values used to represent a ROI(Region of interest)*/ + size_t values_per_roi() const + { + return _values_per_roi; + } + +private: + float _im_height; + float _im_width; + float _im_scale; + float _spatial_scale; + int _pre_nms_topN; + int _post_nms_topN; + float _nms_thres; + float _min_size; + size_t _values_per_roi; +}; + +/** ComputeAnchors information class */ +class ComputeAnchorsInfo +{ +public: + /** Constructor + * + * @param[in] feat_width Feature map width + * @param[in] feat_height Feature map height + * @param[in] spatial_scale Feature map scale + * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region Of Interest). 
Defaults to 4 + */ + ComputeAnchorsInfo(float feat_width, float feat_height, float spatial_scale, size_t values_per_roi = 4) + : _feat_height(feat_height), + _feat_width(feat_width), + _spatial_scale(spatial_scale), + _values_per_roi(values_per_roi) + { + } + + /* Get the height of the feature map */ + float feat_height() const + { + return _feat_height; + } + + /* Get the width of the feature map */ + float feat_width() const + { + return _feat_width; + } + + /* Get the scale of the feature map */ + float spatial_scale() const + { + return _spatial_scale; + } + + /* Get the values used to represent a ROI(Region Of Interest)*/ + size_t values_per_roi() const + { + return _values_per_roi; + } + +private: + float _feat_height; + float _feat_width; + float _spatial_scale; + size_t _values_per_roi; +}; + /** Bounding Box Transform information class */ class BoundingBoxTransformInfo final { diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h index cf213e4e51..1296f56482 100644 --- a/arm_compute/graph/GraphBuilder.h +++ b/arm_compute/graph/GraphBuilder.h @@ -253,6 +253,19 @@ public: const FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), const QuantizationInfo weights_quant_info = QuantizationInfo(), const QuantizationInfo out_quant_info = QuantizationInfo()); + /** Adds a generate proposals layer node to the graph + * + * @param[in] g Graph to add the layer to + * @param[in] params Common node parameters + * @param[in] scores Input scores to the generate proposals layer node as a NodeID-Index pair + * @param[in] deltas Input deltas to the generate proposals layer node as a NodeID-Index pair + * @param[in] anchors Input anchors to the generate proposals layer node as a NodeID-Index pair + * @param[in] info Generate proposals operation information + * + * @return Node ID of the created node, EmptyNodeID in case of error + */ + static NodeID add_generate_proposals_node(Graph &g, NodeParams params, NodeIdxPair scores, NodeIdxPair deltas, + NodeIdxPair anchors, GenerateProposalsInfo info); /** Adds a normalization layer node to the graph * * @param[in] g Graph to add the node to diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h index faa7f31911..ca62d4ec17 100644 --- a/arm_compute/graph/TypePrinter.h +++ b/arm_compute/graph/TypePrinter.h @@ -98,6 +98,9 @@ inline ::std::ostream &operator<<(::std::ostream &os, const NodeType &node_type) case NodeType::FullyConnectedLayer: os << "FullyConnectedLayer"; break; + case NodeType::GenerateProposalsLayer: + os << "GenerateProposalsLayer"; + break; case NodeType::NormalizationLayer: os << "NormalizationLayer"; break; diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index ee136e2a1e..8377253338 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -138,6 +138,7 @@ enum class NodeType EltwiseLayer, FlattenLayer, FullyConnectedLayer, + GenerateProposalsLayer, NormalizationLayer, NormalizePlanarYUVLayer, PadLayer, diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index 548afd27c5..7242bc6ede 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -688,6 +688,55 @@ std::unique_ptr create_fully_connected_layer(FullyConnectedLayerNode return std::move(func); } +/** Create a backend generate proposals layer function + * + * @tparam GenerateProposalsLayerFunction Backend generate proposals function + * @tparam TargetInfo Target-specific information + * + 
* @param[in] node Node to create the backend function for + * @param[in] ctx Graph context + * + * @return Backend generate proposals layer function + */ +template +std::unique_ptr create_generate_proposals_layer(GenerateProposalsLayerNode &node, GraphContext &ctx) +{ + validate_node(node, 3 /* expected inputs */, 3 /* expected outputs */); + + // Extract IO and info + typename TargetInfo::TensorType *scores = get_backing_tensor(node.input(0)); + typename TargetInfo::TensorType *deltas = get_backing_tensor(node.input(1)); + typename TargetInfo::TensorType *anchors = get_backing_tensor(node.input(2)); + typename TargetInfo::TensorType *proposals = get_backing_tensor(node.output(0)); + typename TargetInfo::TensorType *scores_out = get_backing_tensor(node.output(1)); + typename TargetInfo::TensorType *num_valid_proposals = get_backing_tensor(node.output(2)); + const GenerateProposalsInfo info = node.info(); + + ARM_COMPUTE_ERROR_ON(scores == nullptr); + ARM_COMPUTE_ERROR_ON(deltas == nullptr); + ARM_COMPUTE_ERROR_ON(anchors == nullptr); + ARM_COMPUTE_ERROR_ON(proposals == nullptr); + ARM_COMPUTE_ERROR_ON(scores_out == nullptr); + + // Create and configure function + auto func = support::cpp14::make_unique(get_memory_manager(ctx, TargetInfo::TargetType)); + func->configure(scores, deltas, anchors, proposals, scores_out, num_valid_proposals, info); + + // Log info + ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() + << " Target " << TargetInfo::TargetType + << " Data Type: " << scores->info()->data_type() + << " Scores shape: " << scores->info()->tensor_shape() + << " Deltas shape: " << deltas->info()->tensor_shape() + << " Anchors shape: " << anchors->info()->tensor_shape() + << " Proposals shape: " << proposals->info()->tensor_shape() + << " Num valid proposals shape: " << num_valid_proposals->info()->tensor_shape() + << " Scores Out shape: " << scores_out->info()->tensor_shape() + << std::endl); + + return std::move(func); +} + /** Create a backend normalization layer function * * @tparam NormalizationLayerFunction Backend normalization function diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h index 1b06f31bed..8942be2da8 100644 --- a/arm_compute/graph/backends/ValidateHelpers.h +++ b/arm_compute/graph/backends/ValidateHelpers.h @@ -228,6 +228,33 @@ Status validate_detection_output_layer(DetectionOutputLayerNode &node) return DetectionOutputLayer::validate(input0, input1, input2, output, detect_info); } +/** Validates a Generate Proposals layer node + * + * @tparam GenerateProposalsLayer Generate Proposals layer type + * + * @param[in] node Node to validate + * + * @return Status + */ +template +Status validate_generate_proposals_layer(GenerateProposalsLayerNode &node) +{ + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating GenerateProposalsLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl); + ARM_COMPUTE_RETURN_ERROR_ON(node.num_inputs() != 3); + ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 3); + + // Extract IO and info + arm_compute::ITensorInfo *scores = detail::get_backing_tensor_info(node.input(0)); + arm_compute::ITensorInfo *deltas = detail::get_backing_tensor_info(node.input(1)); + arm_compute::ITensorInfo *anchors = detail::get_backing_tensor_info(node.input(2)); + arm_compute::ITensorInfo *proposals = get_backing_tensor_info(node.output(0)); + arm_compute::ITensorInfo *scores_out = get_backing_tensor_info(node.output(1)); + arm_compute::ITensorInfo *num_valid_proposals = 
get_backing_tensor_info(node.output(2)); + const GenerateProposalsInfo info = node.info(); + + return GenerateProposalsLayer::validate(scores, deltas, anchors, proposals, scores_out, num_valid_proposals, info); +} + /** Validates a NormalizePlanarYUV layer node * * @tparam NormalizePlanarYUVLayer layer type diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h index d10fa7f27a..1a71c89e54 100644 --- a/arm_compute/graph/frontend/Layers.h +++ b/arm_compute/graph/frontend/Layers.h @@ -608,6 +608,44 @@ private: const QuantizationInfo _out_quant_info; }; +/** Generate Proposals Layer */ +class GenerateProposalsLayer final : public ILayer +{ +public: + /** Construct a generate proposals layer. + * + * @param[in] ss_scores Graph sub-stream for the scores. + * @param[in] ss_deltas Graph sub-stream for the deltas. + * @param[in] ss_anchors Graph sub-stream for the anchors. + * @param[in] info Generate Proposals operation information. + */ + GenerateProposalsLayer(SubStream &&ss_scores, SubStream &&ss_deltas, SubStream &&ss_anchors, GenerateProposalsInfo info) + : _ss_scores(std::move(ss_scores)), _ss_deltas(std::move(ss_deltas)), _ss_anchors(std::move(ss_anchors)), _info(info) + { + } + + /** Create layer and add to the given stream. + * + * @param[in] s Stream to add layer to. + * + * @return ID of the created node. + */ + NodeID create_layer(IStream &s) override + { + NodeParams common_params = { name(), s.hints().target_hint }; + NodeIdxPair scores = { _ss_scores.tail_node(), 0 }; + NodeIdxPair deltas = { _ss_deltas.tail_node(), 0 }; + NodeIdxPair anchors = { _ss_anchors.tail_node(), 0 }; + return GraphBuilder::add_generate_proposals_node(s.graph(), common_params, scores, deltas, anchors, _info); + } + +private: + SubStream _ss_scores; + SubStream _ss_deltas; + SubStream _ss_anchors; + GenerateProposalsInfo _info; +}; + /** Normalization Layer */ class NormalizationLayer final : public ILayer { diff --git a/arm_compute/graph/nodes/GenerateProposalsLayerNode.h b/arm_compute/graph/nodes/GenerateProposalsLayerNode.h new file mode 100644 index 0000000000..d8c0c78f22 --- /dev/null +++ b/arm_compute/graph/nodes/GenerateProposalsLayerNode.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GENERATE_PROPOSALS_NODE_H__ +#define __ARM_COMPUTE_GENERATE_PROPOSALS_NODE_H__ + +#include "arm_compute/graph/INode.h" + +namespace arm_compute +{ +namespace graph +{ +/** Generate Proposals Layer node */ +class GenerateProposalsLayerNode final : public INode +{ +public: + /** Constructor + * + * @param[in] info Generate proposals operation information. + */ + GenerateProposalsLayerNode(GenerateProposalsInfo &info); + /** GenerateProposalsInfo accessor + * + * @return GenerateProposalsInfo + */ + const GenerateProposalsInfo &info() const; + + // Inherited overridden methods: + NodeType type() const override; + bool forward_descriptors() override; + TensorDescriptor configure_output(size_t idx) const override; + void accept(INodeVisitor &v) override; + +private: + GenerateProposalsInfo _info; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GENERATE_PROPOSALS_NODE_H__ */ diff --git a/arm_compute/graph/nodes/Nodes.h b/arm_compute/graph/nodes/Nodes.h index 79ae5d4ae7..24064855e8 100644 --- a/arm_compute/graph/nodes/Nodes.h +++ b/arm_compute/graph/nodes/Nodes.h @@ -38,6 +38,7 @@ #include "arm_compute/graph/nodes/EltwiseLayerNode.h" #include "arm_compute/graph/nodes/FlattenLayerNode.h" #include "arm_compute/graph/nodes/FullyConnectedLayerNode.h" +#include "arm_compute/graph/nodes/GenerateProposalsLayerNode.h" #include "arm_compute/graph/nodes/InputNode.h" #include "arm_compute/graph/nodes/NormalizationLayerNode.h" #include "arm_compute/graph/nodes/NormalizePlanarYUVLayerNode.h" diff --git a/arm_compute/graph/nodes/NodesFwd.h b/arm_compute/graph/nodes/NodesFwd.h index 6a0be1bf59..cbda3092fd 100644 --- a/arm_compute/graph/nodes/NodesFwd.h +++ b/arm_compute/graph/nodes/NodesFwd.h @@ -44,6 +44,7 @@ class DummyNode; class EltwiseLayerNode; class FlattenLayerNode; class FullyConnectedLayerNode; +class GenerateProposalsLayerNode; class InputNode; class NormalizationLayerNode; class NormalizePlanarYUVLayerNode; diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index 504443e806..686d266557 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -44,6 +44,7 @@ #include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h" #include "arm_compute/runtime/CL/functions/CLColorConvert.h" #include "arm_compute/runtime/CL/functions/CLComparison.h" +#include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h" #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" #include "arm_compute/runtime/CL/functions/CLConvolution.h" @@ -79,6 +80,7 @@ #include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h" #include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h" #include "arm_compute/runtime/CL/functions/CLHOGDetector.h" #include "arm_compute/runtime/CL/functions/CLHOGGradient.h" diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h new file mode 100644 index 0000000000..39d9b553b8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCOMPUTEALLANCHORS_H__ +#define __ARM_COMPUTE_CLCOMPUTEALLANCHORS_H__ + +#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLComputeAllAnchorsKernel. + * + * This function calls the following OpenCL kernels: + * -# @ref CLComputeAllAnchorsKernel + */ +class CLComputeAllAnchors : public ICLSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel + * + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 + * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + * @return a Status + */ + static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLCOMPUTEALLANCOHORS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h new file mode 100644 index 0000000000..26da0bfd7e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H__ +#define __ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H__ +#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" +#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" +#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to generate proposals for a RPN (Region Proposal Network) + * + * This function calls the following OpenCL kernels: + * -# @ref CLComputeAllAnchors + * -# @ref CLPermute x 2 + * -# @ref CLReshapeLayer x 2 + * -# @ref CLStridedSlice x 3 + * -# @ref CLBoundingBoxTransform + * -# @ref CLCopyKernel + * -# @ref CLMemsetKernel + * And the following CPP kernels: + * -# @ref CPPBoxWithNonMaximaSuppressionLimit + */ +class CLGenerateProposalsLayer : public IFunction +{ +public: + /** Default constructor + * + * @param[in] memory_manager (Optional) Memory manager. + */ + CLGenerateProposalsLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGenerateProposalsLayer(const CLGenerateProposalsLayer &) = delete; + /** Default move constructor */ + CLGenerateProposalsLayer(CLGenerateProposalsLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGenerateProposalsLayer &operator=(const CLGenerateProposalsLayer &) = delete; + /** Default move assignment operator */ + CLGenerateProposalsLayer &operator=(CLGenerateProposalsLayer &&) = default; + + /** Set the input and output tensors. + * + * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. 
Data types supported: F16/F32 + * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores + * @param[in] anchors Anchors tensor of size (4, A). Data types supported: Same as @p input + * @param[out] proposals Box proposals output tensor of size (5, W*H*A). Data types supported: Same as @p input + * @param[out] scores_out Box scores output tensor of size (W*H*A). Data types supported: Same as @p input + * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32 + * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct. + * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid. + */ + void configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals, + const GenerateProposalsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLGenerateProposalsLayer + * + * @param[in] scores Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32 + * @param[in] deltas Bounding box deltas info from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores + * @param[in] anchors Anchors tensor info of size (4, A). Data types supported: Same as @p input + * @param[in] proposals Box proposals info output tensor of size (5, W*H*A). Data types supported: Data types supported: U32 + * @param[in] scores_out Box scores output tensor info of size (W*H*A). Data types supported: Same as @p input + * @param[in] num_valid_proposals Scalar output tensor info which says which of the first proposals are valid. 
Data types supported: Same as @p input + * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo + * + * @return a Status + */ + static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, + const ITensorInfo *num_valid_proposals, + const GenerateProposalsInfo &info); + + // Inherited methods overridden: + void run() override; + +private: + // Memory group manager + CLMemoryGroup _memory_group; + + // OpenCL kernels + CLPermuteKernel _permute_deltas_kernel; + CLReshapeLayerKernel _flatten_deltas_kernel; + CLPermuteKernel _permute_scores_kernel; + CLReshapeLayerKernel _flatten_scores_kernel; + CLComputeAllAnchorsKernel _compute_anchors_kernel; + CLBoundingBoxTransformKernel _bounding_box_kernel; + CLMemsetKernel _memset_kernel; + CLCopyKernel _padded_copy_kernel; + + // CPP kernels + CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel; + + bool _is_nhwc; + + // Temporary tensors + CLTensor _deltas_permuted; + CLTensor _deltas_flattened; + CLTensor _scores_permuted; + CLTensor _scores_flattened; + CLTensor _all_anchors; + CLTensor _all_proposals; + CLTensor _keeps_nms_unused; + CLTensor _classes_nms_unused; + CLTensor _proposals_4_roi_values; + + // Output tensor pointers + ICLTensor *_num_valid_proposals; + ICLTensor *_scores_out; + + /** Internal function to run the CPP BoxWithNMS kernel */ + void run_cpp_nms_kernel(); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H__ */ -- cgit v1.2.1
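
A minimal usage sketch of the new CLGenerateProposalsLayer, assuming a 38x38 F32 feature map with 9 anchors per spatial location, a 600x600 input image and a 1/16 feature-map scale. Only the configure() signature, the tensor shapes and the GenerateProposalsInfo constructor follow the header documentation added by this patch; the concrete sizes and the scheduler/allocation boilerplate are illustrative assumptions.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h"

using namespace arm_compute;

int main()
{
    // Initialise the OpenCL scheduler (context, command queue, kernel library)
    CLScheduler::get().default_init();

    // Illustrative sizes: W x H feature map, A anchors per spatial location
    const unsigned int W = 38, H = 38, A = 9;

    CLTensor scores, deltas, anchors, proposals, scores_out, num_valid_proposals;
    scores.allocator()->init(TensorInfo(TensorShape(W, H, A), 1, DataType::F32));          // (W, H, A)
    deltas.allocator()->init(TensorInfo(TensorShape(W, H, 4 * A), 1, DataType::F32));      // (W, H, 4*A)
    anchors.allocator()->init(TensorInfo(TensorShape(4U, A), 1, DataType::F32));           // (4, A)
    proposals.allocator()->init(TensorInfo(TensorShape(5U, W * H * A), 1, DataType::F32)); // (5, W*H*A)
    scores_out.allocator()->init(TensorInfo(TensorShape(W * H * A), 1, DataType::F32));    // (W*H*A)
    num_valid_proposals.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::U32));  // scalar count

    // Original image 600x600, image scale 1.0, feature-map scale 1/16; NMS/topN parameters keep their defaults
    const GenerateProposalsInfo info(600.0f, 600.0f, 1.0f, 1.0f / 16.0f);

    CLGenerateProposalsLayer generate_proposals;
    generate_proposals.configure(&scores, &deltas, &anchors, &proposals, &scores_out, &num_valid_proposals, info);

    // Allocate backing CL buffers after configuration
    for(CLTensor *t : { &scores, &deltas, &anchors, &proposals, &scores_out, &num_valid_proposals })
    {
        t->allocator()->allocate();
    }

    // ... fill scores, deltas and anchors (e.g. by mapping the tensors and writing the data) ...

    generate_proposals.run();
    CLScheduler::get().sync();
    return 0;
}

Of the rows written to proposals, only the first num_valid_proposals entries are valid, as noted in the configure() documentation above; the graph frontend variant (GenerateProposalsLayer with three sub-streams) builds the same function through GraphBuilder::add_generate_proposals_node.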