aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL
diff options
context:
space:
mode:
author: SiCong Li <sicong.li@arm.com> 2017-07-04 15:02:10 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 3e36369a5511c3028c30fc820752dc1248bddf5c (patch)
tree: 58eb09548bdcc276e62f41f01f86fa06fea211e7 /arm_compute/core/CL
parent: edfa9f463bed084f8b0953557202b2a1e56da817 (diff)
download: ComputeLibrary-3e36369a5511c3028c30fc820752dc1248bddf5c.tar.gz
COMPMID-358 Implement OpenCL ROI Pooling
* Implement OpenCL ROI Pooling
* Add CLROIPoolingLayer benchmarks

Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/CL')
-rw-r--r-- arm_compute/core/CL/ICLArray.h 1
-rw-r--r-- arm_compute/core/CL/ICLKernel.h 78
-rw-r--r-- arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h 76
3 files changed, 155 insertions, 0 deletions
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
index 1b676ed5a3..e12695f206 100644
--- a/arm_compute/core/CL/ICLArray.h
+++ b/arm_compute/core/CL/ICLArray.h
@@ -107,6 +107,7 @@ private:
using ICLKeyPointArray = ICLArray<KeyPoint>;
using ICLCoordinates2DArray = ICLArray<Coordinates2D>;
using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
+using ICLROIArray = ICLArray<ROI>;
using ICLSize2DArray = ICLArray<Size2D>;
using ICLUInt8Array = ICLArray<cl_uchar>;
using ICLUInt16Array = ICLArray<cl_ushort>;
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index cfbf760f1e..1334c54a6c 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -31,6 +31,8 @@
namespace arm_compute
{
+template <typename T>
+class ICLArray;
class ICLTensor;
class Window;
@@ -45,6 +47,16 @@ public:
* @return A reference to the OpenCL kernel of this object.
*/
cl::Kernel &kernel();
+ /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array whose OpenCL buffer is set as an argument of the object's kernel. Must not be null (it is dereferenced).
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of leading dimensions of @p window and @p strides used to compute the byte offset of the first element.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T>
+ void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -73,6 +85,11 @@ public:
* @param[in] window Window the kernel will be executed on.
*/
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per 1D array object.
+ *
+ * @return The number of arguments enqueued per 1D array object.
+ */
+ unsigned int num_arguments_per_1D_array() const;
/** Returns the number of arguments enqueued per 1D tensor object.
*
* @return The number of arguments enqueues per 1D tensor object.
@@ -142,6 +159,16 @@ public:
GPUTarget get_target() const;
private:
+ /** Add the passed array's parameters (buffer, two stride values per dimension for dimension_size dimensions, first-element offset) to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array whose OpenCL buffer is set as an argument of the object's kernel. Must not be null (it is dereferenced).
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of leading dimensions of @p window and @p strides used to compute the byte offset of the first element (independent of dimension_size).
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T, unsigned int dimension_size>
+ void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
/** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -150,6 +177,12 @@ private:
*/
template <unsigned int dimension_size>
void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per array object with dimension_size dimensions.
+ *
+ * @return The number of arguments enqueued per array object (identical to the count for a tensor with the same dimension_size).
+ */
+ template <unsigned int dimension_size>
+ unsigned int num_arguments_per_array() const;
/** Returns the number of arguments enqueued per tensor object.
*
* @return The number of arguments enqueued per tensor object.
@@ -177,5 +210,50 @@ protected:
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
*/
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
+
+template <typename T, unsigned int dimension_size>
+void ICLKernel::add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+{
+ // Sets, from idx onwards: the array's buffer, two stride values per dimension and the first-element offset. First, calculate the byte offset to the start of the window
+ unsigned int offset_first_element = 0;
+
+ for(unsigned int n = 0; n < num_dimensions; ++n)
+ {
+ offset_first_element += window[n].start() * strides[n]; // start position of dimension n times that dimension's stride
+ }
+
+ unsigned int idx_start = idx; // Remembered only for the argument-count check at the end
+ _kernel.setArg(idx++, array->cl_buffer()); // First argument: the array's OpenCL buffer
+
+ for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
+ {
+ _kernel.setArg<cl_uint>(idx++, strides[dimension]); // Stride of this dimension
+ _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step()); // Stride scaled by the window step of this dimension
+ }
+
+ _kernel.setArg<cl_uint>(idx++, offset_first_element); // Last argument: offset of the first element
+
+ ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx,
+ "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
+ ARM_COMPUTE_UNUSED(idx_start); // Presumably silences an unused-variable warning when the check above compiles to nothing - macro definition not visible here
+}
+
+template <typename T>
+void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+{
+ add_array_argument<T, 1>(idx, array, strides, num_dimensions, window); // 1D case: forward everything to the generic helper with dimension_size = 1
+}
+
+template <unsigned int dimension_size>
+unsigned int ICLKernel::num_arguments_per_array() const
+{
+ return num_arguments_per_tensor<dimension_size>(); // An array uses the same argument count as a tensor with the same dimension_size
+}
+
+template <unsigned int dimension_size>
+unsigned int ICLKernel::num_arguments_per_tensor() const
+{
+ return 2 + 2 * dimension_size; // 2 fixed arguments (buffer and first-element offset) plus 2 per dimension, matching what add_array_argument() sets; presumably add_tensor_argument() uses the same layout (its definition is not shown here)
+}
}
#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
new file mode 100644
index 0000000000..51aae30561
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/ICLArray.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the OpenCL ROI pooling layer kernel: configure() takes the tensors, the ROI array and the pooling information; run() is the overridden ICLKernel execution entry point. */
+class CLROIPoolingLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor (declared here, defined elsewhere) */
+ CLROIPoolingLayerKernel();
+ /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
+ CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
+ CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be move-constructed */
+ CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLROIPoolingLayerKernel() = default;
+
+ /** Set the input tensor, the ROI array, the output tensor and the pooling information.
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] rois Array containing @ref ROI.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const ICLTensor *input, const ICLROIArray *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Read-only tensor pointer; presumably the @p input given to configure() (implementation not visible in this header) */
+ const ICLROIArray *_rois; /**< Read-only ROI array pointer; presumably the @p rois given to configure() */
+ ICLTensor *_output; /**< Writable tensor pointer; presumably the @p output given to configure() */
+ ROIPoolingLayerInfo _pool_info; /**< Pooling information held by value; presumably a copy of the @p pool_info given to configure() */
+};
+}
+#endif /*__ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ */