diff options
author | SiCong Li <sicong.li@arm.com> | 2017-07-04 15:02:10 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 3e36369a5511c3028c30fc820752dc1248bddf5c (patch) | |
tree | 58eb09548bdcc276e62f41f01f86fa06fea211e7 /arm_compute/core | |
parent | edfa9f463bed084f8b0953557202b2a1e56da817 (diff) | |
download | ComputeLibrary-3e36369a5511c3028c30fc820752dc1248bddf5c.tar.gz |
COMPMID-358 Implement OpenCL ROI Pooling
* Implement OpenCL ROI Pooling
* Add CLROIPoolingLayer benchmarks
Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/CL/ICLArray.h | 1 | ||||
-rw-r--r-- | arm_compute/core/CL/ICLKernel.h | 78 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h | 76 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h | 7 |
4 files changed, 161 insertions, 1 deletions
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h index 1b676ed5a3..e12695f206 100644 --- a/arm_compute/core/CL/ICLArray.h +++ b/arm_compute/core/CL/ICLArray.h @@ -107,6 +107,7 @@ private: using ICLKeyPointArray = ICLArray<KeyPoint>; using ICLCoordinates2DArray = ICLArray<Coordinates2D>; using ICLDetectionWindowArray = ICLArray<DetectionWindow>; +using ICLROIArray = ICLArray<ROI>; using ICLSize2DArray = ICLArray<Size2D>; using ICLUInt8Array = ICLArray<cl_uchar>; using ICLUInt16Array = ICLArray<cl_ushort>; diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index cfbf760f1e..1334c54a6c 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -31,6 +31,8 @@ namespace arm_compute { +template <typename T> +class ICLArray; class ICLTensor; class Window; @@ -45,6 +47,16 @@ public: * @return A reference to the OpenCL kernel of this object. */ cl::Kernel &kernel(); + /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template <typename T> + void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -73,6 +85,11 @@ public: * @param[in] window Window the kernel will be executed on. 
*/ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D array object. + * + * @return The number of arguments enqueued per 1D array object. + */ + unsigned int num_arguments_per_1D_array() const; /** Returns the number of arguments enqueued per 1D tensor object. * * @return The number of arguments enqueues per 1D tensor object. @@ -142,6 +159,16 @@ public: GPUTarget get_target() const; private: + /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template <typename T, unsigned int dimension_size> + void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -150,6 +177,12 @@ private: */ template <unsigned int dimension_size> void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per array object. + * + * @return The number of arguments enqueued per array object. + */ + template <unsigned int dimension_size> + unsigned int num_arguments_per_array() const; /** Returns the number of arguments enqueued per tensor object. * * @return The number of arguments enqueued per tensor object. 
@@ -177,5 +210,50 @@ protected: * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. */ void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange()); + +template <typename T, unsigned int dimension_size> +void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + // Calculate offset to the start of the window + unsigned int offset_first_element = 0; + + for(unsigned int n = 0; n < num_dimensions; ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + _kernel.setArg(idx++, array->cl_buffer()); + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.setArg<cl_uint>(idx++, strides[dimension]); + _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.setArg<cl_uint>(idx++, offset_first_element); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx, + "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>()); + ARM_COMPUTE_UNUSED(idx_start); +} + +template <typename T> +void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + add_array_argument<T, 1>(idx, array, strides, num_dimensions, window); +} + +template <unsigned int dimension_size> +unsigned int ICLKernel::num_arguments_per_array() const +{ + return num_arguments_per_tensor<dimension_size>(); +} + +template <unsigned int dimension_size> +unsigned int ICLKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} } #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ diff --git 
a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h new file mode 100644 index 0000000000..51aae30561 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/ICLArray.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the ROI pooling layer kernel */ +class CLROIPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois Array containing @ref ROI. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
+ */ + void configure(const ICLTensor *input, const ICLROIArray *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLROIArray *_rois; + ICLTensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} +#endif /*__ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h index 3a2f761370..40f79acc79 100644 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h @@ -52,9 +52,14 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois Array containing the regions of interest. + * @param[in] rois Array containing @ref ROI. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ITensor *input, const IROIArray *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); |