From 3e36369a5511c3028c30fc820752dc1248bddf5c Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Tue, 4 Jul 2017 15:02:10 +0100 Subject: COMPMID-358 Implement OpenCL ROI Pooling * Implement OpenCL ROI Pooling * Add CLROIPoolingLayer benchmarks Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- arm_compute/core/CL/ICLArray.h | 1 + arm_compute/core/CL/ICLKernel.h | 78 ++++++++++++++++++++++ .../core/CL/kernels/CLROIPoolingLayerKernel.h | 76 +++++++++++++++++++++ .../core/NEON/kernels/NEROIPoolingLayerKernel.h | 7 +- arm_compute/runtime/CL/CLArray.h | 1 + .../runtime/CL/functions/CLROIPoolingLayer.h | 60 +++++++++++++++++ .../runtime/NEON/functions/NEROIPoolingLayer.h | 7 +- 7 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h create mode 100644 arm_compute/runtime/CL/functions/CLROIPoolingLayer.h (limited to 'arm_compute') diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h index 1b676ed5a3..e12695f206 100644 --- a/arm_compute/core/CL/ICLArray.h +++ b/arm_compute/core/CL/ICLArray.h @@ -107,6 +107,7 @@ private: using ICLKeyPointArray = ICLArray; using ICLCoordinates2DArray = ICLArray; using ICLDetectionWindowArray = ICLArray; +using ICLROIArray = ICLArray; using ICLSize2DArray = ICLArray; using ICLUInt8Array = ICLArray; using ICLUInt16Array = ICLArray; diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index cfbf760f1e..1334c54a6c 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -31,6 +31,8 @@ namespace arm_compute { +template +class ICLArray; class ICLTensor; class Window; @@ -45,6 +47,16 @@ public: * @return A reference to the OpenCL kernel of this object. */ cl::Kernel &kernel(); + /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. 
+ * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -73,6 +85,11 @@ public: * @param[in] window Window the kernel will be executed on. */ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D array object. + * + * @return The number of arguments enqueued per 1D array object. + */ + unsigned int num_arguments_per_1D_array() const; /** Returns the number of arguments enqueued per 1D tensor object. * * @return The number of arguments enqueues per 1D tensor object. @@ -142,6 +159,16 @@ public: GPUTarget get_target() const; private: + /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. 
+ */ + template + void add_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -150,6 +177,12 @@ private: */ template void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per array object. + * + * @return The number of arguments enqueued per array object. + */ + template + unsigned int num_arguments_per_array() const; /** Returns the number of arguments enqueued per tensor object. * * @return The number of arguments enqueued per tensor object. @@ -177,5 +210,50 @@ protected: * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. */ void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange()); + +template +void ICLKernel::add_array_argument(unsigned &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + // Calculate offset to the start of the window + unsigned int offset_first_element = 0; + + for(unsigned int n = 0; n < num_dimensions; ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + _kernel.setArg(idx++, array->cl_buffer()); + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.setArg(idx++, strides[dimension]); + _kernel.setArg(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.setArg(idx++, offset_first_element); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array() != idx, + "add_%dD_array_argument() is supposed to add exactly %d arguments to 
the kernel", dimension_size, num_arguments_per_array()); + ARM_COMPUTE_UNUSED(idx_start); +} + +template +void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + add_array_argument(idx, array, strides, num_dimensions, window); +} + +template +unsigned int ICLKernel::num_arguments_per_array() const +{ + return num_arguments_per_tensor(); +} + +template +unsigned int ICLKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} } #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h new file mode 100644 index 0000000000..51aae30561 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/ICLArray.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the ROI pooling layer kernel */ +class CLROIPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois Array containing @ref ROI. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. 
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ICLTensor *input, const ICLROIArray *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLROIArray *_rois; + ICLTensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} +#endif /*__ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h index 3a2f761370..40f79acc79 100644 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h @@ -52,9 +52,14 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois Array containing the regions of interest. + * @param[in] rois Array containing @ref ROI. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
*/ void configure(const ITensor *input, const IROIArray *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h index f4c2ef06d9..3dc7f19bc7 100644 --- a/arm_compute/runtime/CL/CLArray.h +++ b/arm_compute/runtime/CL/CLArray.h @@ -97,6 +97,7 @@ private: using CLKeyPointArray = CLArray; using CLCoordinates2DArray = CLArray; using CLDetectionWindowArray = CLArray; +using CLROIArray = CLArray; using CLSize2DArray = CLArray; using CLUInt8Array = CLArray; using CLUInt16Array = CLArray; diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h new file mode 100644 index 0000000000..f089375e51 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLROIPOOLINGLAYER_H__ +#define __ARM_COMPUTE_CLROIPOOLINGLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLROIPoolingLayerKernel. + * + * This function calls the following OpenCL kernels: + * -# @ref CLROIPoolingLayerKernel + * + */ +class CLROIPoolingLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois Array containing @ref ROI. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
+ */ + void configure(const ICLTensor *input, const ICLROIArray *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_CLROIPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 04b5c35150..5adc1110d5 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -47,9 +47,14 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois Array containing the regions of interest. + * @param[in] rois Array containing @ref ROI. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ITensor *input, const IROIArray *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); -- cgit v1.2.1