diff options
author | SiCong Li <sicong.li@arm.com> | 2017-07-04 15:02:10 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 3e36369a5511c3028c30fc820752dc1248bddf5c (patch) | |
tree | 58eb09548bdcc276e62f41f01f86fa06fea211e7 /arm_compute/core/CL/ICLKernel.h | |
parent | edfa9f463bed084f8b0953557202b2a1e56da817 (diff) | |
download | ComputeLibrary-3e36369a5511c3028c30fc820752dc1248bddf5c.tar.gz |
COMPMID-358 Implement OpenCL ROI Pooling
* Implement OpenCL ROI Pooling
* Add CLROIPoolingLayer benchmarks
Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/CL/ICLKernel.h')
-rw-r--r-- | arm_compute/core/CL/ICLKernel.h | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index cfbf760f1e..1334c54a6c 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -31,6 +31,8 @@ namespace arm_compute { +template <typename T> +class ICLArray; class ICLTensor; class Window; @@ -45,6 +47,16 @@ public: * @return A reference to the OpenCL kernel of this object. */ cl::Kernel &kernel(); + /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template <typename T> + void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -73,6 +85,11 @@ public: * @param[in] window Window the kernel will be executed on. */ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D array object. + * + * @return The number of arguments enqueues per 1D array object. + */ + unsigned int num_arguments_per_1D_array() const; /** Returns the number of arguments enqueued per 1D tensor object. * * @return The number of arguments enqueues per 1D tensor object. @@ -142,6 +159,16 @@ public: GPUTarget get_target() const; private: + /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template <typename T, unsigned int dimension_size> + void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -150,6 +177,12 @@ private: */ template <unsigned int dimension_size> void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per array object. + * + * @return The number of arguments enqueued per array object. + */ + template <unsigned int dimension_size> + unsigned int num_arguments_per_array() const; /** Returns the number of arguments enqueued per tensor object. * * @return The number of arguments enqueued per tensor object. @@ -177,5 +210,50 @@ protected: * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. */ void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange()); + +template <typename T, unsigned int dimension_size> +void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + // Calculate offset to the start of the window + unsigned int offset_first_element = 0; + + for(unsigned int n = 0; n < num_dimensions; ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + _kernel.setArg(idx++, array->cl_buffer()); + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.setArg<cl_uint>(idx++, strides[dimension]); + _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.setArg<cl_uint>(idx++, offset_first_element); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx, + "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>()); + ARM_COMPUTE_UNUSED(idx_start); +} + +template <typename T> +void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + add_array_argument<T, 1>(idx, array, strides, num_dimensions, window); +} + +template <unsigned int dimension_size> +unsigned int ICLKernel::num_arguments_per_array() const +{ + return num_arguments_per_tensor<dimension_size>(); +} + +template <unsigned int dimension_size> +unsigned int ICLKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} } #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ |