From 3e36369a5511c3028c30fc820752dc1248bddf5c Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Tue, 4 Jul 2017 15:02:10 +0100 Subject: COMPMID-358 Implement OpenCL ROI Pooling * Implement OpenCL ROI Pooling * Add CLROIPoolingLayer benchmarks Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- arm_compute/core/CL/ICLKernel.h | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arm_compute/core/CL/ICLKernel.h') diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index cfbf760f1e..1334c54a6c 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -31,6 +31,8 @@ namespace arm_compute { +template +class ICLArray; class ICLTensor; class Window; @@ -45,6 +47,16 @@ public: * @return A reference to the OpenCL kernel of this object. */ cl::Kernel &kernel(); + /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -73,6 +85,11 @@ public: * @param[in] window Window the kernel will be executed on. */ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D array object. + * + * @return The number of arguments enqueues per 1D array object. + */ + unsigned int num_arguments_per_1D_array() const; /** Returns the number of arguments enqueued per 1D tensor object. * * @return The number of arguments enqueues per 1D tensor object. @@ -142,6 +159,16 @@ public: GPUTarget get_target() const; private: + /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -150,6 +177,12 @@ private: */ template void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per array object. + * + * @return The number of arguments enqueued per array object. + */ + template + unsigned int num_arguments_per_array() const; /** Returns the number of arguments enqueued per tensor object. * * @return The number of arguments enqueued per tensor object. @@ -177,5 +210,50 @@ protected: * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. */ void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange()); + +template +void ICLKernel::add_array_argument(unsigned &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + // Calculate offset to the start of the window + unsigned int offset_first_element = 0; + + for(unsigned int n = 0; n < num_dimensions; ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + _kernel.setArg(idx++, array->cl_buffer()); + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.setArg(idx++, strides[dimension]); + _kernel.setArg(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.setArg(idx++, offset_first_element); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array() != idx, + "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array()); + ARM_COMPUTE_UNUSED(idx_start); +} + +template +void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + add_array_argument(idx, array, strides, num_dimensions, window); +} + +template +unsigned int ICLKernel::num_arguments_per_array() const +{ + return num_arguments_per_tensor(); +} + +template +unsigned int ICLKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} } #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ -- cgit v1.2.1