aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL/ICLKernel.h
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2017-07-04 15:02:10 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commit3e36369a5511c3028c30fc820752dc1248bddf5c (patch)
tree58eb09548bdcc276e62f41f01f86fa06fea211e7 /arm_compute/core/CL/ICLKernel.h
parentedfa9f463bed084f8b0953557202b2a1e56da817 (diff)
downloadComputeLibrary-3e36369a5511c3028c30fc820752dc1248bddf5c.tar.gz
COMPMID-358 Implement OpenCL ROI Pooling
* Implement OpenCL ROI Pooling * Add CLROIPoolingLayer benchmarks Change-Id: I8786d01d551850a1b4d599a48fabe3925e0a27d0 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79833 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/CL/ICLKernel.h')
-rw-r--r--arm_compute/core/CL/ICLKernel.h78
1 files changed, 78 insertions, 0 deletions
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index cfbf760f1e..1334c54a6c 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -31,6 +31,8 @@
namespace arm_compute
{
+template <typename T>
+class ICLArray;
class ICLTensor;
class Window;
@@ -45,6 +47,16 @@ public:
* @return A reference to the OpenCL kernel of this object.
*/
cl::Kernel &kernel();
+ /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array to set as an argument of the object's kernel.
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of dimensions of the @p array.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T>
+ void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -73,6 +85,11 @@ public:
* @param[in] window Window the kernel will be executed on.
*/
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per 1D array object.
+ *
+ * @return The number of arguments enqueues per 1D array object.
+ */
+ unsigned int num_arguments_per_1D_array() const;
/** Returns the number of arguments enqueued per 1D tensor object.
*
* @return The number of arguments enqueues per 1D tensor object.
@@ -142,6 +159,16 @@ public:
GPUTarget get_target() const;
private:
+ /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array to set as an argument of the object's kernel.
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of dimensions of the @p array.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T, unsigned int dimension_size>
+ void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
/** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -150,6 +177,12 @@ private:
*/
template <unsigned int dimension_size>
void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per array object.
+ *
+ * @return The number of arguments enqueued per array object.
+ */
+ template <unsigned int dimension_size>
+ unsigned int num_arguments_per_array() const;
/** Returns the number of arguments enqueued per tensor object.
*
* @return The number of arguments enqueued per tensor object.
@@ -177,5 +210,50 @@ protected:
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
*/
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
+
+template <typename T, unsigned int dimension_size>
+void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+{
+ // Calculate offset to the start of the window
+ unsigned int offset_first_element = 0;
+
+ for(unsigned int n = 0; n < num_dimensions; ++n)
+ {
+ offset_first_element += window[n].start() * strides[n];
+ }
+
+ unsigned int idx_start = idx;
+ _kernel.setArg(idx++, array->cl_buffer());
+
+ for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
+ {
+ _kernel.setArg<cl_uint>(idx++, strides[dimension]);
+ _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
+ }
+
+ _kernel.setArg<cl_uint>(idx++, offset_first_element);
+
+ ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx,
+ "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
+ ARM_COMPUTE_UNUSED(idx_start);
+}
+
+template <typename T>
+void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+{
+ add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
+}
+
+template <unsigned int dimension_size>
+unsigned int ICLKernel::num_arguments_per_array() const
+{
+ return num_arguments_per_tensor<dimension_size>();
+}
+
+template <unsigned int dimension_size>
+unsigned int ICLKernel::num_arguments_per_tensor() const
+{
+ return 2 + 2 * dimension_size;
+}
}
#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */