aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL/ICLKernel.h
diff options
context:
space:
mode:
authorDiego Lopez Recas <Diego.LopezRecas@arm.com>2017-12-18 14:42:56 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:45:00 +0000
commit0021d750d66d199c411df00cdd8308c325f1fef3 (patch)
treeb96e618977442a8aab335c136d369a958998d416 /arm_compute/core/CL/ICLKernel.h
parent5b6904b8d9cb5e8a343cde96fd5a8701f44dff90 (diff)
downloadComputeLibrary-0021d750d66d199c411df00cdd8308c325f1fef3.tar.gz
IVGCVSW-863 Broadcast support in CL/NEON Arithmetic Add
Also, added instrumentation to support generic tensor broadcasting for NEON and CL backends. Change-Id: I1bc5747a286e1a4b464c209067581e103d473b9a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114201 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/CL/ICLKernel.h')
-rw-r--r--arm_compute/core/CL/ICLKernel.h124
1 files changed, 79 insertions, 45 deletions
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index a1bc3eb8d2..e660ae55a0 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,14 +41,40 @@ class Window;
/** Common interface for all the OpenCL kernels */
class ICLKernel : public IKernel
{
+private:
+ /** Returns the number of arguments enqueued per array object.
+ *
+ * @return The number of arguments enqueued per array object.
+ */
+ template <unsigned int dimension_size>
+ constexpr static unsigned int num_arguments_per_array()
+ {
+ return num_arguments_per_tensor<dimension_size>();
+ }
+ /** Returns the number of arguments enqueued per tensor object.
+ *
+ * @return The number of arguments enqueued per tensor object.
+ */
+ template <unsigned int dimension_size>
+ constexpr static unsigned int num_arguments_per_tensor()
+ {
+ return 2 + 2 * dimension_size;
+ }
+
public:
/** Constructor */
- ICLKernel();
+ ICLKernel()
+ : _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0)
+ {
+ }
/** Returns a reference to the OpenCL kernel of this object.
*
* @return A reference to the OpenCL kernel of this object.
*/
- cl::Kernel &kernel();
+ cl::Kernel &kernel()
+ {
+ return _kernel;
+ }
/** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
@@ -58,60 +84,90 @@ public:
* @param[in] window Window the kernel will be executed on.
*/
template <typename T>
- void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
+ void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+ {
+ add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
+ }
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
* @param[in] tensor Tensor to set as an argument of the object's kernel.
* @param[in] window Window the kernel will be executed on.
*/
- void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<1>(idx, tensor, window);
+ }
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
* @param[in] tensor Tensor to set as an argument of the object's kernel.
* @param[in] window Window the kernel will be executed on.
*/
- void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<2>(idx, tensor, window);
+ }
/** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
* @param[in] tensor Tensor to set as an argument of the object's kernel.
* @param[in] window Window the kernel will be executed on.
*/
- void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<3>(idx, tensor, window);
+ }
/** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
* @param[in] tensor Tensor to set as an argument of the object's kernel.
* @param[in] window Window the kernel will be executed on.
*/
- void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<4>(idx, tensor, window);
+ }
/** Returns the number of arguments enqueued per 1D array object.
*
* @return The number of arguments enqueues per 1D array object.
*/
- unsigned int num_arguments_per_1D_array() const;
+ constexpr static unsigned int num_arguments_per_1D_array()
+ {
+ return num_arguments_per_array<1>();
+ }
/** Returns the number of arguments enqueued per 1D tensor object.
*
* @return The number of arguments enqueues per 1D tensor object.
*/
- unsigned int num_arguments_per_1D_tensor() const;
+ constexpr static unsigned int num_arguments_per_1D_tensor()
+ {
+ return num_arguments_per_tensor<1>();
+ }
/** Returns the number of arguments enqueued per 2D tensor object.
*
* @return The number of arguments enqueues per 2D tensor object.
*/
- unsigned int num_arguments_per_2D_tensor() const;
+ constexpr static unsigned int num_arguments_per_2D_tensor()
+ {
+ return num_arguments_per_tensor<2>();
+ }
/** Returns the number of arguments enqueued per 3D tensor object.
*
* @return The number of arguments enqueues per 3D tensor object.
*/
- unsigned int num_arguments_per_3D_tensor() const;
+ constexpr static unsigned int num_arguments_per_3D_tensor()
+ {
+ return num_arguments_per_tensor<3>();
+ }
/** Returns the number of arguments enqueued per 4D tensor object.
*
* @return The number of arguments enqueues per 4D tensor object.
*/
- unsigned int num_arguments_per_4D_tensor() const;
+ constexpr static unsigned int num_arguments_per_4D_tensor()
+ {
+ return num_arguments_per_tensor<4>();
+ }
/** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
*
* @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
@@ -161,7 +217,10 @@ public:
*
* @param[in] target The targeted GPU architecture
*/
- void set_target(GPUTarget target);
+ void set_target(GPUTarget target)
+ {
+ _target = target;
+ }
/** Set the targeted GPU architecture according to the CL device
*
@@ -173,7 +232,10 @@ public:
*
* @return The targeted GPU architecture.
*/
- GPUTarget get_target() const;
+ GPUTarget get_target() const
+ {
+ return _target;
+ }
/** Get the maximum workgroup size for the device the CLKernelLibrary uses.
*
@@ -207,18 +269,6 @@ private:
*/
template <unsigned int dimension_size>
void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
- /** Returns the number of arguments enqueued per array object.
- *
- * @return The number of arguments enqueued per array object.
- */
- template <unsigned int dimension_size>
- unsigned int num_arguments_per_array() const;
- /** Returns the number of arguments enqueued per tensor object.
- *
- * @return The number of arguments enqueued per tensor object.
- */
- template <unsigned int dimension_size>
- unsigned int num_arguments_per_tensor() const;
protected:
cl::Kernel _kernel; /**< OpenCL kernel to run */
@@ -246,6 +296,8 @@ void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, c
template <typename T, unsigned int dimension_size>
void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
{
+ ARM_COMPUTE_ERROR_ON(array == nullptr);
+
// Calculate offset to the start of the window
unsigned int offset_first_element = 0;
@@ -269,23 +321,5 @@ void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, cons
"add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
ARM_COMPUTE_UNUSED(idx_start);
}
-
-template <typename T>
-void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
-{
- add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
-}
-
-template <unsigned int dimension_size>
-unsigned int ICLKernel::num_arguments_per_array() const
-{
- return num_arguments_per_tensor<dimension_size>();
-}
-
-template <unsigned int dimension_size>
-unsigned int ICLKernel::num_arguments_per_tensor() const
-{
- return 2 + 2 * dimension_size;
-}
}
#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */