aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-03-15 10:13:05 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-03-20 11:21:46 +0000
commit b0c5037d94ba7073ccabb0ebaff54db320f184c4 (patch)
tree 126f2332df60b6eff1e630b2585b2bd407501a20 /arm_compute/core/CL
parent 5ed7b5bc98feb848874730c9bb9c30759e58d453 (diff)
download ComputeLibrary-b0c5037d94ba7073ccabb0ebaff54db320f184c4.tar.gz
COMPMID-2043: Add support for "dummy threads" in CLGEMMReshaped
Change-Id: I89403b97503fbb99f6a32f5d62b8c535ab26a7be Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/877 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/CL')
-rw-r--r-- arm_compute/core/CL/CLHelpers.h 11
-rw-r--r-- arm_compute/core/CL/ICLKernel.h 14
-rw-r--r-- arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h 1
-rw-r--r-- arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h 1
-rw-r--r-- arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h 1
5 files changed, 21 insertions, 7 deletions
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index 78427c3738..16fe09fb96 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -144,5 +144,14 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si
* @return preferred vector width
*/
size_t preferred_vector_width(const cl::Device &device, DataType dt);
+
+/** Helper function to check whether "dummy work-items" are preferred in order to have a power-of-two NDRange
+ * If dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether the work-item is out-of-range
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if dummy work-items should be preferred to dispatch the NDRange
+ */
+bool preferred_dummy_work_items_support(const cl::Device &device);
}
#endif /* __ARM_COMPUTE_CLHELPERS_H__ */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index f5423584e1..590f8929cb 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -308,14 +308,16 @@ private:
*
* @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
*
- * @param[in,out] queue OpenCL command queue.
- * @param[in] kernel Kernel to enqueue
- * @param[in] window Window the kernel has to process.
- * @param[in] lws_hint Local workgroup size requested. Default is based on the device target.
+ * @param[in,out] queue OpenCL command queue.
+ * @param[in] kernel Kernel to enqueue
+ * @param[in] window Window the kernel has to process.
+ * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
+ * @param[in] use_dummy_work_items (Optional) Use dummy work-items in order to have a two-dimensional power-of-two NDRange. Default is false
+ * Note: it is the kernel's responsibility to check whether the work-item is out-of-range
*
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
*/
-void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
+void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false);
/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
*
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
index 1cf7236446..d361236380 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
@@ -81,6 +81,7 @@ private:
bool _slide_matrix_b;
bool _reinterpret_output_as_3d;
unsigned int _k;
+ bool _use_dummy_work_items;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H__*/ \ No newline at end of file
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
index cb23b969dd..b0d245f342 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
@@ -84,6 +84,7 @@ private:
bool _slide_matrix_b;
bool _reinterpret_output_as_3d;
unsigned int _k;
+ bool _use_dummy_work_items;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H__*/ \ No newline at end of file
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
index 74715949f2..b3ee43555a 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -84,6 +84,7 @@ private:
bool _slide_matrix_b;
bool _reinterpret_input_as_3d;
bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H__*/ \ No newline at end of file