aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2023-06-28 09:49:45 +0100
committerSiCong Li <sicong.li@arm.com>2023-07-25 15:48:50 +0000
commit23882a9014eb3972bca958206866c8e0d0b829cc (patch)
tree9139b91699099160e26a64abd8cf182bd7447278 /src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
parent0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff)
downloadComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accept argument bindings exported from ckw. * Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in dynamic fusion validation tests. Otherwise it will not be tested and the dynamic fusion will keep using ClTemplateWriter instead. * Fix CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions Resolves: COMPMID-6282 Partially resolves: COMPMID-6260 Signed-off-by: SiCong Li <sicong.li@arm.com> Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Anitha Raj <Anitha.Raj@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h')
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h128
1 files changed, 126 insertions, 2 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 302d4c8562..226e1a2df3 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -33,6 +33,7 @@ namespace experimental
namespace dynamic_fusion
{
/** Contain information required to set up a kernel argument at run time
+ * @deprecated To be removed along with ClTemplateWriter
*/
struct GpuKernelArgumentInfo
{
@@ -66,10 +67,9 @@ struct GpuKernelArgumentInfo
}
Type type{ Type::Tensor_4D_t_Buffer };
};
-
bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
-
/** Kernel argument information linked with its corresponding @ref ITensorInfo
+ * @deprecated To be removed along with ClTemplateWriter
*/
class GpuKernelArgument
{
@@ -124,6 +124,130 @@ private:
TensorInfo _tensor_info{};
GpuKernelArgumentInfo _kernel_arg_info{};
};
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+/** Describe how the tensor runtime memory can be accessed
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorStorageType
+{
+ Unknown, /**< Storage type not specified */
+ ClBufferUint8Ptr, /**< CL buffer accessed via a global uint8 pointer (a `uint8_t*` kernel argument in the example under GpuKernelArgumentBinding) */
+ ClImage2dReadOnly, /**< Image storage (an `image2d_t` kernel argument in the example under GpuKernelArgumentBinding) */
+ ClImage2dWriteOnly, /**< NOTE(review): presumably the write-only counterpart of ClImage2dReadOnly - confirm against ckw */
+};
+
+/** Describe additional runtime information about the tensor
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorComponentType
+{
+ Unknown, /**< Component not specified */
+ OffsetFirstElement, /**< NOTE(review): presumably the offset of the tensor's first element - units not visible here, confirm */
+ Stride0, /**< Stride components; passed as `uint` kernel arguments in the example under GpuKernelArgumentBinding (e.g. t0_stride1) */
+ Stride1,
+ Stride2,
+ Stride3,
+ Stride4,
+ Dim0, /**< Dimension components; passed as `uint` kernel arguments in the example under GpuKernelArgumentBinding (e.g. t0_dim0) */
+ Dim1,
+ Dim2,
+ Dim3,
+ Dim4,
+ Dim1xDim2, /**< NOTE(review): presumably the product of the named dimensions (folded dims) - confirm against ckw */
+ Dim2xDim3,
+ Dim1xDim2xDim3,
+};
+
+/** Describe how to extract information from a runtime Gpu tensor, and set it as an argument to a gpu kernel at runtime
+ *
+ * A kernel argument is just an argument to the gpu kernel as shown in the argument list below. This contrasts with a "workload argument" which is a tensor (@ref GpuWorkloadArgument)
+ * void kernel(arg0, arg1, ... argN)
+ *
+ * In a kernel generated using dynamic fusion (@ref GpuKernelSourceCode), every kernel argument describes part of a tensor.
+ * A tensor is described as: **storages** followed by **components**
+ *
+ * A storage (@ref TensorStorageType) describes how the tensor runtime memory can be accessed (e.g. via a global uint8 pointer to a CL buffer)
+ * A component (@ref TensorComponentType) describes additional runtime information about the tensor (e.g. the dimensions of the tensor)
+ *
+ * The arguments are arranged in the order of use in the generated kernel code:
+ *
+ * arg0 , arg1 , arg2 , ..., , argN
+ * storage, component0, component1, ..., componentX, storage, component0, component1, ..., componentY
+ * | tensor0 | tensor1 |
+ *
+ * An example argument list:
+ *
+ * void kernel(
+ * image2d_t t0_image, // TensorStorageType::ClImage2dReadOnly
+ * uint8_t* t0_ptr, // TensorStorageType::ClBufferUint8Ptr
+ * uint t0_dim0, // TensorComponentType::Dim0
+ * uint t0_stride1, // TensorComponentType::Stride1
+ * image2d_t t1_image, // TensorStorageType::ClImage2dReadOnly
+ * uint t1_dim1xdim2, // TensorComponentType::Dim1xDim2
+ * uint t1_stride1, // TensorComponentType::Stride1
+ * uint t1_stride2, // TensorComponentType::Stride2
+ * )
+ *
+ */
+class GpuKernelArgumentBinding
+{
+public:
+ enum class Type : int32_t // Discriminator: selects which member of the private Value union is meaningful
+ {
+ TensorStorage, /**< @ref TensorStorageType */
+ TensorComponent /**< @ref TensorComponentType */
+ };
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage) // Storage binding for tensor @p id; sets type() to Type::TensorStorage
+ : _type{ Type::TensorStorage }, _id{ id }, _value{}
+ {
+ _value.tensor_storage_type = storage;
+ }
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component) // Component binding for tensor @p id; sets type() to Type::TensorComponent
+ : _type{ Type::TensorComponent }, _id{ id }, _value{}
+ {
+ _value.tensor_component_type = component;
+ }
+ /** Storage type of the tensor; only meaningful when type() is Type::TensorStorage (checked with ARM_COMPUTE_ERROR_ON)
+ */
+ TensorStorageType tensor_storage_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorStorage);
+ return _value.tensor_storage_type;
+ }
+ /** Component of the tensor; only meaningful when type() is Type::TensorComponent (checked with ARM_COMPUTE_ERROR_ON)
+ */
+ TensorComponentType tensor_component_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorComponent);
+ return _value.tensor_component_type;
+ }
+ /** Id of the tensor this kernel argument belongs to (as passed to the constructor)
+ */
+ ITensorInfo::Id id() const
+ {
+ return _id;
+ }
+ /** Type of the kernel argument: tells callers which of the two accessors above may be used
+ */
+ Type type() const
+ {
+ return _type;
+ }
+
+private:
+ Type _type; // Fixed by whichever constructor was used
+ ITensorInfo::Id _id;
+ union Value // Only the member matching _type is assigned by the constructors
+ {
+ TensorStorageType tensor_storage_type;
+ TensorComponentType tensor_component_type;
+ };
+ Value _value;
+};
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute