aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-06-10 17:59:30 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-06-18 10:45:59 +0000
commit: e3a849af3d9e108704c6ce162f377398300d990d (patch)
tree: ad9cb7004c64cc6747b2b04bf982b3aabef33676 /arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
parent: 19023835fa5a73dea2823edf667c711b03bc5060 (diff)
download: ComputeLibrary-e3a849af3d9e108704c6ce162f377398300d990d.tar.gz
COMPMID-3320: Add cl_image support for GEMMReshaped T_NT
COMPMID-3321: Add cl_image support for GEMMReshaped NT_T

- Added support for cl_image in CLGEMMMatrixMultiplyReshapedKernel (both NT and T kernels)
- Extended the tests to validate rhs_info.export_to_cl_image = true
- Added utility macros in OpenCL to load data from an OpenCL image object
- Updated doxygen documentation in CLGEMMMatrixMultiplyReshapedKernel.h
- Updated doxygen documentation in CLGEMMReshapeRHSMatrixKernel.h

Change-Id: I953b10e4ef205d1b76dcbc366e5a91fd5a8e1d5c
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3329
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h')
-rw-r--r-- arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h 59
1 files changed, 48 insertions, 11 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
index ee8e57fa8c..aeedd50e0b 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
@@ -55,19 +55,30 @@ public:
* Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
* multiplications. i.e. float c = (half)a * (half)b
*
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can improve performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
+ * the following conditions must be met:
+ * -# rhs_info.n0 can only be 4, 8 or 16
+ * -# rhs_info.k0 can only be 4, 8 or 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less than or equal to 4
* @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
* @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
* @param[in] alpha Weight of the matrix product
* @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
* lhs_info.m0: 2,3,4,5,6,7,8
* lhs_info.k0: 2,3,4,8,16
* lhs_info.transpose: false
* @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
* rhs_info.transpose: true
* @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
*
@@ -82,8 +93,19 @@ public:
* Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
* multiplications. i.e. float c = (half)a * (half)b
*
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can improve performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
+ * the following conditions must be met:
+ * -# rhs_info.n0 can only be 4, 8 or 16
+ * -# rhs_info.k0 can only be 4, 8 or 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
* @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less than or equal to 4
* @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
* @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
@@ -94,8 +116,8 @@ public:
* lhs_info.k0: 2,3,4,8,16
* lhs_info.transpose: false
* @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
* rhs_info.transpose: true
* @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
*
@@ -107,7 +129,22 @@ public:
const GEMMKernelInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
*
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
+ * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+ * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+ * multiplications. i.e. float c = (half)a * (half)b
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can improve performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
+ * the following conditions must be met:
+ * -# rhs_info.n0 can only be 4, 8 or 16
+ * -# rhs_info.k0 can only be 4, 8 or 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less than or equal to 4
* @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
* @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
* @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
@@ -118,8 +155,8 @@ public:
* lhs_info.k0: 2,3,4,8,16
* lhs_info.transpose: false
* @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
* rhs_info.transpose: true
* @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
*
@@ -141,10 +178,10 @@ private:
ICLTensor *_output;
bool _slide_matrix_b;
bool _reinterpret_output_as_3d;
- unsigned int _k;
bool _use_dummy_work_items;
bool _add_bias;
bool _broadcast_bias;
+ bool _export_to_cl_image;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/
\ No newline at end of file