author    Gian Marco Iodice <gianmarco.iodice@arm.com>  2020-05-28 10:22:03 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com>  2020-05-28 14:00:46 +0000
commit    dd717c3150b813da403fbfd38e1200936998824a (patch)
tree      30c13f8cc428b03fae14999d18f8b4c2c1f21517
parent    04c8e636c1083411e2f49fb0c80e2d1be498aa25 (diff)
COMPMID-3318: Add flag to export to cl_image
- Added flag to export to cl_image in GEMMRHSMatrixInfo
- Returned an error in the GEMM/Lowp kernels without this support

Change-Id: I4a523d93c0984626bbf23e2efeb114f9c7c20a24
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3274
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
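For illustration only (not part of this patch): a caller opts in to cl_image export by setting the new member on GEMMRHSMatrixInfo after construction. The five-parameter constructor is the one visible in the Types.h hunk below; the block sizes here are arbitrary example values.

    GEMMRHSMatrixInfo rhs_info(4 /* n0 */, 4 /* k0 */, 8 /* h0 */, true /* transpose */, true /* interleave */);
    rhs_info.export_to_cl_image = true; // only valid with n0 == 4, per the field's doc comment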
-rw-r--r--  arm_compute/core/Types.h                                               | 11
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp           |  1
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp         |  1
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp  |  4
-rw-r--r--  src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp               |  1
5 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 759ff07822..4e73edba4b 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1916,11 +1916,12 @@ struct GEMMRHSMatrixInfo
: n0(n), k0(k), h0(h), transpose(trans), interleave(inter)
{
}
- unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+ unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */
+ unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */
+ bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+ bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
/** GEMM information class. This class stores the necessary information to compute GEMM functions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
index 663cc70a0b..9e0594b129 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
@@ -70,6 +70,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.k0 > 16);
ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 1 || lhs_info.m0 > 8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3), "Only 2,3,4,8,16 are supported for n0");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported for quantized GEMM");
const int m = gemm_info.m();
const int n = gemm_info.n();
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
index eeedfdaab1..76303cfd16 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
@@ -65,6 +65,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.k0 > 16);
ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 2 || lhs_info.m0 > 8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3), "Only 2,3,4,8,16 are supported for n0");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported for quantized GEMM");
const int m = gemm_info.m();
const int n = gemm_info.n();
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 0fdc899197..11f45e894a 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -74,6 +74,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
ARM_COMPUTE_RETURN_ERROR_ON_MSG((((rhs_info.k0 & (rhs_info.k0 - 1)) && rhs_info.k0 != 3) || (rhs_info.k0 > 16)), "Only 2,3,4,8,16 are supported for k0");
ARM_COMPUTE_RETURN_ERROR_ON(lhs_info.m0 < 1 || lhs_info.m0 > 8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((((rhs_info.n0 & (rhs_info.n0 - 1)) && rhs_info.n0 != 3) || rhs_info.n0 > 16), "Only 2,3,4,8,16 are supported for n0");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported for quantized GEMM");
const int m = gemm_info.m;
const int n = gemm_info.n;
@@ -320,7 +321,8 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *i
configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, gemm_info, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts);
}
-void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info,
+void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output,
+ const GEMMKernelInfo &gemm_info,
const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index d5a52845a1..dce8d81ca8 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -69,6 +69,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
&& (!gemm_info.broadcast_bias),
"Bias addition only supported with broadcast mode in case the input or output has to be reinterpreted as 3D");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.fp_mixed_precision, "Mixed precision not supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported for GEMM native");
const unsigned int m = gemm_info.m;
const unsigned int n = gemm_info.n;
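For illustration only (not part of this patch): validate_no_cl_image_export below is a hypothetical standalone helper that mirrors the guards added above; in the library the check lives inside each kernel's validate_arguments() rather than in a separate function. Any kernel that cannot read the RHS matrix through a cl_image rejects the request during validation, before configuration.

    Status validate_no_cl_image_export(const GEMMRHSMatrixInfo &rhs_info)
    {
        // Fail validation up front when cl_image export is requested
        // but the kernel cannot consume the RHS matrix as a cl_image.
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image, "Export to CLImage not supported by this kernel");
        return Status{};
    }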