COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1

The current implementation is limited to FP32 only.

Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244
Reviewed-on: https://review.mlplatform.org/389
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-12-12 10:18:04 +0000
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-12-14 14:57:48 +0000
commit: bf9731edfa0439cad4d70efc3065e71e199c62b8 (patch)
tree: 71340a3d04a6294744c642ed6e4a56c0e8a77592 /arm_compute/core/utils
parent: 92e278d5f462c930af1947883a5f48c10586ae9c (diff)
download: ComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz
1 files changed, 25 insertions, 0 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 33893ad877..f41d00f54d 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -619,6 +619,31 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
     return output_shape;
 }
 
+inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
+
+    const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; // a depth of 0 disables the 3D reinterpretation
+    const int  depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1; // 1 turns the division below into a no-op
+
+    // Output shape of the matrix multiplication described by gemm_info: dim0 is N and dim1 is M. If the output has to be reinterpreted
+    // as 3D, M is split into (M / depth, depth) and the trailing dimensions of input0 (2 and 3) each move up by one position
+    const int dim0 = gemm_info.n();
+    const int dim1 = gemm_info.m() / depth_output_gemm3d; // integer division: any remainder of M / depth is discarded
+    const int dim2 = input0.tensor_shape()[2];
+    const int dim3 = input0.tensor_shape()[3];
+
+    TensorShape output_shape{ input0.tensor_shape() }; // start from a copy of input0's shape; input1 is not read in this function
+
+    output_shape.set(0, dim0);
+    output_shape.set(1, dim1);
+    output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2); // 3D case: the depth becomes dimension 2
+    output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3);                // 3D case: input0 dimension 2 moves to dimension 3
+    output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1);                   // 3D case: input0 dimension 3 moves to dimension 4, otherwise 1
+
+    return output_shape;
+}
+
 inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
 {
     ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-12-12 10:18:04 +0000
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-12-14 14:57:48 +0000
commit	bf9731edfa0439cad4d70efc3065e71e199c62b8 (patch)
tree	71340a3d04a6294744c642ed6e4a56c0e8a77592 /arm_compute/core/utils
parent	92e278d5f462c930af1947883a5f48c10586ae9c (diff)
download	ComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz