aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/utils
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2018-12-12 10:18:04 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2018-12-14 14:57:48 +0000
commitbf9731edfa0439cad4d70efc3065e71e199c62b8 (patch)
tree71340a3d04a6294744c642ed6e4a56c0e8a77592 /arm_compute/core/utils
parent92e278d5f462c930af1947883a5f48c10586ae9c (diff)
downloadComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz
COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1
The current implementation is limited to FP32 only.
Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244
Reviewed-on: https://review.mlplatform.org/389
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/core/utils')
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h25
1 files changed, 25 insertions, 0 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 33893ad877..f41d00f54d 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -619,6 +619,31 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
return output_shape;
}
+/** Calculate the matrix multiplication output shape from the GEMM reshape information
+ *
+ * @param[in] input0    Tensor info of matrix A. Its shape is the starting point of the result and
+ *                      its dimensions 2 and 3 are carried over to the output
+ * @param[in] input1    Tensor info of the second operand. Not read by this overload
+ * @param[in] gemm_info Provides n(), m() and depth_output_gemm3d()
+ *
+ * @return the output shape: dimension 0 is n() and dimension 1 is m() divided by the 3D depth
+ *         (1 when depth_output_gemm3d() is 0)
+ */
+inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
+
+    // A non-zero depth requests a 3D output; a depth of 1 leaves the row count untouched otherwise
+    const bool output_as_3d = gemm_info.depth_output_gemm3d() != 0;
+    const int  depth        = output_as_3d ? gemm_info.depth_output_gemm3d() : 1;
+
+    // Dimensions of matrix A that are forwarded to the output
+    const int a_dim2 = input0.tensor_shape()[2];
+    const int a_dim3 = input0.tensor_shape()[3];
+
+    // With a 3D output the M rows are split across dimensions 1 and 2, so dimension 1 only holds m() / depth rows
+    const int cols = gemm_info.n();
+    const int rows = gemm_info.m() / depth;
+
+    TensorShape mm_shape{ input0.tensor_shape() };
+
+    mm_shape.set(0, cols);
+    mm_shape.set(1, rows);
+
+    if(output_as_3d)
+    {
+        // The depth takes dimension 2 and the dimensions taken from matrix A move up by one
+        mm_shape.set(2, depth);
+        mm_shape.set(3, a_dim2);
+        mm_shape.set(4, a_dim3);
+    }
+    else
+    {
+        mm_shape.set(2, a_dim2);
+        mm_shape.set(3, a_dim3);
+        mm_shape.set(4, 1);
+    }
+
+    return mm_shape;
+}
+
inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
{
ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);