From bf9731edfa0439cad4d70efc3065e71e199c62b8 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 12 Dec 2018 10:18:04 +0000 Subject: COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1 The current implementation is limited just to FP32 Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244 Reviewed-on: https://review.mlplatform.org/389 Reviewed-by: Anthony Barbier Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- arm_compute/core/utils/misc/ShapeCalculator.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arm_compute/core/utils') diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 33893ad877..f41d00f54d 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -619,6 +619,31 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo return output_shape; } +inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +{ + ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); + + const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; + const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1; + + // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third + // dimension of the output tensor + const int dim0 = gemm_info.n(); + const int dim1 = gemm_info.m() / depth_output_gemm3d; + const int dim2 = input0.tensor_shape()[2]; + const int dim3 = input0.tensor_shape()[3]; + + TensorShape output_shape{ input0.tensor_shape() }; + + output_shape.set(0, dim0); + output_shape.set(1, dim1); + output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2); + output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3); + output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1); + + return output_shape; +} + inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); -- cgit v1.2.1