diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-12-12 10:18:04 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-12-14 14:57:48 +0000 |
commit | bf9731edfa0439cad4d70efc3065e71e199c62b8 (patch) | |
tree | 71340a3d04a6294744c642ed6e4a56c0e8a77592 /arm_compute/core/utils/misc | |
parent | 92e278d5f462c930af1947883a5f48c10586ae9c (diff) | |
download | ComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz |
COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1
The current implementation is limited just to FP32
Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244
Reviewed-on: https://review.mlplatform.org/389
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/core/utils/misc')
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 33893ad877..f41d00f54d 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -619,6 +619,31 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo return output_shape; } +inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +{ + ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); + + const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; + const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1; + + // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third + // dimension of the output tensor + const int dim0 = gemm_info.n(); + const int dim1 = gemm_info.m() / depth_output_gemm3d; + const int dim2 = input0.tensor_shape()[2]; + const int dim3 = input0.tensor_shape()[3]; + + TensorShape output_shape{ input0.tensor_shape() }; + + output_shape.set(0, dim0); + output_shape.set(1, dim1); + output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2); + output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3); + output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1); + + return output_shape; +} + inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); |