aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/KernelDescriptors.h
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2020-01-14 15:31:55 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-03-06 09:14:46 +0000
commitb54ba2848515bf0aee0619c760518481f58c7525 (patch)
tree7082afffb3b087401904454e33e005544a6d7ab2 /arm_compute/core/KernelDescriptors.h
parentb46702118eddcfec11487be8dd23234066642d62 (diff)
downloadComputeLibrary-b54ba2848515bf0aee0619c760518481f58c7525.tar.gz
COMPMID-2847: Fuse output stage in GEMMLowpMatrixMultiplyReshapedOnlyRHS
Change-Id: Icd60eb368a34295434e8c141885b4666973a92a1 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2732 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/KernelDescriptors.h')
-rw-r--r--arm_compute/core/KernelDescriptors.h23
1 files changed, 15 insertions, 8 deletions
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 4b04bebdef..58400b190b 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -54,14 +54,21 @@ struct FFTRadixStageKernelInfo
/** Descriptor used by the GEMM kernels */
struct GEMMKernelInfo
{
- unsigned int m{ 0 }; /**< Number of LHS rows*/
- unsigned int n{ 0 }; /**< Number of RHS columns*/
- unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */
- unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */
- bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
- bool broadcast_bias{ false }; /**< Flag used to broadcase the bias addition */
- bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+ unsigned int m{ 0 }; /**< Number of LHS rows*/
+ unsigned int n{ 0 }; /**< Number of RHS columns*/
+ unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */
+ unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */
+ bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
+ bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */
+ bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+ ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+ int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */
+ int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */
+ GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+ GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
+ int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */
+ int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */
+ GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
};
/** Descriptor used by the depthwise convolution kernels */