diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-10-22 16:37:12 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-10-26 14:46:44 +0000 |
commit | 9ae06d4986bc3055f7786c1097b465bd321cf8eb (patch) | |
tree | adb50e965f860893fe83e3937026056bf1f054c9 /arm_compute/core | |
parent | 5f91041aef3eb7373d5d2cebcbe60f279da85904 (diff) | |
download | ComputeLibrary-9ae06d4986bc3055f7786c1097b465bd321cf8eb.tar.gz |
COMPMID-3925: Dispatch CLGEMM with no padding y requirement
- Add has_pad_y flag in GEMMKernelInfo
- Skip reinterpret as 3D in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel if
has_pad_y = false
- Add test to validate CLGEMMMatrixMultiplyReshapedOnlyRHSkernel with
had_pad_y = false/true
- Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel to
run with has_pad_y = false/true in CLGEMM
- Check if the lhs/dst tensors have pad y. If not, run
CLGEMMMatrixMultiplyReshapedOnlyRHSKernel without padding requirement
Change-Id: I68bb43389789736d676b899ac7c77fd9138babaf
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4248
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 1 | ||||
-rw-r--r-- | arm_compute/core/KernelDescriptors.h | 4 |
2 files changed, 4 insertions, 1 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h index fc21f2a0f6..eab7fd219e 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h @@ -162,6 +162,7 @@ private: bool _add_bias; bool _broadcast_bias; bool _export_to_cl_image; + bool _has_pad_y; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/ diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index 1ee1686fb1..ea46bfa5a6 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -64,6 +64,7 @@ struct GEMMKernelInfo bool ireinterpret_input_as_3d, bool ibroadcast_bias, bool ifp_mixed_precision, + bool ihas_pad_y, ActivationLayerInfo iactivation_info, int inmult_transpose1xW_width, int imult_interleave4x4_height, @@ -72,7 +73,7 @@ struct GEMMKernelInfo int32_t ina_offset, int32_t inb_offset) : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision), - activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), rhs_info(irhs_info), + has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), rhs_info(irhs_info), a_offset(ina_offset), b_offset(inb_offset) { } @@ -84,6 +85,7 @@ struct GEMMKernelInfo bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */ bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */ bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */ + bool has_pad_y{ false }; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */ ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */ int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */ |