commit 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2
Author:    Georgios Pinitas <georgios.pinitas@arm.com>  2021-07-16 16:16:43 +0100
Committer: Georgios Pinitas <georgios.pinitas@arm.com>  2021-07-22 02:25:50 +0000
Tree:      2f8362d33cdad4212f4b96995681c68184c759e1
Parent:    59fd7a722e5bc7e85309d6200bc37a772721a719
Update GEMM assembly kernels
- Introduce Fp32 kernels with internal calculations in Bfloat16 when fast_mode is enabled
- Improve kernel selection heuristics

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/Types.h')
 arm_compute/core/Types.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
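For illustration (not part of the patch), a minimal sketch of how a caller could opt into the new fast-math path once this change lands. All names come from the updated constructor in the diff below; the argument values before fast_math simply mirror the documented defaults.

#include "arm_compute/core/Types.h"

int main()
{
    using namespace arm_compute;

    // Request fast math: FP32 GEMMs may then run their internal
    // calculations in BFloat16 where a suitable kernel exists.
    GEMMInfo gemm_info(false,                     // is_a_reshaped
                       false,                     // is_b_reshaped
                       true,                      // reshape_b_only_on_first_run
                       0,                         // depth_output_gemm3d
                       false,                     // reinterpret_input_as_3d
                       false,                     // retain_internal_weights
                       GEMMLowpOutputStageInfo(), // gemmlowp_output_stage
                       false,                     // fp_mixed_precision
                       true);                     // fast_math

    return gemm_info.fast_math() ? 0 : 1;
}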
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index f6658e7544..9c00cbc88c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1948,6 +1948,7 @@ public:
_reinterpret_input_as_3d(false),
_retain_internal_weights(false),
_gemmlowp_output_stage(),
+ _fast_math(false),
_fp_mixed_precision(false),
_broadcast_bias(false),
_pretranpose_B(true),
@@ -1967,12 +1968,13 @@ public:
* @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
* @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] fast_math (Optional) Use a data type of shorter width to improve performance
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
* @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
* @param[in] constant_weights (Optional) Weights have constant values throughout multiple executions
*/
GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
- GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false,
+ GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
const ActivationLayerInfo &activation_info = ActivationLayerInfo(), bool constant_weights = true) noexcept
: _is_a_reshaped(is_a_reshaped),
_is_b_reshaped(is_b_reshaped),
@@ -1981,6 +1983,7 @@ public:
_reinterpret_input_as_3d(reinterpret_input_as_3d),
_retain_internal_weights(retain_internal_weights),
_gemmlowp_output_stage(gemmlowp_output_stage),
+ _fast_math(fast_math),
_fp_mixed_precision(fp_mixed_precision),
_broadcast_bias(broadcast_bias),
_pretranpose_B(reshape_b_only_on_first_run),
@@ -2062,6 +2065,14 @@ public:
{
return _fp_mixed_precision;
};
+ /** Flag which specifies if a shorter accumulator is to be used.
+ *
+ * @return True if a shorter accumulator is to be used
+ */
+ bool fast_math() const
+ {
+ return _fast_math;
+ };
/** Flag which specifies whether to broadcast the shape of the bias tensor.
*
* @return True if the shape of the bias tensor is to be broadcasted.
@@ -2119,6 +2130,7 @@ private:
bool _reinterpret_input_as_3d;
bool _retain_internal_weights;
GEMMLowpOutputStageInfo _gemmlowp_output_stage;
+ bool _fast_math;
bool _fp_mixed_precision;
bool _broadcast_bias;
bool _pretranpose_B;
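Downstream, a backend would read the flag through the new fast_math() getter when picking a kernel. A hypothetical selection sketch follows; select_bf16_kernel and select_fp32_kernel are placeholders for this example, not ComputeLibrary APIs — only GEMMInfo::fast_math() comes from the patch above.

#include "arm_compute/core/Types.h"

// Placeholders for this sketch; a real backend would dispatch to its
// own kernel implementations here.
void select_bf16_kernel();
void select_fp32_kernel();

void configure_gemm(const arm_compute::GEMMInfo &info)
{
    if(info.fast_math())
    {
        // FP32 input/output with BFloat16 internal compute.
        select_bf16_kernel();
    }
    else
    {
        // Plain FP32 path.
        select_fp32_kernel();
    }
}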