author     Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>   2018-11-16 11:33:12 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>                  2018-11-16 17:37:40 +0000
commit     a25d16c86f0d870408bc8b941aa755093417b0f0 (patch)
tree       b62d145a4e5009d894262a7ffa66cdba8260bb03 /arm_compute/core
parent     a7b54f44e2bf133179f24a34007bc93237dd2265 (diff)
COMPMID-1266 : Add support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels
Introduced F32 accumulation for the F16 Winograd GEMM and output transform. WinogradConvolution will be available for F16 only if the fast math flag is enabled.

Change-Id: I215593c205236a0f9669218437bb40b184ec6a4f
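To make the commit message concrete, here is a minimal usage sketch, assuming the library's public CLWinogradConvolutionLayer::configure() interface of this release; the tensor shapes, padding values, and scheduler/allocation boilerplate are illustrative assumptions, not part of this patch:

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative F16 tensors for a 5x5 convolution (shapes are assumptions).
    CLTensor src, weights, dst;
    src.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U), 1, DataType::F16));
    weights.allocator()->init(TensorInfo(TensorShape(5U, 5U, 64U, 64U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U), 1, DataType::F16));

    CLWinogradConvolutionLayer winograd;
    // Per the commit message, the F16 Winograd path is only taken when the
    // fast-math flag (last argument) is set; the GEMM and output transform
    // then accumulate in F32 internally.
    winograd.configure(&src, &weights, nullptr, &dst,
                       PadStrideInfo(1, 1, 2, 2),
                       ActivationLayerInfo(),
                       true /* enable_fast_math */);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    dst.allocator()->allocate();
    winograd.run();
    return 0;
}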
Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h |  7
-rw-r--r--  arm_compute/core/Types.h                                 | 17
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index e030fa2d2a..f61c330de6 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -59,9 +59,11 @@ public:
* @param[in] alpha Weight of the matrix product
* @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
* @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
*
*/
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(),
+ bool fp_mixed_precision = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
*
* @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
@@ -71,11 +73,12 @@ public:
* @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
* @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
* @param[in] gpu_target GPU Target
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
*
* @return a status
*/
static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
- GPUTarget gpu_target);
+ GPUTarget gpu_target, bool fp_mixed_precision = false);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
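For illustration, a hedged sketch of calling the extended validate() overload above; the matrix shapes, alpha value, and GPU target are assumptions made for the example, not values from the patch:

#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool f16_gemm_with_wide_accumulators_is_valid()
{
    // Illustrative F16 matrix infos (shapes are assumptions).
    TensorInfo a(TensorShape(64U, 64U), 1, DataType::F16);   // Matrix A
    TensorInfo b(TensorShape(64U, 64U), 1, DataType::F16);   // Matrix B
    TensorInfo out(TensorShape(64U, 64U), 1, DataType::F16); // Output

    // The trailing fp_mixed_precision argument added by this patch requests
    // 32-bit accumulation for the F16 GEMM.
    const Status s = CLGEMMMatrixMultiplyKernel::validate(&a, &b, &out,
                                                          1.0f  /* alpha */,
                                                          false /* is_interleaved_transposed */,
                                                          GEMMReshapeInfo(),
                                                          GPUTarget::G71,
                                                          true  /* fp_mixed_precision */);
    return s.error_code() == ErrorCode::OK;
}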
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index fb277584fd..4eb8129b62 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1593,7 +1593,8 @@ class GEMMInfo
public:
/** Default constructor */
GEMMInfo()
- : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _retain_internal_weights(false), _gemmlowp_output_stage()
+ : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _retain_internal_weights(false),
+ _gemmlowp_output_stage(), _fp_mixed_precision(false)
{
}
/** Constructor
@@ -1607,12 +1608,13 @@ public:
* to perform 1x1 convolutions with the NHWC data layout)
* @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
* @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
*
*/
GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
- GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo())
+ GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false)
: _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d), _retain_internal_weights(retain_internal_weights), _gemmlowp_output_stage(gemmlowp_output_stage)
+ _reinterpret_input_as_3d(reinterpret_input_as_3d), _retain_internal_weights(retain_internal_weights), _gemmlowp_output_stage(gemmlowp_output_stage), _fp_mixed_precision(fp_mixed_precision)
{
}
/** Flag which specifies if the matrix A has been reshaped
@@ -1673,6 +1675,14 @@ public:
{
return _gemmlowp_output_stage;
};
+ /** Flag which specifies if a wider accumulator should be used.
+ *
+ * @return True if a wider accumulator has to be used
+ */
+ bool fp_mixed_precision() const
+ {
+ return _fp_mixed_precision;
+ };
private:
const bool _is_a_reshaped;
@@ -1682,6 +1692,7 @@ private:
const bool _reinterpret_input_as_3d;
const bool _retain_internal_weights;
const GEMMLowpOutputStageInfo _gemmlowp_output_stage;
+ const bool _fp_mixed_precision;
};
/** Winograd information */
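Finally, a short sketch of how the new GEMMInfo field is populated and queried; only the fp_mixed_precision argument and its getter come from this patch, and the remaining constructor arguments are the documented defaults:

using namespace arm_compute;

// Construct a GEMMInfo that asks for wider (F32) accumulators on an F16 GEMM.
GEMMInfo info(false /* is_a_reshaped */,
              false /* is_b_reshaped */,
              true  /* reshape_b_only_on_first_run */,
              0     /* depth_output_gemm3d */,
              false /* reinterpret_input_as_3d */,
              false /* retain_internal_weights */,
              GEMMLowpOutputStageInfo(),
              true  /* fp_mixed_precision */);

// Downstream kernels query the flag through the new getter.
const bool wide_acc = info.fp_mixed_precision(); // true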