COMPMID-1266 : Add support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels

Introduced F32 accumulation for the F16 Winograd GEMM and output transform. WinogradConvolution will be available for F16 only if the fast math flag is enabled.

Change-Id: I215593c205236a0f9669218437bb40b184ec6a4f
author: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> 2018-11-16 11:33:12 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-16 17:37:40 +0000
commit: a25d16c86f0d870408bc8b941aa755093417b0f0 (patch)
tree: b62d145a4e5009d894262a7ffa66cdba8260bb03 /arm_compute/core/CL
parent: a7b54f44e2bf133179f24a34007bc93237dd2265 (diff)
download: ComputeLibrary-a25d16c86f0d870408bc8b941aa755093417b0f0.tar.gz
1 files changed, 5 insertions, 2 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index e030fa2d2a..f61c330de6 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -59,9 +59,11 @@ public:
      * @param[in]  alpha                     Weight of the matrix product
      * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
      * @param[in]  reshape_info              (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+     * @param[in]  fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
      *
      */
-    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(),
+                   bool fp_mixed_precision = false);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
      *
      * @param[in] input0                    Input tensor containing the Matrix A. Data types supported: F16/F32
@@ -71,11 +73,12 @@ public:
      * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
      * @param[in] reshape_info              GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
      * @param[in] gpu_target                GPU Target
+     * @param[in] fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
-                           GPUTarget gpu_target);
+                           GPUTarget gpu_target, bool fp_mixed_precision = false);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
author	Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>	2018-11-16 11:33:12 +0000
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2018-11-16 17:37:40 +0000
commit	a25d16c86f0d870408bc8b941aa755093417b0f0 (patch)
tree	b62d145a4e5009d894262a7ffa66cdba8260bb03 /arm_compute/core/CL
parent	a7b54f44e2bf133179f24a34007bc93237dd2265 (diff)
download	ComputeLibrary-a25d16c86f0d870408bc8b941aa755093417b0f0.tar.gz