From 55a687d5e2cf3434e4c2564ac73959dcc7163304 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Thu, 30 Jan 2020 12:00:23 +0000
Subject: COMPMID-2927: Add support for mixed precision in
 CLInstanceNormalizationLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I91482e2e4b723606aef76afef09a8277813e5d1b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2668
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
---
 .../kernels/CLInstanceNormalizationLayerKernel.h   | 30 ++++++++++------------
 arm_compute/core/KernelDescriptors.h               | 25 ++++++++++++++++++
 .../CL/functions/CLInstanceNormalizationLayer.h    | 30 ++++++++++++----------
 3 files changed, 54 insertions(+), 31 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
index cf726d8cdd..9982cc2f1c 100644
--- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,11 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+
 namespace arm_compute
 {
+// Forward declarations
 class ICLTensor;
 
 /** Interface for performing an instance normalization */
@@ -49,26 +52,22 @@ public:
 
     /** Set the input and output tensors.
      *
-     * @param[in, out] input   Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
-     *                         In case of @p output tensor = nullptr this tensor will store the result of the normalization.
-     * @param[out]     output  Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]      gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in]      beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in]      epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in, out] input  Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
+     *                        In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     * @param[out]     output Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      info   Kernel meta-data descriptor
      */
-    void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
      *
-     * @param[in] input   Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
-     * @param[in] output  Destination tensor info. Data types and data layouts supported: same as @p input.
-     * @param[in] gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in] beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in] input  Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+     * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+     * @param[in] info   Kernel meta-data descriptor
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -76,9 +75,6 @@ public:
 private:
     ICLTensor *_input;
     ICLTensor *_output;
-    float      _gamma;
-    float      _beta;
-    float      _epsilon;
     bool       _run_in_place;
 };
 } // namespace arm_compute
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index d009ccc73d..4b04bebdef 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -92,5 +92,30 @@ struct DirectConvolutionLayerOutputStageKernelInfo
     int32_t  result_offset_after_shift{ 0 };        /**< Result offset used for quantizing */
     DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
 };
+
+struct InstanceNormalizationLayerKernelInfo
+{
+    /** Default constructor */
+    InstanceNormalizationLayerKernelInfo()
+        : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
+    {
+    }
+    /** Constructor
+     *
+     * @param[in] gamma               The scale scalar value applied to the normalized tensor.
+     * @param[in] beta                The offset scalar value applied to the normalized tensor
+     * @param[in] epsilon             Lower bound value for the normalization.
+     * @param[in] use_mixed_precision Use mixed precision in case of FP16 execution.
+     */
+    InstanceNormalizationLayerKernelInfo(float gamma, float beta, float epsilon, bool use_mixed_precision)
+        : gamma(gamma), beta(beta), epsilon(epsilon), use_mixed_precision(use_mixed_precision)
+    {
+    }
+
+    float gamma;               /**< The scale scalar value applied to the normalized tensor. Defaults to 1.0 */
+    float beta;                /**< The offset scalar value applied to the normalized tensor. Defaults to 0.0 */
+    float epsilon;             /**< Lower bound value for the normalization. Defaults to 1e-12 */
+    bool  use_mixed_precision; /**< Use mixed precision in case of FP16 execution. Defaults to true */
+};
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */
diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
index ca0da513ad..ddd4b12eca 100644
--- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,26 +42,28 @@ public:
     CLInstanceNormalizationLayer();
     /** Set the input and output tensors.
      *
-     * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
-     *                         Data types supported: F16/F32. Data layout supported: NHWC, NCHW
-     * @param[out]     output  Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]      gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in]      beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in]      epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in, out] input               Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     *                                     Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+     * @param[out]     output              Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      gamma               (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
+     * @param[in]      beta                (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
+     * @param[in]      epsilon             (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in]      use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
      */
-    void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
      *
-     * @param[in] input   Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
-     * @param[in] output  Destination tensor info. Data types and data layouts supported: same as @p input.
-     * @param[in] gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in] beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in] input               Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+     * @param[in] output              Destination tensor info. Data types and data layouts supported: same as @p input.
+     * @param[in] gamma               (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
+     * @param[in] beta                (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
+     * @param[in] epsilon             (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in] use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H */
-- 
cgit v1.2.1