From fe7ae817755577be29f4c07aa27d8ef9e821da45 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 3 Mar 2021 12:12:35 +0000 Subject: CLInstanceNormalizationLayer NHWC optimisation * Make changes to split the workload into two kernels. One kernel precomputes mean and variance and the second kernel just loads these precomputed values. * The new approach runs 30% faster than the original code for NHWC workloads like 32x192x256. * Resolves MLCE-337 Change-Id: I8356fcefa2d131ab4dcb32268ce7142421d073e4 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5355 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Michele Di Giorgio --- .../kernels/CLInstanceNormalizationLayerKernel.h | 57 ++++++++++++++++++---- 1 file changed, 47 insertions(+), 10 deletions(-) (limited to 'src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h') diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h index d4444f0b20..33a3ff97c3 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,23 +50,16 @@ public: /** Default destructor */ ~CLInstanceNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
- * @param[in] info Kernel meta-data descriptor - */ - void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[in] mean_var Tensor containing the precomputed mean and variance values. Data types supported: F32. * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] info Kernel meta-data descriptor */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. * @@ -84,7 +77,51 @@ public: private: ICLTensor *_input; ICLTensor *_output; + ICLTensor *_mean; bool _run_in_place; }; + +/** Interface for computing the mean and variance per channel */ +class CLComputeMeanVariance : public ICLKernel +{ +public: + /** Constructor */ + CLComputeMeanVariance(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeMeanVariance(const CLComputeMeanVariance &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeMeanVariance &operator=(const CLComputeMeanVariance &) = delete; + /** Default Move Constructor. 
*/ + CLComputeMeanVariance(CLComputeMeanVariance &&) = default; + /** Default move assignment operator */ + CLComputeMeanVariance &operator=(CLComputeMeanVariance &&) = default; + /** Default destructor */ + ~CLComputeMeanVariance() = default; + + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLComputeMeanVariance. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; +}; } // namespace arm_compute #endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ -- cgit v1.2.1