diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-03-03 12:12:35 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-04-19 15:02:29 +0000 |
commit | fe7ae817755577be29f4c07aa27d8ef9e821da45 (patch) | |
tree | 459b1b22f59cf5144cd72b839fbfdf21fa341479 /src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h | |
parent | 60c3b0e6821a80d78ffca5be30e05d062d071cd2 (diff) | |
download | ComputeLibrary-fe7ae817755577be29f4c07aa27d8ef9e821da45.tar.gz |
CLInstanceNormalizationLayer NHWC optimisation
* Make changes to split the workload into two kernels. One kernel precomputes
mean and variance and the second kernel just loads these precomputed values.
* The new approach runs 30% faster than the original code for NHWC workloads
like 32x192x256.
* Resolves MLCE-337
Change-Id: I8356fcefa2d131ab4dcb32268ce7142421d073e4
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5355
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h')
-rw-r--r-- | src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h | 57 |
1 files changed, 47 insertions, 10 deletions
diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h index d4444f0b20..33a3ff97c3 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -52,21 +52,14 @@ public: /** Set the input and output tensors. * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); - /** Set the input and output tensors. - * * @param[in] compile_context The compile context to be used. * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[in] mean_var Tensor containing the precomputed mean and variance values. Data types supported: F32. * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] info Kernel meta-data descriptor */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. 
* @@ -84,7 +77,51 @@ public: private: ICLTensor *_input; ICLTensor *_output; + ICLTensor *_mean; bool _run_in_place; }; + +/** Interface for compute Mean and Variance per channel */ +class CLComputeMeanVariance : public ICLKernel +{ +public: + /** Constructor */ + CLComputeMeanVariance(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeMeanVariance(const CLComputeMeanVariance &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeMeanVariance &operator=(const CLComputeMeanVariance &) = delete; + /** Default Move Constructor. */ + CLComputeMeanVariance(CLComputeMeanVariance &&) = default; + /** Default move assignment operator */ + CLComputeMeanVariance &operator=(CLComputeMeanVariance &&) = default; + /** Default destructor */ + ~CLComputeMeanVariance() = default; + + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; +}; } // namespace arm_compute #endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ |