From fe7ae817755577be29f4c07aa27d8ef9e821da45 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 3 Mar 2021 12:12:35 +0000 Subject: CLInstanceNormalizationLayer NHWC optimisation * Make changes to split the workload into two kernels. One kernel precomputes mean and variance and the second kernel just loads these precomputed values. * The new approach runs 30% faster than the original code for NHWC workloads like 32x192x256. * Resolves MLCE-337 Change-Id: I8356fcefa2d131ab4dcb32268ce7142421d073e4 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5355 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Michele Di Giorgio --- src/core/CL/CLKernelLibrary.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/core/CL/CLKernelLibrary.cpp') diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index eef204fde9..002a14400f 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -356,6 +356,7 @@ const std::map CLKernelLibrary::_kernel_program_map = { "im2col9x9_nhwc", "im2col.cl" }, { "im2col_generic_nhwc", "im2col.cl" }, { "instance_normalization", "instance_normalization.cl" }, + { "compute_mean_var", "instance_normalization.cl" }, { "l2_normalize_x", "l2_normalize.cl" }, { "l2_normalize_y", "l2_normalize.cl" }, { "l2_normalize_z", "l2_normalize.cl" }, -- cgit v1.2.1