From acce504ec4aebe5e5da470c1cfc3cee401ff11f3 Mon Sep 17 00:00:00 2001
From: giuros01
Date: Thu, 21 Feb 2019 17:32:34 +0000
Subject: COMPMID-1740: Fuse batch normalization with Convolution Layer at graph level

Change-Id: I77ca51c2c72783cc26a099a6a9c3210cdbbe822d
Signed-off-by: giuros01
Reviewed-on: https://review.mlplatform.org/c/797
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Reviewed-by: Georgios Pinitas
---
 src/core/CL/cl_kernels/batchnormalization_layer.cl | 29 +++++++++++-----------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'src/core/CL/cl_kernels/batchnormalization_layer.cl')

diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index dfd16e0da3..60307bc9a7 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -341,22 +341,10 @@ __kernel void fuse_batchnormalization_layer(TENSOR4D_DECLARATION(conv_w),
     Vector bn_mean = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bn_mean);
     Vector bn_var = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bn_var);
 
-    // In-place ops
-#ifdef IN_PLACE_W
-    Tensor4D fused_w = conv_w;
-#else /* IN_PLACE_W */
-    Tensor4D fused_w = CONVERT_TO_TENSOR4D_STRUCT(fused_w, NUM_CHANNELS);
-#endif /* IN_PLACE */
-#ifdef IN_PLACE_B
-    Vector fused_b = conv_b;
-#else /* IN_PLACE_W */
-    Vector fused_b = CONVERT_TO_VECTOR_STRUCT_NO_STEP(fused_b);
-#endif /* IN_PLACE */
-
     // Conditional ops
 #ifdef HAS_BIAS
     Vector conv_b = CONVERT_TO_VECTOR_STRUCT_NO_STEP(conv_b);
-#endif /* USE_DEFAULT_BETA */
+#endif /* HAS_BIAS */
 #ifndef USE_DEFAULT_BETA
     Vector bn_beta = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bn_beta);
 #endif /* USE_DEFAULT_BETA */
@@ -364,6 +352,19 @@ __kernel void fuse_batchnormalization_layer(TENSOR4D_DECLARATION(conv_w),
     Vector bn_gamma = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bn_gamma);
 #endif /* USE_DEFAULT_GAMMA */
 
+    // In-place ops
+#ifdef IN_PLACE_W
+    Tensor4D fused_w = conv_w;
+    uint fused_w_stride_x = conv_w_stride_x;
+#else /* IN_PLACE_W */
+    Tensor4D fused_w = CONVERT_TO_TENSOR4D_STRUCT(fused_w, NUM_CHANNELS);
+#endif /* IN_PLACE_W */
+#ifdef IN_PLACE_B
+    Vector fused_b = conv_b;
+#else /* IN_PLACE_B */
+    Vector fused_b = CONVERT_TO_VECTOR_STRUCT_NO_STEP(fused_b);
+#endif /* IN_PLACE_B */
+
     const int current_slice = get_global_id(2) / NUM_CHANNELS;
 
 #if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
-- 
cgit v1.2.1