From 923241eb998ad031f4cce7b12d8c24a0b6c80be8 Mon Sep 17 00:00:00 2001 From: zhenglin Date: Tue, 5 Dec 2017 11:30:51 +0800 Subject: APPBROWSER-314: Performance optimization for BatchNormalizationLayer Change-Id: Ie3ad9abb64e90720609bb6e67662eaf9dd4f3689 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111826 Reviewed-by: Joel Liang Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com (cherry picked from commit 02c1fa663926cc4fcd1995d4d18d7528e0c85d94) Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111834 Reviewed-by: Anthony Barbier --- .../kernels/GCBatchNormalizationLayerKernel.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/core/GLES_COMPUTE/kernels') diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp index 982143f0b2..dee2a5579b 100644 --- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp @@ -64,7 +64,11 @@ void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTenso _gamma = gamma; _epsilon = epsilon; - const unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size(); + unsigned int num_elems_processed_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + } // Set build options std::set build_opts; @@ -83,10 +87,10 @@ void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTenso AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - AccessWindowStatic mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 1, mean->info()->dimension(1)); - AccessWindowStatic var_access(var->info(), 0, 0, var->info()->dimension(0) + 1, 
var->info()->dimension(1)); - AccessWindowStatic beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 1, beta->info()->dimension(1)); - AccessWindowStatic gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 1, gamma->info()->dimension(1)); + AccessWindowStatic mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 3, mean->info()->dimension(1)); + AccessWindowStatic var_access(var->info(), 0, 0, var->info()->dimension(0) + 3, var->info()->dimension(1)); + AccessWindowStatic beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 3, beta->info()->dimension(1)); + AccessWindowStatic gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 3, gamma->info()->dimension(1)); update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access); output_access.set_valid_region(win, input->info()->valid_region()); -- cgit v1.2.1