From 99ac60bca77e9977c844cc1293751d63ddc3065c Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Fri, 16 Feb 2018 15:17:23 +0000 Subject: COMPMID-853 Fuse CL DepthwiseConvolution with Activation for QASYM8 Change-Id: I287908f76af458ad4b4d865d353dc37e33877250 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120839 Tested-by: Jenkins Reviewed-by: Anthony Barbier Reviewed-by: Georgios Pinitas --- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 2 +- .../CLDepthwiseConvolutionLayer3x3Kernel.cpp | 30 +++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 87fc1d097c..95c8250ee7 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -136,7 +136,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option_if(act_info.enabled(), "-D" + string_from_activation_func(act_info.activation())); + build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp index 7a47bcc704..d50e4d695e 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -49,7 +49,8 @@ BorderSize CLDepthwiseConvolutionLayer3x3Kernel::border_size() const return _border_size; } -void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -114,6 +115,33 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); + + if(act_info.enabled()) + { + const int a_val = input->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); + const int b_val = input->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int o1 = input->info()->quantization_info().offset; + + build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); + build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); + build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); + + if(output != nullptr) + { + const float s1 = input->info()->quantization_info().scale; + const float s2 = output->info()->quantization_info().scale; + const int o2 = output->info()->quantization_info().offset; + + if(o1 != o2 || s1 != s2) + { + build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); + build_opts.add_option("-DS2_VAL=" + float_to_string_with_full_precision(s2)); + build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); + build_opts.add_option("-DO2_VAL=" + support::cpp11::to_string(o2)); + } + } + } } // Configure the local work size for Bifrost with a value obtained -- cgit v1.2.1