about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2018-02-16 15:17:23 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:16 +0000
commit: 99ac60bca77e9977c844cc1293751d63ddc3065c (patch)
tree: 30867264809a61c817e5c543a6ac997620f4beaa /src/core/CL/kernels
parent: 60954c671ffdc3422bbdb728fc022eb6896c1e17 (diff)
download: ComputeLibrary-99ac60bca77e9977c844cc1293751d63ddc3065c.tar.gz
COMPMID-853 Fuse CL DepthwiseConvolution with Activation for QASYM8
Change-Id: I287908f76af458ad4b4d865d353dc37e33877250
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120839
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp 2
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp 30
2 files changed, 30 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 87fc1d097c..95c8250ee7 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -136,7 +136,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option_if(act_info.enabled(), "-D" + string_from_activation_func(act_info.activation()));
+ build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index 7a47bcc704..d50e4d695e 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -49,7 +49,8 @@ BorderSize CLDepthwiseConvolutionLayer3x3Kernel::border_size() const
return _border_size;
}
-void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -114,6 +115,33 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset));
build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
+
+ if(act_info.enabled())
+ {
+ const int a_val = input->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
+ const int b_val = input->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int o1 = input->info()->quantization_info().offset;
+
+ build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation())));
+ build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
+ build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
+ build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
+
+ if(output != nullptr)
+ {
+ const float s1 = input->info()->quantization_info().scale;
+ const float s2 = output->info()->quantization_info().scale;
+ const int o2 = output->info()->quantization_info().offset;
+
+ if(o1 != o2 || s1 != s2)
+ {
+ build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
+ build_opts.add_option("-DS2_VAL=" + float_to_string_with_full_precision(s2));
+ build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
+ build_opts.add_option("-DO2_VAL=" + support::cpp11::to_string(o2));
+ }
+ }
+ }
}
// Configure the local work size for Bifrost with a value obtained