From a788c2f7b143731704cdbc6a7f0016e4f38896d9 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Mon, 8 Apr 2019 13:18:00 +0100 Subject: COMPMID-2108: Fuse Activation Layer in CLDepthwiseConvolutionLayer3x3Kernels for F32 Change-Id: I39dd23696b6d8573e172a59b9e327b6a69886f08 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/973 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Usama Arif Reviewed-by: Giuseppe Rossini --- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp') diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index ec27e419c4..02d8c6d9c2 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -47,10 +47,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(act_info.enabled() && ((input->data_type() != DataType::QASYMM8) || ((act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC))), + ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8) && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) + && 
(act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC), "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3); @@ -241,6 +241,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, // Set build options CLBuildOptions build_opts; + build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(_output->info()->tensor_shape().z())); build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier)); build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); @@ -269,7 +270,6 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); const int o1 = output->info()->quantization_info().offset; - build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); @@ -279,6 +279,18 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } } + else + { + build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + 
build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); + build_opts.add_option_if(act_info.enabled(), "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(win_config.second.x().step())); + } + + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16"); + build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Set config_id for enabling LWS tuning -- cgit v1.2.1