From a788c2f7b143731704cdbc6a7f0016e4f38896d9 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Mon, 8 Apr 2019 13:18:00 +0100 Subject: COMPMID-2108: Fuse Activation Layer in CLDepthwiseConvolutionLayer3x3Kernels for F32 Change-Id: I39dd23696b6d8573e172a59b9e327b6a69886f08 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/973 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Usama Arif Reviewed-by: Giuseppe Rossini --- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp') diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 86d186b95e..c31825cc2c 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -46,11 +46,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && ((input->data_type() != DataType::QASYMM8) || ((act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC))), - "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); //COMPMID-1317 add fused activation for F32 + ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8) && (act_info.activation() != 
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC), + "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1); // COMPMID-1071 Add depth multiplier support for NHWC @@ -202,6 +202,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const unsigned int num_elems_accessed_per_iteration = is_qasymm ? 4 : (8 / input->info()->element_size()); CLBuildOptions build_opts; + build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration)); build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2))); @@ -231,7 +232,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); const int o1 = output->info()->quantization_info().offset; - build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); @@ -243,6 +243,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, } else { + build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + 
float_to_string_with_full_precision(act_info.a())); + build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type())); } @@ -275,6 +278,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, kernel_name += (is_stride_1_dilation_1 ? "_stride1" : ""); } + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16"); + build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); + ICLKernel::configure_internal(win_config.second); _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); -- cgit v1.2.1