Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp')
-rw-r--r--  src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 86d186b95e..c31825cc2c 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -46,11 +46,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && ((input->data_type() != DataType::QASYMM8) || ((act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
- && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
- && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU)
- && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC))),
- "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); //COMPMID-1317 add fused activation for F32
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8) && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
+ && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU)
+ && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1); // COMPMID-1071 Add depth multiplier support for NHWC
@@ -202,6 +202,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
const unsigned int num_elems_accessed_per_iteration = is_qasymm ? 4 : (8 / input->info()->element_size());
CLBuildOptions build_opts;
+ build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS");
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration));
build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2)));
@@ -231,7 +232,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
const int o1 = output->info()->quantization_info().offset;
- build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
@@ -243,6 +243,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
}
else
{
+ build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
+ build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
+ build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type()));
}
@@ -275,6 +278,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
kernel_name += (is_stride_1_dilation_1 ? "_stride1" : "");
}
+ build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16");
+ build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32");
+
ICLKernel::configure_internal(win_config.second);
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
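
For context only: the change above conditionally emits preprocessor defines (-DFUSED_ACTIVATION, -DA_VAL, -DB_VAL, -DIS_F16/-DIS_F32) that are passed to the OpenCL compiler when the kernel is built. Below is a minimal standalone C++ sketch of that host-side pattern. The names ActivationKind, activation_name and make_build_options are hypothetical illustrations, not the Compute Library API; the library itself uses CLBuildOptions::add_option_if, lower_string and string_from_activation_func as shown in the diff.

// Illustrative sketch only: mirrors how configure() assembles -D options
// for the OpenCL compiler depending on the fused activation and data type.
#include <iostream>
#include <string>
#include <vector>

enum class ActivationKind { NONE, RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LOGISTIC };

// Hypothetical stand-in for lower_string(string_from_activation_func(...)).
static std::string activation_name(ActivationKind a)
{
    switch(a)
    {
        case ActivationKind::RELU:            return "relu";
        case ActivationKind::BOUNDED_RELU:    return "brelu";
        case ActivationKind::LU_BOUNDED_RELU: return "lu_brelu";
        case ActivationKind::LOGISTIC:        return "logistic";
        default:                              return "";
    }
}

// Conditionally build the define list: the fused-activation define is added
// whenever an activation is enabled, and A_VAL/B_VAL are emitted as floats
// on the non-quantized path, roughly as in the patched configure().
static std::vector<std::string> make_build_options(ActivationKind act, float a, float b, bool is_f16)
{
    std::vector<std::string> opts;
    if(act != ActivationKind::NONE)
    {
        opts.push_back("-DFUSED_ACTIVATION=" + activation_name(act));
        opts.push_back("-DA_VAL=" + std::to_string(a));
        opts.push_back("-DB_VAL=" + std::to_string(b));
    }
    opts.push_back(is_f16 ? "-DIS_F16" : "-DIS_F32");
    return opts;
}

int main()
{
    // Example: bounded ReLU fused into an F32 kernel build.
    for(const auto &o : make_build_options(ActivationKind::LU_BOUNDED_RELU, 6.0f, 0.0f, false))
    {
        std::cout << o << '\n';
    }
    return 0;
}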