From 8b2a7d3aa119e7f1d6a03690d05eb27c5d178b9f Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 11 Feb 2020 17:21:31 +0000 Subject: COMPMID-3101 Fuse activation with floating point elementwise operation layers in CL Signed-off-by: Giorgio Arena Change-Id: I1693f8664ba7c0dc8c076bbe7365cef1e667bd25 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2718 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- .../CL/kernels/CLElementwiseOperationKernel.cpp | 28 +++++++++++----- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 38 +++++++++++++++------- 2 files changed, 47 insertions(+), 19 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 1ac35a286f..0f2e26f186 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -231,7 +231,7 @@ std::pair validate_and_configure_window_for_division(ITensorInfo } // namespace CLElementwiseOperationKernel::CLElementwiseOperationKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) + : _act_info(), _input1(nullptr), _input2(nullptr), _output(nullptr) { } @@ -256,6 +256,12 @@ void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, con // Set kernel build options CLBuildOptions build_opts = generate_build_options(*input1->info(), *input2->info(), *output->info()); + if(_act_info.enabled()) + { + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(_act_info.activation()))); + build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(_act_info.a())); + build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(_act_info.b())); + } // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); @@ -320,19 +326,23 @@ BorderSize CLElementwiseOperationKernel::border_size() const /** Arithmetic operations with saturation*/ -void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy) +void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info) { - _policy = policy; - _op = op; + _policy = policy; + _op = op; + _act_info = act_info; configure_common(input1, input2, output); } -Status CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy) +Status CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(op, policy); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_arithmetic_rules(*input1, *input2, *output)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_arithmetic_operators(*input1->clone(), *input2->clone(), *output->clone()).first); + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !is_data_type_float(output->data_type())); return Status{}; } @@ -369,13 +379,14 @@ std::string CLSaturatedArithmeticOperationKernel::name() /** Arithmetic operations*/ -void CLArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +void CLArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) { - _op = op; + _op = op; + _act_info = act_info; configure_common(input1, input2, output); } -Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); if(op == ArithmeticOperation::DIV || op == ArithmeticOperation::POWER) @@ -389,6 +400,7 @@ Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITens ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_arithmetic_rules(*input1, *input2, *output)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_arithmetic_operators(*input1->clone(), *input2->clone(), *output->clone()).first); } + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !is_data_type_float(output->data_type())); return Status{}; } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index d31c1de402..ff5afa3d95 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -46,7 +46,7 @@ namespace constexpr unsigned int num_elems_processed_per_iteration = 16; Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(overflow_policy); ARM_COMPUTE_UNUSED(rounding_policy); @@ -64,6 +64,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative."); + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !is_data_type_float(output->data_type())); const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); @@ -148,11 +149,11 @@ CLPixelWiseMultiplicationKernel::CLPixelWiseMultiplicationKernel() } void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), - scale, overflow_policy, rounding_policy)); + scale, overflow_policy, rounding_policy, act_info)); // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); @@ -227,6 +228,12 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I build_opts.add_option_if_else(overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type()), "-DWRAP", "-DSATURATE"); build_opts.add_option_if_else(rounding_policy == RoundingPolicy::TO_ZERO, "-DROUND=_rtz", "-DROUND=_rte"); build_opts.add_option("-DDATA_TYPE_RES=" + compute_type); + if(act_info.enabled()) + { + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + } } // Create kernel @@ -248,10 +255,10 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I } Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy, act_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first); return Status{}; @@ -311,7 +318,7 @@ namespace { constexpr unsigned int num_elems_processed_per_iteration_complex = 1; -Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 2, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 2, DataType::F32); @@ -319,6 +326,7 @@ Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo * const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); + ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !is_data_type_float(output->data_type())); // Validate in case of configured output if(output->total_size() > 0) @@ -364,10 +372,10 @@ CLComplexPixelWiseMultiplicationKernel::CLComplexPixelWiseMultiplicationKernel() { } -void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1->info(), input2->info(), output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1->info(), input2->info(), output->info(), act_info)); // Configure kernel window auto win_config = validate_and_configure_window_complex(input1->info(), input2->info(), output->info()); @@ -377,16 +385,24 @@ void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, _input2 = input2; _output = output; + CLBuildOptions build_opts; + if(act_info.enabled()) + { + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); + build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + } + // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("pixelwise_mul_complex")); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("pixelwise_mul_complex", build_opts.options())); ICLKernel::configure_internal(win_config.second); } -Status CLComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status CLComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_complex(input1, input2, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_complex(input1, input2, output, act_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_complex(input1->clone().get(), input2->clone().get(), output->clone().get()).first); return Status{}; -- cgit v1.2.1