From dde9ec96f471127e5b6d8dfaeffce024b6326f1a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 13 Feb 2018 15:24:04 +0000 Subject: COMPMID-909: Enabling in-place computation for batchnormalization and activation at graph level Change-Id: I84d4a212629b21794451ab5fb5c5b187b5e28f98 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120127 Reviewed-by: Georgios Pinitas Tested-by: Jenkins --- src/core/CL/kernels/CLActivationLayerKernel.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/core/CL/kernels/CLActivationLayerKernel.cpp') diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 8d4c0b82d2..a78b3e1b93 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -96,7 +96,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLActivationLayerKernel::CLActivationLayerKernel() - : _input(nullptr), _output(nullptr) + : _input(nullptr), _output(nullptr), _run_in_place(false) { } @@ -104,6 +104,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act { ARM_COMPUTE_ERROR_ON_NULLPTR(input); + _run_in_place = (output == nullptr) || (output == input); + if(output != nullptr) { // Output auto inizialitation if not yet initialized @@ -147,12 +149,15 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); + const int o1 = input->info()->quantization_info().offset; + // Quantized value of 0 corresponds to the offset o1 + build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1))); + // Set scale and offset of the input and output if they have different quantization info if(is_data_type_quantized_asymmetric(dt) && output != nullptr) { const float s1 = input->info()->quantization_info().scale; const float s2 = output->info()->quantization_info().scale; - const int o1 = input->info()->quantization_info().offset; const int o2 = output->info()->quantization_info().offset; if(o1 != o2 || s1 != s2) @@ -162,9 +167,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); } - - // Quantized value of 0 corresponds to the offset o1 - build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1))); } } else @@ -173,7 +175,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); } - build_opts.emplace(output == nullptr ? "-DIN_PLACE" : ""); + build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : ""); if(is_data_type_fixed_point(dt)) { build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fixed_point_position))); @@ -188,7 +190,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act _output = output; // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info()); + auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure(win_config.second); @@ -203,8 +205,9 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { + const bool run_in_place = (output == nullptr) || (output == input); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first); return Status{}; } @@ -221,7 +224,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(_output != nullptr) + if(!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } -- cgit v1.2.1