aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLActivationLayerKernel.cpp
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2018-02-13 15:24:04 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:18 +0000
commit: dde9ec96f471127e5b6d8dfaeffce024b6326f1a (patch)
tree: 3aa88c0dec625feeb9d17da825b87398cac6cc68 /src/core/CL/kernels/CLActivationLayerKernel.cpp
parent: e3fba0afa892c66379da1e3d3843f2155a1fb29a (diff)
download: ComputeLibrary-dde9ec96f471127e5b6d8dfaeffce024b6326f1a.tar.gz
COMPMID-909: Enabling in-place computation for batchnormalization and activation at graph level
Change-Id: I84d4a212629b21794451ab5fb5c5b187b5e28f98
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120127
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLActivationLayerKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLActivationLayerKernel.cpp 21
1 files changed, 12 insertions, 9 deletions
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 8d4c0b82d2..a78b3e1b93 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -96,7 +96,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLActivationLayerKernel::CLActivationLayerKernel()
- : _input(nullptr), _output(nullptr)
+ : _input(nullptr), _output(nullptr), _run_in_place(false)
{
}
@@ -104,6 +104,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+ _run_in_place = (output == nullptr) || (output == input);
+
if(output != nullptr)
{
// Output auto inizialitation if not yet initialized
@@ -147,12 +149,15 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
+ const int o1 = input->info()->quantization_info().offset;
+ // Quantized value of 0 corresponds to the offset o1
+ build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1)));
+
// Set scale and offset of the input and output if they have different quantization info
if(is_data_type_quantized_asymmetric(dt) && output != nullptr)
{
const float s1 = input->info()->quantization_info().scale;
const float s2 = output->info()->quantization_info().scale;
- const int o1 = input->info()->quantization_info().offset;
const int o2 = output->info()->quantization_info().offset;
if(o1 != o2 || s1 != s2)
@@ -162,9 +167,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
}
-
- // Quantized value of 0 corresponds to the offset o1
- build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1)));
}
}
else
@@ -173,7 +175,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
}
- build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
+ build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : "");
if(is_data_type_fixed_point(dt))
{
build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fixed_point_position)));
@@ -188,7 +190,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
_output = output;
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info());
+ auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
@@ -203,8 +205,9 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
+ const bool run_in_place = (output == nullptr) || (output == input);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()).first);
return Status{};
}
@@ -221,7 +224,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(_output != nullptr)
+ if(!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}