diff options
author | Diego Lopez Recas <Diego.LopezRecas@arm.com> | 2017-12-18 14:42:56 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:45:00 +0000 |
commit | 0021d750d66d199c411df00cdd8308c325f1fef3 (patch) | |
tree | b96e618977442a8aab335c136d369a958998d416 /src/runtime/CL/functions | |
parent | 5b6904b8d9cb5e8a343cde96fd5a8701f44dff90 (diff) | |
download | ComputeLibrary-0021d750d66d199c411df00cdd8308c325f1fef3.tar.gz |
IVGCVSW-863 Broadcast support in CL/NEON Arithmetic Add
Also, added instrumentation to support generic tensor broadcasting for
NEON and CL backends.
Change-Id: I1bc5747a286e1a4b464c209067581e103d473b9a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114201
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions')
-rw-r--r-- | src/runtime/CL/functions/CLArithmeticAddition.cpp | 15 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLLaplacianReconstruct.cpp | 4 |
2 files changed, 15 insertions, 4 deletions
diff --git a/src/runtime/CL/functions/CLArithmeticAddition.cpp b/src/runtime/CL/functions/CLArithmeticAddition.cpp
index 5c2e582ba2..0b05058c4d 100644
--- a/src/runtime/CL/functions/CLArithmeticAddition.cpp
+++ b/src/runtime/CL/functions/CLArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
 
+#include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
 #include "support/ToolchainSupport.h"
 
@@ -30,11 +31,21 @@
 
 using namespace arm_compute;
 
-void CLArithmeticAddition::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy)
+void CLArithmeticAddition::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticAdditionKernel>();
     k->configure(input1, input2, output, policy);
     _kernel = std::move(k);
+
+    if(output->info()->dimension(0) > 1)
+    {
+        ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
+
+        if(broadcasted_info->info()->dimension(0) == 1)
+        {
+            _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+        }
+    }
 }
 
 Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
index 678848b82e..911c9b3b27 100644
--- a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
+++ b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@ CLLaplacianReconstruct::CLLaplacianReconstruct() // NOLINT
 {
 }
 
-void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
     ARM_COMPUTE_ERROR_ON(input == output);