From 0021d750d66d199c411df00cdd8308c325f1fef3 Mon Sep 17 00:00:00 2001 From: Diego Lopez Recas Date: Mon, 18 Dec 2017 14:42:56 +0000 Subject: IVGCVSW-863 Broadcast support in CL/NEON Arithmetic Add Also, added instrumentation to support generic tensor broadcasting for NEON and CL backends. Change-Id: I1bc5747a286e1a4b464c209067581e103d473b9a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114201 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/runtime/CL/functions/CLArithmeticAddition.cpp | 15 +++++++++++++-- src/runtime/CL/functions/CLLaplacianReconstruct.cpp | 4 ++-- src/runtime/NEON/functions/NEArithmeticAddition.cpp | 15 +++++++++++++-- src/runtime/NEON/functions/NELaplacianReconstruct.cpp | 4 ++-- 4 files changed, 30 insertions(+), 8 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/CL/functions/CLArithmeticAddition.cpp b/src/runtime/CL/functions/CLArithmeticAddition.cpp index 5c2e582ba2..0b05058c4d 100644 --- a/src/runtime/CL/functions/CLArithmeticAddition.cpp +++ b/src/runtime/CL/functions/CLArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" #include "support/ToolchainSupport.h" @@ -30,11 +31,21 @@ using namespace arm_compute; -void CLArithmeticAddition::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy) +void CLArithmeticAddition::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input1, input2, output, policy); _kernel = std::move(k); + + if(output->info()->dimension(0) > 1) + { + ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; + + if(broadcasted_info->info()->dimension(0) == 1) + { + _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + } + } } Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp index 678848b82e..911c9b3b27 100644 --- a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp +++ b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,7 @@ CLLaplacianReconstruct::CLLaplacianReconstruct() // NOLINT { } -void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON(nullptr == pyramid); ARM_COMPUTE_ERROR_ON(input == output); diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index b5dd4d0d06..7d8e3cff1c 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "support/ToolchainSupport.h" @@ -30,11 +31,21 @@ using namespace arm_compute; -void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +void NEArithmeticAddition::configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input1, input2, output, policy); _kernel = std::move(k); + + if(output->info()->dimension(0) > 1) + { + ITensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; + + if(broadcasted_info->info()->dimension(0) == 1) + { + _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + } + } } Status NEArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) { diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp index 0893701cd5..9ad9689b13 100644 --- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp +++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,7 @@ NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT { } -void NELaplacianReconstruct::configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +void NELaplacianReconstruct::configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON(nullptr == pyramid); ARM_COMPUTE_ERROR_ON(input == output); -- cgit v1.2.1