From 0021d750d66d199c411df00cdd8308c325f1fef3 Mon Sep 17 00:00:00 2001 From: Diego Lopez Recas Date: Mon, 18 Dec 2017 14:42:56 +0000 Subject: IVGCVSW-863 Broadcast support in CL/NEON Arithmetic Add Also, added instrumentation to support generic tensor broadcasting for NEON and CL backends. Change-Id: I1bc5747a286e1a4b464c209067581e103d473b9a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114201 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/runtime/NEON/functions/NEArithmeticAddition.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src/runtime/NEON/functions/NEArithmeticAddition.cpp') diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index b5dd4d0d06..7d8e3cff1c 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "support/ToolchainSupport.h" @@ -30,11 +31,21 @@ using namespace arm_compute; -void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +void NEArithmeticAddition::configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy) { auto k = arm_compute::support::cpp14::make_unique<NEArithmeticAdditionKernel>(); k->configure(input1, input2, output, policy); _kernel = std::move(k); + + if(output->info()->dimension(0) > 1) + { + ITensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? 
input1 : input2; + + if(broadcasted_info->info()->dimension(0) == 1) + { + _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + } + } } Status NEArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) { -- cgit v1.2.1