From cbf39c63a6eb89a2c80b2338afc374081803d79d Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Mon, 10 Sep 2018 15:07:45 +0100
Subject: COMPMID-1566: Add broadcast to CLArithmeticSubtraction

Change-Id: I05d21f9a92013ecfd1128d12cf1561cfd6e5c5e9
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/147983
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h  |  2 +-
 .../core/CL/kernels/CLArithmeticSubtractionKernel.h       | 15 ++++++++-------
 .../core/NEON/kernels/NEArithmeticSubtractionKernel.h     |  1 +
 .../runtime/CL/functions/CLArithmeticSubtraction.h        |  6 +++++-
 .../runtime/NEON/functions/NEArithmeticSubtraction.h      | 12 ++++++++++--
 5 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
index f4275f4153..48e72f3c13 100644
--- a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
+++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
@@ -51,7 +51,7 @@ public:
     CLArithmeticAdditionKernel &operator=(CLArithmeticAdditionKernel &&) = default;
     /** Default destructor */
     ~CLArithmeticAdditionKernel() = default;
-    /** Initialise the kernel's inputs, output and convertion policy.
+    /** Initialise the kernel's inputs, output and conversion policy.
      *
      * @param[in]  input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
      * @param[in]  input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
index 35b918fe4b..9875ac7a31 100644
--- a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
+++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
@@ -53,19 +53,19 @@ public:
     /** Default destructor */
     ~CLArithmeticSubtractionKernel() = default;
 
-    /** Initialise the kernel's inputs, output and convertion policy.
+    /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in]  input1 First tensor input. Data types supported: U8/S16/F16/F32.
-     * @param[in]  input2 Second tensor input. Data types supported: U8/S16/F16/F32.
-     * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
+     * @param[in]  input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
+     * @param[in]  input2 Second tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
+     * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8/S16/F16/F32.
      * @param[in]  policy Policy to use to handle overflow.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticSubtractionKernel
      *
-     * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
-     * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
-     * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
+     * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
+     * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8/S16/F16/F32.
      * @param[in] policy Policy to use to handle overflow.
      *
      * @return a status
@@ -74,6 +74,7 @@ public:
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
 
 private:
     const ICLTensor *_input1; /**< Source tensor 1 */
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index 3e93922b65..64ad6e072d 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -83,6 +83,7 @@ public:
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
 
 private:
     /** Common signature for all the specialised sub functions
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
index b9690806d7..2940044ed9 100644
--- a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
+++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
@@ -36,6 +36,10 @@ class ICLTensor;
  *
  * @note The tensor data type for the inputs must be U8/S16/F16/F32.
  * @note The function performs an arithmetic subtraction between two tensors.
+ *
+ * This function calls the following kernels:
+ * -# @ref CLFillBorderKernel (In case of broadcasting, applied to the input being broadcast)
+ * -# @ref CLArithmeticSubtractionKernel
  */
 class CLArithmeticSubtraction : public ICLSimpleFunction
 {
@@ -47,7 +51,7 @@ public:
      * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
      * @param[in]  policy Policy to use to handle overflow.
      */
-    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+    void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticSubtraction
      *
      * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 9b460c1031..541756cd2c 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -31,7 +31,15 @@ namespace arm_compute
 {
 class ITensor;
 
-/** Basic function to run @ref NEArithmeticSubtractionKernel */
+/** Basic function to run @ref NEArithmeticSubtractionKernel
+ *
+ * @note The tensor data type for the inputs must be U8/S16/F16/F32.
+ * @note The function performs an arithmetic subtraction between two tensors.
+ *
+ * This function calls the following kernels:
+ * -# @ref NEFillBorderKernel (In case of broadcasting, applied to the input being broadcast)
+ * -# @ref NEArithmeticSubtractionKernel
+ */
 class NEArithmeticSubtraction : public INESimpleFunction
 {
 public:
@@ -42,7 +50,7 @@ public:
      * @param[out] output Output tensor. Data types supported: U8/S16/F16/F32
      * @param[in]  policy Policy to use to handle overflow.
      */
-    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
+    void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
      *
      * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
-- 
cgit v1.2.1