aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2017-07-11 15:00:52 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:16:42 +0100
commit: 81f0d15d6840a0ae8ef571114555a26da74c4a43 (patch)
tree: a9eeda0a2b69961cd6a51d74e039bbed26a9b436 /arm_compute/core/NEON/kernels
parent: f70256bd46f03090281581c152bd17b4a50febcd (diff)
download: ComputeLibrary-81f0d15d6840a0ae8ef571114555a26da74c4a43.tar.gz
COMPMID-444: Add support for QS8/QS16 NEON Arithmetic Add/Sub/Mul.
Change-Id: Ia482498688ca1884272b5062e3415e736e03d36f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80448
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r-- arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h | 12
-rw-r--r-- arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h | 12
-rw-r--r-- arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h | 7
3 files changed, 16 insertions, 15 deletions
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index 9bfdde1616..7ad5893b70 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -50,9 +50,9 @@ public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/S16/F16 (only if @p input1 is F16)/F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
* @param[in] policy Overflow policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
@@ -63,9 +63,9 @@ public:
private:
/** Common signature for all the specialised add functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/S16/F16 (only if @p input1 is F16)/F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
* @param[in] window Region on which to execute the kernel.
*/
using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index 0eb9c23686..6f88d2757a 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -50,9 +50,9 @@ public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F32 (only if both inputs are F32).
* @param[in] policy Overflow policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
@@ -63,9 +63,9 @@ public:
private:
/** Common signature for all the specialised sub functions
*
- * @param[in] input1 An input tensor. Data types supported: U8, S16, F32.
- * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32)
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F32 (only if both inputs are F32).
* @param[in] window Region on which to execute the kernel.
*/
using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index 433a20e48e..bf96c9026c 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -51,10 +51,11 @@ public:
*
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+ * For QS8/QS16 scale = 1 is the only supported value.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/S16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/S16/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8) /S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy.