aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2019-06-26 15:17:09 +0100
committerManuel Bottini <manuel.bottini@arm.com>2019-07-03 12:46:08 +0000
commit7bb56c6337997281df10fa28ad7924c921b920eb (patch)
treeaf1ee9244c7c0f9265bb6d075816b18fac2f66df
parent6b9f388f719dc9ff1181c9a43a41140f19e15ec8 (diff)
downloadComputeLibrary-7bb56c6337997281df10fa28ad7924c921b920eb.tar.gz
COMPMID-2409: Add QSYMM16 support for PixelWiseMultiplication for NEON
Change-Id: Idfd3b45857201d5143242f9517d3353150b2c923 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/1422 Reviewed-by: Pablo Marquez <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/NEON/NEAsymm.h23
-rw-r--r--arm_compute/core/NEON/NEAsymm.inl15
-rw-r--r--arm_compute/core/NEON/NEMath.h22
-rw-r--r--arm_compute/core/NEON/NEMath.inl28
-rw-r--r--arm_compute/core/NEON/NESymm.h68
-rw-r--r--arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h26
-rw-r--r--arm_compute/core/QuantizationInfo.h1
-rw-r--r--arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h16
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp76
-rw-r--r--tests/validation/NEON/PixelWiseMultiplication.cpp175
10 files changed, 340 insertions, 110 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 4c8f797360..981c7b075c 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_NEASYMM_H__
#define __ARM_COMPUTE_NEASYMM_H__
+#include "arm_compute/core/NEON/NEMath.h"
#include <arm_neon.h>
namespace arm_compute
@@ -34,28 +35,6 @@ using qasymm8x8x3_t = uint8x8x3_t; /**< 8 bit quantized asymmetric vector with 2
using qasymm8x8x4_t = uint8x8x4_t; /**< 8 bit quantized asymmetric vector with 32 elements */
using qasymm8x16_t = uint8x16_t; /**< 8 bit quantized asymmetric vector with 16 elements */
-/** Round to the nearest division by a power-of-two using exponent
- *
- * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent
- *
- * @param[in] x Vector of 4 elements
- * @param[in] exponent Integer value used to round to nearest division by a power-of-two
- *
- * @return the nearest division by a power-of-two using exponent
- */
-int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent);
-
-/** Round to the nearest division by a power-of-two using exponent
- *
- * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent
- *
- * @param[in] x Element to divide.
- * @param[in] exponent Integer value used to round to nearest division by a power-of-two
- *
- * @return the nearest division by a power-of-two using exponent
- */
-int32_t rounding_divide_by_pow2(int32_t x, int exponent);
-
/** Perform a multiply-accumulate on all 16 components of a QASYMM8 vector
*
* vd*vs + vo
diff --git a/arm_compute/core/NEON/NEAsymm.inl b/arm_compute/core/NEON/NEAsymm.inl
index 209785d94e..a98c6aa390 100644
--- a/arm_compute/core/NEON/NEAsymm.inl
+++ b/arm_compute/core/NEON/NEAsymm.inl
@@ -23,21 +23,6 @@
*/
namespace arm_compute
{
-inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent)
-{
- const int32x4_t shift_vec = vdupq_n_s32(-exponent);
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31);
- const int32x4_t fixed_up_x = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed_up_x, shift_vec);
-}
-
-inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
-{
- const int32_t mask = (1 << exponent) - 1;
- const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
- return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
-}
-
inline qasymm8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo)
{
// Convert uint8 vectors to uint16 vectors
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 46d97f6a0d..59a03c9d11 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -124,6 +124,28 @@ float32x4_t vtanhq_f32(float32x4_t val);
*/
float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
+/** Round to the nearest division by a power-of-two using exponent
+ *
+ * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent
+ *
+ * @param[in] x Vector of 4 elements
+ * @param[in] exponent Integer value used to round to nearest division by a power-of-two
+ *
+ * @return the nearest division by a power-of-two using exponent
+ */
+int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent);
+
+/** Round to the nearest division by a power-of-two using exponent
+ *
+ * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent
+ *
+ * @param[in] x Element to divide.
+ * @param[in] exponent Integer value used to round to nearest division by a power-of-two
+ *
+ * @return the nearest division by a power-of-two using exponent
+ */
+int32_t rounding_divide_by_pow2(int32_t x, int exponent);
+
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Calculate hyperbolic tangent.
*
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 172aaef941..2247c14f47 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -69,19 +69,20 @@ inline float32x4_t vroundq_rte_f32(float32x4_t val)
{
#ifdef __aarch64__
return vrndnq_f32(val);
-#else // __aarch64__
+#else // __aarch64__
static const float32x4_t CONST_HALF_FLOAT = vdupq_n_f32(0.5f);
- static const float32x4_t CONST_1_FLOAT = vdupq_n_f32(1.f);
- static const int32x4_t CONST_1_INT = vdupq_n_s32(1);
- const float32x4_t floor_val = vfloorq_f32(val);
- const float32x4_t diff = vsubq_f32(val, floor_val);
+ static const float32x4_t CONST_1_FLOAT = vdupq_n_f32(1.f);
+ static const int32x4_t CONST_1_INT = vdupq_n_s32(1);
+ const float32x4_t floor_val = vfloorq_f32(val);
+ const float32x4_t diff = vsubq_f32(val, floor_val);
/*
* Select the floor value when (diff<0.5 || (diff==0.5 && floor_val%2==0).
* This condition is checked by vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT))))
*/
- return vbslq_f32(vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT)))), floor_val, vaddq_f32(floor_val, CONST_1_FLOAT));
+ return vbslq_f32(vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT), vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT), vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT), CONST_1_INT)))),
+ floor_val, vaddq_f32(floor_val, CONST_1_FLOAT));
#endif // __aarch64__
}
@@ -191,6 +192,21 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
}
#endif /* DOXYGEN_SKIP_THIS */
+inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent)
+{
+ const int32x4_t shift_vec = vdupq_n_s32(-exponent);
+ const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31);
+ const int32x4_t fixed_up_x = vqaddq_s32(x, fixup);
+ return vrshlq_s32(fixed_up_x, shift_vec);
+}
+
+inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
+{
+ const int32_t mask = (1 << exponent) - 1;
+ const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
+ return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
+}
+
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Exponent polynomial coefficients */
/** Logarithm polynomial coefficients */
diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
index 364a317bc7..a60d5d0fde 100644
--- a/arm_compute/core/NEON/NESymm.h
+++ b/arm_compute/core/NEON/NESymm.h
@@ -24,11 +24,17 @@
#ifndef __ARM_COMPUTE_NESYMM_H__
#define __ARM_COMPUTE_NESYMM_H__
-#include "NEAsymm.h"
+#include "arm_compute/core/NEON/NEMath.h"
#include <arm_neon.h>
namespace arm_compute
{
+using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */
+using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
+
+using qsymm16x8_t = int16x8_t; /**< 16 bit quantized symmetric vector with 8 elements */
+using qsymm16x8x2_t = int16x8x2_t; /**< 16 bit quantized symmetric vector with 16 elements */
+
/** Performs final quantization step on 8 signed 16-bit elements
*
* @tparam is_bounded_relu Specified if a fused bounded relu should be applied
@@ -149,5 +155,65 @@ inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
}
+/** Dequantize a neon vector holding 16 16-bit quantized values.
+ *
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x4_t vdequantize(const int16x8x2_t &qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const float32x4_t vscale = vdupq_n_f32(scale);
+ const float32x4x4_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[0]))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[0]))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[1]))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[1]))), vscale),
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Quantize a neon vector holding 16 floating point values.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline qsymm16x8x2_t vquantize_qsymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ ARM_COMPUTE_ERROR_ON(scale == 0.f);
+ const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+ const int32x4x4_t rf =
+ {
+ {
+#ifdef __aarch64__
+ vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+ vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
+ vcvtnq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
+ vcvtnq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
+#else //__aarch64__
+ vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+ vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
+ vcvtq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
+ vcvtq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
+#endif //__aarch64__
+ }
+ };
+ const qsymm16x8x2_t res =
+ {
+ vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])),
+ vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])),
+ };
+
+ return res;
+}
+
} // namespace arm_compute
#endif // __ARM_COMPUTE_NESYMM_H__
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index daa29fdf4f..e2ea90a33f 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -56,12 +56,12 @@ public:
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
* @param[in] rounding_policy Rounding policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
@@ -70,12 +70,12 @@ public:
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QSYMM16/S16/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
* @param[in] rounding_policy Rounding policy.
*
* @return a status
@@ -114,12 +114,12 @@ private:
* @param[in] output_qua_info Quantization Info of tensor output.
*
*/
- using MulFunctionQASYMM8 = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
- const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info);
+ using MulFunctionQuantized = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
+ const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info);
- MulFunctionFloat *_func_float;
- MulFunctionInt *_func_int;
- MulFunctionQASYMM8 *_func_qasymm8;
+ MulFunctionFloat *_func_float;
+ MulFunctionInt *_func_int;
+ MulFunctionQuantized *_func_quantized;
private:
const ITensor *_input1;
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 1c49cd29ed..587a380d63 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -33,7 +33,6 @@
namespace arm_compute
{
using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */
-using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */
using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
/** Quantization info when assuming per layer quantization */
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 53c27c47bf..41137c0135 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -40,14 +40,14 @@ public:
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/S16/F16/F32
+ * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
* This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
* This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
* @param[in] rounding_policy Rounding policy.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
@@ -56,12 +56,12 @@ public:
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
* @param[in] rounding_policy Rounding policy.
*
* @return a status
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index c313b23ad3..6aaac818e9 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/NEON/NESymm.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
@@ -63,21 +64,30 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
ARM_COMPUTE_UNUSED(rounding_policy);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QASYMM8 && input2->data_type() != DataType::QASYMM8,
- "Input2 must be QASYMM8 if both input1 is QASYMM8");
+ "Input2 must be QASYMM8 if input1 is QASYMM8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QASYMM8 && input2->data_type() == DataType::QASYMM8 && overflow_policy == ConvertPolicy::WRAP,
- "ConvertPolicy cannot be WRAP if datatype is QASYMM8");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() != DataType::QASYMM8 && input2->data_type() == DataType::QASYMM8,
+ "Input1 must be QASYMM8 if input2 is QASYMM8");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QSYMM16 && input2->data_type() != DataType::QSYMM16,
+ "Input2 must be QSYMM16 if input1 is QSYMM16");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() != DataType::QSYMM16 && input2->data_type() == DataType::QSYMM16,
+ "Input1 must be QSYMM16 if input2 is QSYMM16");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(input1->data_type()) && overflow_policy == ConvertPolicy::WRAP,
+ "ConvertPolicy cannot be WRAP if datatype is quantized");
if(output->total_size() > 0)
{
- if(output->data_type() == DataType::QASYMM8)
+ if(is_data_type_quantized(output->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
}
@@ -128,6 +138,14 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
{
set_format_if_unknown(*output, Format::F16);
}
+ else if(input1->data_type() == DataType::QASYMM8)
+ {
+ set_data_type_if_unknown(*output, DataType::QASYMM8);
+ }
+ else if(input1->data_type() == DataType::QSYMM16)
+ {
+ set_data_type_if_unknown(*output, DataType::QSYMM16);
+ }
}
// Configure kernel window
@@ -201,6 +219,34 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c
vst1q_u8(output, result);
}
+void mul_saturate_QSYMM16_QSYMM16_QSYMM16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
+ const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info)
+{
+ const auto input1 = static_cast<const qsymm16_t *__restrict>(input1_ptr);
+ const auto input2 = static_cast<const qsymm16_t *__restrict>(input2_ptr);
+ const auto output = static_cast<qsymm16_t *__restrict>(output_ptr);
+
+ const qsymm16x8x2_t input1_q = vld2q_s16(input1);
+ const qsymm16x8x2_t input2_q = vld2q_s16(input2);
+
+ // Dequantitize inputs
+ const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info);
+ const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info);
+
+ const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset };
+
+ const float32x4x4_t out_f32x4x4 =
+ {
+ vmulq_f32(in1_f32x4x4.val[0], in2_f32x4x4.val[0]),
+ vmulq_f32(in1_f32x4x4.val[1], in2_f32x4x4.val[1]),
+ vmulq_f32(in1_f32x4x4.val[2], in2_f32x4x4.val[2]),
+ vmulq_f32(in1_f32x4x4.val[3], in2_f32x4x4.val[3]),
+ };
+
+ const qsymm16x8x2_t result = vquantize_qsymm16(out_f32x4x4, tmp_qua_info);
+ vst2q_s16(output, result);
+}
+
template <bool is_scale255, bool is_sat>
void mul_U8_U8_U8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n)
{
@@ -488,7 +534,7 @@ void mul_U8_S16_S16_n(const void *__restrict input1_ptr, const void *__restrict
} // namespace
NEPixelWiseMultiplicationKernel::NEPixelWiseMultiplicationKernel()
- : _func_float(nullptr), _func_int(nullptr), _func_qasymm8(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
+ : _func_float(nullptr), _func_int(nullptr), _func_quantized(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
{
}
@@ -508,7 +554,7 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
_output = output;
_scale = scale;
_scale_exponent = 0;
- _func_qasymm8 = nullptr;
+ _func_quantized = nullptr;
_func_int = nullptr;
_func_float = nullptr;
@@ -536,7 +582,11 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
if(dt_input1 == DataType::QASYMM8 && dt_input2 == DataType::QASYMM8)
{
- _func_qasymm8 = &mul_saturate_QASYMM8_QASYMM8_QASYMM8_n;
+ _func_quantized = &mul_saturate_QASYMM8_QASYMM8_QASYMM8_n;
+ }
+ else if(dt_input1 == DataType::QSYMM16 && dt_input2 == DataType::QSYMM16)
+ {
+ _func_quantized = &mul_saturate_QSYMM16_QSYMM16_QSYMM16_n;
}
else if(DataType::U8 == dt_input1 && DataType::U8 == dt_input2 && DataType::U8 == dt_output)
{
@@ -655,12 +705,12 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo
Iterator input2(_input2, slice_input2);
Iterator output(_output, slice);
- if(_func_qasymm8 != nullptr)
+ if(is_data_type_quantized(_input1->info()->data_type()))
{
execute_window_loop(collapsed, [&](const Coordinates &)
{
- (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale,
- _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
+ (*_func_quantized)(input1.ptr(), input2.ptr(), output.ptr(), _scale,
+ _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
collapsed.slide_window_slice_3D(slice_input1);
collapsed.slide_window_slice_3D(slice_input2);
},
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index 0cc97a2c26..70b88ae9c8 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/core/Rounding.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
@@ -42,8 +43,28 @@ const float scale_unity = 1.f;
const float scale_255 = 1.f / 255.f;
const float scale_other = 1.f / 32768.f;
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit quantized asymmetric data types */
+constexpr AbsoluteTolerance<float> tolerance_qsymm16(1); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit quantized symmetric data types */
+
+const auto PixelWiseMultiplicationQSYMM16QuantDataset = combine(combine(
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0) }),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }));
+
+const auto PixelWiseMultiplicationQASYMM8QuantDataset = combine(combine(
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 32768.f, 0) }),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 32768.f, 0) }));
+
+const auto PixelWiseMultiplicationPolicySTNUDataset = combine(
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE }),
+ framework::dataset::make("RoundingPolicy", { RoundingPolicy::TO_NEAREST_UP }));
+
+const auto PixelWiseMultiplicationPolicySTZDataset = combine(
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE }),
+ framework::dataset::make("RoundingPolicy", { RoundingPolicy::TO_ZERO }));
+
#define DEFAULT_VALIDATE validate(Accessor(_target), _reference);
-#define QASYMM8_VALIDATE validate(Accessor(_target), _reference, AbsoluteTolerance<uint8_t>(1), 0.f);
#define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
#define WRAP_VALIDATE(TYPE, TOLERANCE) validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
@@ -76,23 +97,6 @@ const float scale_other = 1.f / 32768.f;
VALIDATE \
}
-#define PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, VALIDATE) \
- FIXTURE_DATA_TEST_CASE(TEST_NAME, NEPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE, \
- combine(combine(combine(combine(combine(combine(combine(combine( \
- datasets::SHAPES, \
- framework::dataset::make("DataType1", DataType::DT1)), \
- framework::dataset::make("DataType2", DataType::DT2)), \
- framework::dataset::make("Scale", std::move(SCALE))), \
- framework::dataset::make("ConvertPolicy", ConvertPolicy::SATURATE)), \
- framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)), \
- framework::dataset::make("QuantizationInfoIn1", QuantizationInfo(1.0 , 0))), \
- framework::dataset::make("QuantizationInfoIn2", QuantizationInfo(1.0 , 0))), \
- framework::dataset::make("QuantizationInfoOut", QuantizationInfo(100.0, 10)))) \
- { \
- VALIDATE \
- }
-
-
// *INDENT-ON*
// clang-format on
@@ -124,7 +128,8 @@ void validate_configuration(TensorShape shape, DataType dt1, DataType dt2, float
}
} // namespace
-using NEPixelWiseMultiplicationToQASYMM8Fixture = PixelWiseMultiplicationQuatizedValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, uint8_t, uint8_t>;
+using NEPixelWiseMultiplicationQASYMM8Fixture = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, uint8_t, uint8_t>;
+using NEPixelWiseMultiplicationQSYMM16Fixture = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, int16_t, int16_t>;
template <typename T>
using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, uint8_t>;
template <typename T>
@@ -179,24 +184,132 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
// clang-format on
// *INDENT-ON*
-TEST_SUITE(QASYMM8toQASYMM8)
-
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
TEST_SUITE(Scale255)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_255 })),
+ PixelWiseMultiplicationPolicySTNUDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_255 })),
+ PixelWiseMultiplicationPolicySTNUDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
TEST_SUITE_END() // Scale255
-
TEST_SUITE(ScaleUnity)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_unity })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_unity })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // ScaleUnity
+TEST_SUITE(ScaleOther)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_other })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_other })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // ScaleOther
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QSYMM16)
+TEST_SUITE(Scale255)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_255 })),
+ PixelWiseMultiplicationPolicySTNUDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_255 })),
+ PixelWiseMultiplicationPolicySTNUDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Scale255
+TEST_SUITE(ScaleUnity)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_unity })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_unity })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
TEST_SUITE_END() // ScaleUnity
-
TEST_SUITE(ScaleOther)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_other })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPixelWiseMultiplicationQSYMM16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("Scale", { scale_other })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQSYMM16QuantDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
TEST_SUITE_END() // ScaleOther
-
-TEST_SUITE_END() // QASYMM8toQASYMM8
+TEST_SUITE_END() // QSYMM16
+TEST_SUITE_END() // Quantized
TEST_SUITE(U8toU8)