aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2017-07-17 15:06:34 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:16:42 +0100
commit1b80b6c7255e41257fed3b4dd0fa018e2eeee4c2 (patch)
treecb927c37b64173545c30b5a7d4b0ad64082b93c6
parent2ac5040c9b21734610b51b232ddac5a9067aa2c2 (diff)
downloadComputeLibrary-1b80b6c7255e41257fed3b4dd0fa018e2eeee4c2.tar.gz
COMPMID-447: Support scaling factors different than 1 for QS8/QS16 NEPixelWiseMultiplication.
Change-Id: I6d90a18df861d53546bdca982192b4ffc0dbb3c2 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80794 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp108
-rw-r--r--tests/validation/NEON/PixelWiseMultiplication.cpp90
-rw-r--r--tests/validation/TensorOperations.h20
3 files changed, 176 insertions, 42 deletions
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 150db39695..33663eb57d 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -131,60 +131,100 @@ void mul_U8_U8_U8_n(const void *__restrict input1_ptr, const void *__restrict in
template <bool is_scale255, bool is_sat>
void mul_QS8_QS8_QS8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n, int fixed_point_position)
{
- // n is the exponent of the scaling factor, that is scale = 1/2^n. Currently, we only support scaling factor equal to 1 => n = 0.
- ARM_COMPUTE_ERROR_ON_MSG(n != 0, "Scaling factor different than 1 not supported for 8-bit fixed-point pixel-wise multiplication");
- ARM_COMPUTE_UNUSED(n);
-
- const auto input1 = static_cast<const qint8_t *__restrict>(input1_ptr);
- const auto input2 = static_cast<const qint8_t *__restrict>(input2_ptr);
const auto output = static_cast<qint8_t *__restrict>(output_ptr);
- const qint8x16_t ta1 = vld1q_qs8(input1);
- const qint8x16_t ta2 = vld1q_qs8(input2);
+ const qint8x16_t ta1 = vld1q_qs8(static_cast<const qint8_t *__restrict>(input1_ptr));
+ const qint8x16_t ta2 = vld1q_qs8(static_cast<const qint8_t *__restrict>(input2_ptr));
+
+ if(is_scale255)
+ {
+ qint16x8_t tmp1_high = vmovl_s8(vget_high_s8(ta1));
+ qint16x8_t tmp1_low = vmovl_s8(vget_low_s8(ta1));
+ const qint16x8_t tmp2_high = vmovl_s8(vget_high_s8(ta2));
+ const qint16x8_t tmp2_low = vmovl_s8(vget_low_s8(ta2));
+
+ const float32x4x2_t scale255_f32 =
+ {
+ {
+ scale255_constant_f32q,
+ scale255_constant_f32q
+ }
+ };
+ const qint16x8_t scale255 = vqcvtq_qs16_f32(scale255_f32, fixed_point_position);
- qint8x16_t res = (is_sat) ? vqmulq_qs8(ta1, ta2, fixed_point_position) : vmulq_qs8(ta1, ta2, fixed_point_position);
+ tmp1_high = vmulq_qs16(tmp1_high, tmp2_high, fixed_point_position);
+ tmp1_low = vmulq_qs16(tmp1_low, tmp2_low, fixed_point_position);
+ tmp1_high = vmulq_qs16(tmp1_high, scale255, fixed_point_position);
+ tmp1_low = vmulq_qs16(tmp1_low, scale255, fixed_point_position);
- vst1q_s8(output, res);
+ if(is_sat)
+ {
+ vst1q_qs8(output, vcombine_s8(vqmovn_s16(tmp1_low), vqmovn_s16(tmp1_high)));
+ }
+ else
+ {
+ vst1q_qs8(output, vcombine_s8(vmovn_s16(tmp1_low), vmovn_s16(tmp1_high)));
+ }
+ }
+ else
+ {
+ const qint8x16_t vn = vdupq_n_s8(-n);
+ qint8x16_t res = ta2;
+
+ if(is_sat)
+ {
+ res = vqshlq_s8(vqmulq_qs8(ta1, res, fixed_point_position), vn);
+ }
+ else
+ {
+ res = vshlq_s8(vmulq_qs8(ta1, res, fixed_point_position), vn);
+ }
+ vst1q_qs8(output, res);
+ }
}
template <bool is_scale255, bool is_sat>
void mul_QS16_QS16_QS16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n, int fixed_point_position)
{
- // n is the exponent of the scaling factor, that is scale = 1/2^n. Currently, we only support scaling factor equal to 1 => n = 0.
- ARM_COMPUTE_ERROR_ON_MSG(n != 0, "Scaling factor different than 1 not supported for 16-bit fixed-point pixel-wise multiplication");
- ARM_COMPUTE_UNUSED(n);
-
const qint16x8x2_t ta1 = vld2q_qs16(static_cast<const qint16_t *__restrict>(input1_ptr));
- const qint16x8x2_t ta2 = vld2q_qs16(static_cast<const qint16_t *__restrict>(input2_ptr));
+ qint16x8x2_t res = vld2q_qs16(static_cast<const qint16_t *__restrict>(input2_ptr));
- if(is_sat)
+ if(is_scale255)
{
- const qint16x8x2_t res =
+ const float32x4x2_t scale255_f32 =
{
{
- // First 8 elements
- vqmulq_qs16(ta1.val[0], ta2.val[0], fixed_point_position),
- // Second 8 elements
- vqmulq_qs16(ta1.val[1], ta2.val[1], fixed_point_position)
+ scale255_constant_f32q,
+ scale255_constant_f32q
}
};
-
- vst2q_s16(static_cast<qint16_t *__restrict>(output_ptr), res);
+ const qint16x8_t scale255 = vqcvtq_qs16_f32(scale255_f32, fixed_point_position);
+ if(is_sat)
+ {
+ res.val[0] = vqmulq_qs16(vqmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), scale255, fixed_point_position);
+ res.val[1] = vqmulq_qs16(vqmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), scale255, fixed_point_position);
+ }
+ else
+ {
+ res.val[0] = vmulq_qs16(vmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), scale255, fixed_point_position);
+ res.val[1] = vmulq_qs16(vmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), scale255, fixed_point_position);
+ }
}
else
{
- const qint16x8x2_t res =
+ const qint16x8_t vn = vdupq_n_s16(-n);
+ if(is_sat)
{
- {
- // First 8 elements
- vmulq_qs16(ta1.val[0], ta2.val[0], fixed_point_position),
- // Second 8 elements
- vmulq_qs16(ta1.val[1], ta2.val[1], fixed_point_position)
- }
- };
-
- vst2q_s16(static_cast<qint16_t *__restrict>(output_ptr), res);
+ res.val[0] = vqshlq_s16(vqmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), vn);
+ res.val[1] = vqshlq_s16(vqmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), vn);
+ }
+ else
+ {
+ res.val[0] = vshlq_s16(vmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), vn);
+ res.val[1] = vshlq_s16(vmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), vn);
+ }
}
+ vst2q_s16(static_cast<qint16_t *__restrict>(output_ptr), res);
}
template <bool is_scale255, bool is_sat>
@@ -438,6 +478,8 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
{
// Check that all data types are the same and all fixed-point positions are the same
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
+ // Check if scale is representable in fixed-point with the provided settings
+ ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(scale, input1);
}
_input1 = input1;
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index c46ad6a9d7..f80944821c 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -442,6 +442,36 @@ BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * DataType::QS8 *ConvertPolicies()
validate(Accessor(dst), ref_dst);
}
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallScale255, SmallShapes() * DataType::QS8 * (1.f / 255.f) * ConvertPolicies() * RoundingPolicy::TO_NEAREST_UP * boost::unit_test::data::xrange(1, 7),
+ shape, dt, scale, convert_policy, rounding_policy, fixed_point_position)
+{
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallScaleOther, SmallShapes() * DataType::QS8 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange(1, 7),
+ shape, dt, convert_policy, rounding_policy, fixed_point_position)
+{
+ const float scale = 1.f / static_cast<float>(1 << fixed_point_position);
+
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst, 1.f);
+}
+
BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * DataType::QS8 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange<int>(1, 7),
shape, dt, convert_policy, rounding_policy, fixed_point_position)
@@ -455,6 +485,36 @@ BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * DataType::QS8 *ConvertPolicies()
// Validate output
validate(Accessor(dst), ref_dst);
}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLargeScale255, LargeShapes() * DataType::QS8 * (1.f / 255.f) * ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange(1, 7),
+ shape, dt, scale, convert_policy, rounding_policy, fixed_point_position)
+{
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLargeScaleOther, LargeShapes() * DataType::QS8 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange(1, 7),
+ shape, dt, convert_policy, rounding_policy, fixed_point_position)
+{
+ const float scale = 1.f / static_cast<float>(1 << fixed_point_position);
+
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst, 1.f);
+}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(QS16)
@@ -472,6 +532,36 @@ BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * DataType::QS16 *ConvertPolicies()
validate(Accessor(dst), ref_dst);
}
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallScale255, SmallShapes() * DataType::QS16 * (1.f / 255.f) * ConvertPolicies() * RoundingPolicy::TO_NEAREST_UP * boost::unit_test::data::xrange(1, 15),
+ shape, dt, scale, convert_policy, rounding_policy, fixed_point_position)
+{
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallScaleOther, SmallShapes() * DataType::QS16 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange(1, 15),
+ shape, dt, convert_policy, rounding_policy, fixed_point_position)
+{
+ const float scale = 1.f / static_cast<float>(1 << fixed_point_position);
+
+ // Compute function
+ Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, scale, convert_policy, rounding_policy, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, scale, fixed_point_position, convert_policy, rounding_policy);
+
+ // Validate output
+ validate(Accessor(dst), ref_dst, 1.f);
+}
+
BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * DataType::QS16 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange<int>(1, 15),
shape, dt, convert_policy, rounding_policy, fixed_point_position)
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 4d067ac748..319047816c 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -866,7 +866,7 @@ void pixel_wise_multiplication(const Tensor<T1> &in1, const Tensor<T2> &in2, Ten
// Fixed-point Pixel-wise Multiplication
template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
-void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out, int scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
{
using namespace fixed_point_arithmetic;
@@ -881,18 +881,20 @@ void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T>
ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS8) && (fixed_point_position == 0 || fixed_point_position > 7));
ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS16) && (fixed_point_position == 0 || fixed_point_position > 15));
- fixed_point<T> fp_scale(scale, fixed_point_position);
- const bool is_sat = convert_policy == ConvertPolicy::SATURATE;
- const bool do_scaling = scale != 1;
+ const fixed_point<T> fp_scale(scale, fixed_point_position);
+ const bool is_sat = convert_policy == ConvertPolicy::SATURATE;
for(int i = 0; i < in1.num_elements(); ++i)
{
- fixed_point<T> val1(in1[i], fixed_point_position, true);
- fixed_point<T> val2(in2[i], fixed_point_position, true);
- fixed_point<T> res = (is_sat) ? val1 * val2 : mul<OverflowPolicy::WRAP>(val1, val2);
- if(do_scaling)
+ const fixed_point<T> val1(in1[i], fixed_point_position, true);
+ fixed_point<T> res(in2[i], fixed_point_position, true);
+ if(is_sat)
{
- res = (is_sat) ? res * fp_scale : mul<OverflowPolicy::WRAP>(res, fp_scale);
+ res = mul(mul(res, val1), fp_scale);
+ }
+ else
+ {
+ res = mul<OverflowPolicy::WRAP>(mul<OverflowPolicy::WRAP>(res, val1), fp_scale);
}
out[i] = res.raw();
}