aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2019-12-04 09:46:28 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2019-12-20 09:56:36 +0000
commitd817647a4fabc8eccd0e64f54465e378a4239b32 (patch)
tree361ba03c78d83974cf7396510c85feb6dd3159e6
parente7be8a072967f9ae547468a7625e11477ea32221 (diff)
downloadComputeLibrary-d817647a4fabc8eccd0e64f54465e378a4239b32.tar.gz
COMPMID-2767 [NEON] add support for QASYMM8_SIGNED to DequantizationLayer
Change-Id: If5b21d1e656b21baf39346c2fd74e8edc75007f5 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/2429 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--arm_compute/core/NEON/NEAsymm.h24
-rw-r--r--src/core/NEON/kernels/NEDequantizationLayerKernel.cpp19
-rw-r--r--tests/AssetsLibrary.h1
-rw-r--r--tests/validation/NEON/DequantizationLayer.cpp26
-rw-r--r--tests/validation/fixtures/DequantizationLayerFixture.h3
-rw-r--r--tests/validation/reference/DequantizationLayer.cpp21
6 files changed, 75 insertions, 19 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 234d48882c..67adcef9b1 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -451,6 +451,30 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs
return vdequantized_input;
}
+/** Dequantize a vector of 16 values stored as signed asymmetric.
+ *
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] scale Quantization scaling factor.
+ * @param[in] offset Zero quantization offset.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale, int32_t offset)
+{
+ const int32x4_t voffset = vdupq_n_s32(offset);
+ const float32x4_t vscale = vdupq_n_f32(scale);
+ const float32x4x4_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
+ }
+ };
+ return vdequantized_input;
+}
+
/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values.
*
* @param[in] qv Input values to be dequantized.
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index f555df3828..947f257bcb 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -43,7 +43,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
@@ -116,7 +116,7 @@ inline void store_result<float16_t>(float16_t *ptr, const float32x4x2_t &v)
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-template <typename T>
+template <typename TOut, typename TIn>
void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window)
{
const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
@@ -137,8 +137,8 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
execute_window_loop(win_collapsed, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
- const auto out_ptr = reinterpret_cast<T *>(out.ptr());
+ const auto in_ptr = reinterpret_cast<const TIn *>(in.ptr());
+ const auto out_ptr = reinterpret_cast<TOut *>(out.ptr());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
@@ -146,14 +146,14 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
const auto vin = wrapper::vloadq(in_ptr + x);
const auto vdeq = vdequantize(vin, scale, offset);
- store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
+ store_result(reinterpret_cast<TOut *>(out_ptr + x), vdeq);
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- uint8_t val = *(in_ptr + x);
- *(out_ptr + x) = static_cast<T>(dequantize(val, scale, offset));
+ auto val = *(in_ptr + x);
+ *(out_ptr + x) = static_cast<TOut>(Qasymm8QuantizationHelper<TIn>::dequantize(val, qinfo));
}
},
in, out);
@@ -340,7 +340,10 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window
switch(input->info()->data_type())
{
case DataType::QASYMM8:
- run_dequantization_qasymm8<T>(input, output, window);
+ run_dequantization_qasymm8<T, uint8_t>(input, output, window);
+ break;
+ case DataType::QASYMM8_SIGNED:
+ run_dequantization_qasymm8<T, int8_t>(input, output, window);
break;
case DataType::QSYMM8_PER_CHANNEL:
input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qsymm8_per_channel_nchw<T>(input, output, window);
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 29d9cc6d49..c4892748f4 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -846,6 +846,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
}
case DataType::S8:
case DataType::QSYMM8:
+ case DataType::QASYMM8_SIGNED:
{
ARM_COMPUTE_ERROR_ON(!(std::is_same<int8_t, D>::value));
std::uniform_int_distribution<int8_t> distribution_s8(low, high);
diff --git a/tests/validation/NEON/DequantizationLayer.cpp b/tests/validation/NEON/DequantizationLayer.cpp
index 0dce76a933..4389419d73 100644
--- a/tests/validation/NEON/DequantizationLayer.cpp
+++ b/tests/validation/NEON/DequantizationLayer.cpp
@@ -55,6 +55,14 @@ const auto dataset_quant_f32 = combine(combine(combine(datasets::SmallShapes(),
const auto dataset_quant_f16 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_asymm_signed_f32 = combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("QuantizedTypes", { DataType::QASYMM8_SIGNED })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_asymm_signed_f16 = combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("QuantizedTypes", { DataType::QASYMM8_SIGNED })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW }));
const auto dataset_quant_per_channel_f32 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedPerChannelTypes()),
framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
@@ -73,6 +81,12 @@ const auto dataset_quant_per_channel_nightly_f32 = combine(combine(combine(datas
const auto dataset_quant_per_channel_nightly_f16 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedPerChannelTypes()),
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+
+const auto dataset_precommit_f16 = concat(concat(dataset_quant_f16, dataset_quant_per_channel_f16), dataset_quant_asymm_signed_f16);
+const auto dataset_precommit_f32 = concat(concat(dataset_quant_f32, dataset_quant_per_channel_f32), dataset_quant_asymm_signed_f32);
+const auto dataset_nightly_f16 = concat(dataset_quant_f16, dataset_quant_per_channel_f16);
+const auto dataset_nightly_f32 = concat(dataset_quant_f32, dataset_quant_per_channel_f32);
+
} // namespace
TEST_SUITE(NEON)
@@ -86,14 +100,16 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::QASYMM8), // Missmatching shapes
TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Valid
TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Valid
+ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8_SIGNED), // Valid
}),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { false, false, false, true, true})),
+ framework::dataset::make("Expected", { false, false, false, true, true, true })),
input_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(NEDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -132,12 +148,12 @@ using NEDequantizationLayerFixture = DequantizationValidationFixture<Tensor, Acc
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f16, dataset_quant_per_channel_f16))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f16)
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f16, dataset_quant_per_channel_nightly_f16))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, dataset_nightly_f16)
{
// Validate output
validate(Accessor(_target), _reference);
@@ -146,12 +162,12 @@ TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f32, dataset_quant_per_channel_f32))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f32)
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f32, dataset_quant_per_channel_nightly_f32))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, dataset_nightly_f32)
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index f44f8658c2..3699613a39 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -106,6 +106,7 @@ protected:
fill(src);
return reference::dequantization_layer<T>(src);
}
+ case DataType::QASYMM8_SIGNED:
case DataType::QSYMM8_PER_CHANNEL:
case DataType::QSYMM8:
{
@@ -149,6 +150,8 @@ protected:
}
case DataType::QASYMM8:
return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen));
+ case DataType::QASYMM8_SIGNED:
+ return QuantizationInfo(1.f / distribution_scale_q8(gen), -distribution_offset_q8(gen));
default:
ARM_COMPUTE_ERROR("Unsupported data type");
}
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 16f25c4427..7dd36402b3 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -36,18 +36,27 @@ namespace reference
namespace
{
template <typename TOut>
-TOut dequantize(int8_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(int8_t val, const UniformQuantizationInfo qinfo, DataType dt)
{
- return static_cast<TOut>(dequantize_qsymm8(val, qinfo));
+ if(dt == DataType::QSYMM8 || dt == DataType::QSYMM8_PER_CHANNEL)
+ {
+ return static_cast<TOut>(dequantize_qsymm8(val, qinfo));
+ }
+ else
+ {
+ return static_cast<TOut>(dequantize_qasymm8_signed(val, qinfo));
+ }
}
template <typename TOut>
-TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo, DataType dt)
{
+ ARM_COMPUTE_UNUSED(dt);
return static_cast<TOut>(dequantize_qasymm8(val, qinfo));
}
template <typename TOut>
-TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt)
{
+ ARM_COMPUTE_UNUSED(dt);
return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
}
} // namespace
@@ -77,7 +86,7 @@ SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
// Dequantize slice
for(int s = 0; s < WH; ++s)
{
- dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo);
+ dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo, src_data_type);
}
}
}
@@ -89,7 +98,7 @@ SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info));
+ dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info, src_data_type));
}
}