author     Michele Di Giorgio <michele.digiorgio@arm.com>    2019-10-01 12:25:49 +0100
committer  Michele Di Giorgio <michele.digiorgio@arm.com>    2019-10-01 17:26:16 +0000
commit     d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a (patch)
tree       e4b2a1e670a6002cd70e920ad7043c090b5d25f1
parent     79f88e6d825402388bb79fc123ee2dfe01985bda (diff)
download   ComputeLibrary-d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a.tar.gz
COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer
Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2016
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h                             |  34
-rw-r--r--  arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h   |  25
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h    |   4
-rw-r--r--  src/core/NEON/kernels/NEQuantizationLayerKernel.cpp         | 100
-rw-r--r--  src/runtime/NEON/functions/NEQuantizationLayer.cpp          |   5
-rw-r--r--  tests/validation/NEON/QuantizationLayer.cpp                 |  41
6 files changed, 176 insertions(+), 33 deletions(-)
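For reference, a minimal standalone sketch (not part of the patch; the function name is made up, and the 0..65535 bound simply follows from QASYMM16 being an unsigned 16-bit format) of the scalar transform that the new vectorised path implements: q = round(value / scale) + offset, saturated to the unsigned 16-bit range.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hedged sketch: scalar QASYMM16 quantization equivalent to the vector path added in NEAsymm.h.
uint16_t quantize_to_qasymm16_sketch(float value, float scale, int32_t offset)
{
    const int32_t q       = static_cast<int32_t>(std::lround(value / scale)) + offset;
    const int32_t clamped = std::max<int32_t>(0, std::min<int32_t>(q, 65535));
    return static_cast<uint16_t>(clamped);
}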
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f2d20d373a..56d4c09f92 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -331,6 +331,40 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationIn
const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
return vcombine_u8(pa, pb);
}
+
+/** Quantize a neon vector holding 16 floating point values to QASYMM16.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const float32x4_t voffset = vdupq_n_f32(offset);
+ const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+ const int32x4x4_t rf =
+ {
+ {
+#ifdef __aarch64__
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#else //__aarch64__
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#endif //__aarch64__
+ }
+ };
+ const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));
+ const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
+ return { pa, pb };
+}
} // namespace arm_compute
#include "arm_compute/core/NEON/NEAsymm.inl"
#endif // __ARM_COMPUTE_NEASYMM_H__
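A hedged usage sketch of the new helper (illustrative only, not taken from the patch; quantize_block16 is a made-up name and the include is assumed to pull in UniformQuantizationInfo): load 16 contiguous floats, quantize them, and store the two resulting uint16x8_t halves, mirroring the load/store pattern used by the kernel below.

#include <arm_neon.h>
#include <cstdint>
#include "arm_compute/core/NEON/NEAsymm.h"

// Illustrative only: quantize 16 contiguous floats to QASYMM16 with vquantize_qasymm16.
void quantize_block16(const float *src, uint16_t *dst, const arm_compute::UniformQuantizationInfo &qi)
{
    const float32x4x4_t in  = { { vld1q_f32(src), vld1q_f32(src + 4), vld1q_f32(src + 8), vld1q_f32(src + 12) } };
    const uint16x8x2_t  out = arm_compute::vquantize_qasymm16(in, qi);
    vst1q_u16(dst,     out.val[0]);
    vst1q_u16(dst + 8, out.val[1]);
}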
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 391a72c6db..e1aaad5094 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -57,13 +57,15 @@ public:
/** Set the input, output.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
*
* @param[in] input Input tensor info. Data types supported: F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8.
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
*
* @return a status
*/
@@ -73,11 +75,28 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
+ /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
+ /** Function to apply QASYMM8 quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
template <typename T>
- void quantize(const Window &window, const QuantizationInfo &qinfo);
+ void run_quantize_qasymm8(const Window &window);
+ /** Function to apply QASYMM16 quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T>
+ void run_quantize_qasymm16(const Window &window);
const ITensor *_input;
ITensor *_output;
+
+ QuantizationFunctionExecutorPtr _func;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 46a62bd903..25609324a0 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -49,13 +49,13 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QSYMM16
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
*
* @param[in] input Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QSYMM16
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
*
* @return a status
*/
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 0aa34cd411..6a9c4ae14c 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -34,9 +34,10 @@
#include "arm_compute/core/CPP/Validate.h"
#include <arm_neon.h>
+#include <map>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
@@ -45,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM16);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
return Status{};
@@ -71,7 +72,7 @@ inline const float32x4x4_t load_value(const float16_t *input_ptr)
} // namespace
NEQuantizationLayerKernel::NEQuantizationLayerKernel()
- : _input(nullptr), _output(nullptr)
+ : _input(nullptr), _output(nullptr), _func(nullptr)
{
}
@@ -83,6 +84,33 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output)
_input = input;
_output = output;
+ static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f32 =
+ {
+ { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float> },
+ { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float> },
+ };
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f16 =
+ {
+ { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t> },
+ { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float16_t> },
+ };
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+
+ switch(input->info()->data_type())
+ {
+ case DataType::F32:
+ _func = quant_map_f32[output->info()->data_type()];
+ break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ _func = quant_map_f16[output->info()->data_type()];
+ break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Unsupported input data type.");
+ }
+
// Configure kernel window
Window win_config = calculate_max_window(*input->info(), Steps());
@@ -96,18 +124,17 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output)
Status NEQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
-
return Status{};
}
template <typename T>
-void NEQuantizationLayerKernel::quantize(const Window &window, const QuantizationInfo &qinfo)
+void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window)
{
constexpr auto window_step = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const UniformQuantizationInfo uqinfo = qinfo.uniform();
+ const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
#ifdef __aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
#else //__aarch64__
@@ -139,25 +166,54 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio
input, output);
}
+template <typename T>
+void NEQuantizationLayerKernel::run_quantize_qasymm16(const Window &window)
+{
+ constexpr auto window_step = 16;
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+
+ const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
+#ifdef __aarch64__
+ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
+#else //__aarch64__
+ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
+#endif //__aarch64__
+
+ // Collapse window and reset first dimension to handle tail calculations manually
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step); x += window_step)
+ {
+ uint16x8x2_t tmp = vquantize_qasymm16(load_value(&input_ptr[x]), uqinfo);
+ vst1q_u16(&output_ptr[x], tmp.val[0]);
+ vst1q_u16(&output_ptr[x + 8], tmp.val[1]);
+ }
+ // Compute left-over elements
+ for(; x < window_end_x; ++x)
+ {
+ output_ptr[x] = quantize_qasymm16(input_ptr[x], uqinfo, rounding_policy);
+ }
+ },
+ input, output);
+}
+
void NEQuantizationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
- const QuantizationInfo &qinfo = _output->info()->quantization_info();
-
- switch(_input->info()->data_type())
- {
- case DataType::F32:
- NEQuantizationLayerKernel::quantize<float>(window, qinfo);
- break;
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- NEQuantizationLayerKernel::quantize<float16_t>(window, qinfo);
- break;
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
+ (this->*_func)(window);
}
+} // namespace arm_compute
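The kernel now binds its execution path once in configure() through a member-function pointer and simply invokes it in run(), rather than switching on the data type on every call. A minimal standalone sketch of that dispatch pattern (class and method names here are illustrative, not the library's):

#include <iostream>
#include <map>

// Illustrative only: configure-time dispatch through a member-function pointer,
// mirroring the pattern NEQuantizationLayerKernel now uses.
class ExampleKernel
{
public:
    using ExecutorPtr = void (ExampleKernel::*)(int);

    void configure(bool is_16bit_output)
    {
        static const std::map<bool, ExecutorPtr> quant_map =
        {
            { false, &ExampleKernel::run_8bit },
            { true,  &ExampleKernel::run_16bit },
        };
        _func = quant_map.at(is_16bit_output); // selected once, reused on every run()
    }

    void run(int window)
    {
        (this->*_func)(window); // no per-run switch on the data type
    }

private:
    void run_8bit(int window)  { std::cout << "8-bit path, window "  << window << "\n"; }
    void run_16bit(int window) { std::cout << "16-bit path, window " << window << "\n"; }

    ExecutorPtr _func{ nullptr };
};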
diff --git a/src/runtime/NEON/functions/NEQuantizationLayer.cpp b/src/runtime/NEON/functions/NEQuantizationLayer.cpp
index 65873b1b14..4464978762 100644
--- a/src/runtime/NEON/functions/NEQuantizationLayer.cpp
+++ b/src/runtime/NEON/functions/NEQuantizationLayer.cpp
@@ -27,8 +27,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
Status NEQuantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -46,3 +46,4 @@ void NEQuantizationLayer::configure(const ITensor *input, ITensor *output)
k->configure(input, output);
_kernel = std::move(k);
}
+} // namespace arm_compute
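From the caller's side, nothing changes except that the destination tensor may now be QASYMM16. A hedged end-to-end sketch (shapes and quantization parameters below are arbitrary assumptions, not taken from the patch):

#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

// Illustrative only: quantize an F32 tensor to QASYMM16 through the runtime function.
void quantize_to_qasymm16_example()
{
    using namespace arm_compute;

    Tensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::QASYMM16, QuantizationInfo(0.5f, 10)));

    NEQuantizationLayer quant{};
    quant.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with F32 data ...
    quant.run();
}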
diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp
index 8d19c93761..49118f7dc5 100644
--- a/tests/validation/NEON/QuantizationLayer.cpp
+++ b/tests/validation/NEON/QuantizationLayer.cpp
@@ -43,7 +43,8 @@ namespace validation
namespace
{
/** Tolerance for quantization */
-constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
+constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
+constexpr AbsoluteTolerance<uint16_t> tolerance_u16(1);
const auto QuantizationSmallShapes = concat(datasets::Small3DShapes(), datasets::Small4DShapes());
const auto QuantizationLargeShapes = concat(datasets::Large3DShapes(), datasets::Large4DShapes());
@@ -98,6 +99,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(QuantizationS
template <typename T>
using NEQuantizationLayerQASYMM8Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint8_t>;
+template <typename T>
+using NEQuantizationLayerQASYMM16Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint16_t>;
TEST_SUITE(Float)
TEST_SUITE(FP32)
@@ -109,6 +112,14 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<float>
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
}
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_u16);
+}
FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes,
framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
@@ -117,10 +128,16 @@ FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture<float>
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
}
+FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes,
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_u16);
+}
TEST_SUITE_END() // FP32
-TEST_SUITE_END() // Float
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-TEST_SUITE(Half)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
framework::dataset::make("DataType", DataType::F16)),
@@ -130,6 +147,14 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>,
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
}
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_u16);
+}
FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes,
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
@@ -138,9 +163,17 @@ FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>,
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
}
+FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes,
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_u16);
+}
TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Half
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE_END() // Float
TEST_SUITE_END() // QuantizationLayer
TEST_SUITE_END() // NEON