aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h12
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp52
-rw-r--r--tests/validation/NEON/ArithmeticAddition.cpp16
3 files changed, 71 insertions, 9 deletions
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index b36ca46e1a..9bfdde1616 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -50,9 +50,9 @@ public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16 (only if @p input1 is F16)/F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
* @param[in] policy Overflow policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
@@ -63,9 +63,9 @@ public:
private:
/** Common signature for all the specialised add functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F32.
- * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16 (only if @p input1 is F16)/F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if both inputs are F16), F32 (only if both inputs are F32).
* @param[in] window Region on which to execute the kernel.
*/
using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index 60b8006d21..c0809eb9fa 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -112,6 +112,45 @@ inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b)
return res;
}
+#ifdef ARM_COMPUTE_ENABLE_FP16
+inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
+{
+ const float16x8x2_t res =
+ {
+ {
+ vaddq_f16(a.val[0], b.val[0]),
+ vaddq_f16(a.val[1], b.val[1])
+ }
+ };
+
+ return res;
+}
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
+void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+#ifdef ARM_COMPUTE_ENABLE_FP16
+ Iterator input1(in1, window);
+ Iterator input2(in2, window);
+ Iterator output(out, window);
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr()));
+ const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr()));
+
+ vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vadd2q_f16(a, b));
+ },
+ input1, input2, output);
+#else /* ARM_COMPUTE_ENABLE_FP16 */
+ ARM_COMPUTE_UNUSED(in1);
+ ARM_COMPUTE_UNUSED(in2);
+ ARM_COMPUTE_UNUSED(out);
+ ARM_COMPUTE_UNUSED(window);
+ ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+}
+
void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window);
@@ -302,6 +341,10 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
{
set_format_if_unknown(*output->info(), Format::S16);
}
+ else if(input1->info()->data_type() == DataType::F16 || input2->info()->data_type() == DataType::F16)
+ {
+ set_format_if_unknown(*output->info(), Format::F16);
+ }
else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32)
{
set_format_if_unknown(*output->info(), Format::F32);
@@ -309,9 +352,9 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
}
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
@@ -329,6 +372,9 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
{ "add_saturate_S16_S16_S16", &add_saturate_S16_S16_S16 },
{ "add_wrap_F32_F32_F32", &add_F32_F32_F32 },
{ "add_saturate_F32_F32_F32", &add_F32_F32_F32 },
+ { "add_wrap_F16_F16_F16", &add_F16_F16_F16 },
+ { "add_saturate_F16_F16_F16", &add_F16_F16_F16 },
+
};
_input1 = input1;
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index def203b95f..65c3295e18 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -184,6 +184,22 @@ BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ Da
}
BOOST_AUTO_TEST_SUITE_END()
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(F16)
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape)
+{
+ // Compute function
+ Tensor dst = compute_arithmetic_addition(shape, DataType::F16, DataType::F16, DataType::F16, ConvertPolicy::WRAP);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::F16, DataType::F16, DataType::F16, ConvertPolicy::WRAP);
+
+ // Validate output
+ validate(NEAccessor(dst), ref_dst);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
BOOST_AUTO_TEST_SUITE(F32)
BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),