aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2017-07-11 13:54:43 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:16:42 +0100
commit d7a5d22dd6b2a968469ea511f11907b131ec1c67 (patch)
tree 00e16ee769fb2fe91d29224e2eb7ea71f06d069b /src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
parent 7281834f8554670c9d8ae59069a4d80adec21fa7 (diff)
download ComputeLibrary-d7a5d22dd6b2a968469ea511f11907b131ec1c67.tar.gz
COMPMID-421: Added FP16 support to Arithmetic Subtraction.
Change-Id: I2043531e8e81f28354a208ff91024c3954389422
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80304
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp 52
1 files changed, 49 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index cac2a6bd05..be8574317b 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -157,6 +157,45 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o
input1, input2, output);
}
+#ifdef ARM_COMPUTE_ENABLE_FP16
+inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) // Returns a - b for a pair of FP16 vectors, handling val[0] and val[1] independently.
+{
+ const float16x8x2_t res = // Aggregate-initialised in one step so the result can stay const.
+ {
+ {
+ vsubq_f16(a.val[0], b.val[0]), // a.val[0] - b.val[0]
+ vsubq_f16(a.val[1], b.val[1]) // a.val[1] - b.val[1]
+ }
+ };
+
+ return res;
+}
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
+void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) // Writes in1 - in2 to out over window using FP16 NEON intrinsics; raises an error when FP16 support is not compiled in.
+{
+#ifdef ARM_COMPUTE_ENABLE_FP16
+ Iterator input1(in1, window); // One iterator per tensor, all constructed from the same window.
+ Iterator input2(in2, window);
+ Iterator output(out, window);
+
+ execute_window_loop(window, [&](const Coordinates & id) // id is not used by the lambda body; positions come from the iterators.
+ {
+ const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr())); // Load two FP16 vectors from input1's current position.
+ const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr())); // Same load pattern for input2, so lanes of a and b correspond.
+
+ vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b)); // Store a - b with the 2-vector store that mirrors the vld2q loads above.
+ },
+ input1, input2, output);
+#else /* ARM_COMPUTE_ENABLE_FP16 */
+ ARM_COMPUTE_UNUSED(in1); // No computation happens in this branch; presumably these silence unused-parameter warnings.
+ ARM_COMPUTE_UNUSED(in2);
+ ARM_COMPUTE_UNUSED(out);
+ ARM_COMPUTE_UNUSED(window);
+ ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a"); // Report that this build lacks FP16 support.
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+}
+
void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window);
@@ -328,6 +367,10 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{
set_format_if_unknown(*output->info(), Format::S16);
}
+ else if(input1->info()->data_type() == DataType::F16 || input2->info()->data_type() == DataType::F16)
+ {
+ set_format_if_unknown(*output->info(), Format::F16);
+ }
else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32)
{
set_format_if_unknown(*output->info(), Format::F32);
@@ -335,9 +378,9 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
}
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
@@ -364,6 +407,9 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{ "sub_saturate_S16_S16_S16", &sub_saturate_S16_S16_S16 },
{ "sub_wrap_F32_F32_F32", &sub_F32_F32_F32 },
{ "sub_saturate_F32_F32_F32", &sub_F32_F32_F32 },
+ { "sub_wrap_F16_F16_F16", &sub_F16_F16_F16 },
+ { "sub_saturate_F16_F16_F16", &sub_F16_F16_F16 },
+
};
_input1 = input1;