author    Michalis Spyrou <michalis.spyrou@arm.com>   2019-01-03 11:10:25 +0000
committer Michalis Spyrou <michalis.spyrou@arm.com>   2019-01-10 16:24:26 +0000
commit    aea14c63e2efeda9d5f7492099389d439c65204f (patch)
tree      176a6181bbf00e4df078d5da0a17dd44f248958e /arm_compute
parent    c10bc0b5db5169a6ccea02a1aaefe34f082709e5 (diff)
download  ComputeLibrary-aea14c63e2efeda9d5f7492099389d439c65204f.tar.gz
COMPMID-1764 NEON: Implement ArgMax/ArgMin
Change-Id: Ibe23aa90b36ffd8553d1d1c35fada5d300fab829
Reviewed-on: https://review.mlplatform.org/475
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/bsl.h (renamed from arm_compute/core/NEON/wrapper/intrinsics/bitselect.h) | 52
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/ceq.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/cgt.h (renamed from arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h) | 42
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/clt.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h | 9
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/orr.h | 60
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmax.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmin.h | 53
-rw-r--r--  arm_compute/core/utils/misc/ShapeCalculator.h | 15
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 78
11 files changed, 442 insertions, 49 deletions
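
In short: this patch adds an Arm NEON ArgMax/ArgMin reduction. Rather than returning the extreme value itself, the new NEArgMinMaxLayer writes, for every position along the reduced axis, the U32 index of the largest or smallest element. For example, reducing the row [3, 9, 1, 7] along its only axis gives 1 for ArgMax and 2 for ArgMin.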
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/bitselect.h b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
index 8223f6d463..9831b4b842 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/bitselect.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -14,15 +14,15 @@
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT SELECT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_WRAPPER_BITSELECT_H__
-#define __ARM_COMPUTE_WRAPPER_BITSELECT_H__
+#ifndef __ARM_COMPUTE_WRAPPER_BSL_H__
+#define __ARM_COMPUTE_WRAPPER_BSL_H__
#include <arm_neon.h>
@@ -30,35 +30,35 @@ namespace arm_compute
{
namespace wrapper
{
-#define VBITSELECT_IMPL(stype, vtype, ctype, prefix, postfix) \
- inline vtype vbitselect(const ctype &a, const vtype &b, const vtype &c) \
- { \
- return prefix##_##postfix(a, b, c); \
+#define VBSL_IMPL(vctype, vtype, prefix, postfix) \
+ inline vtype vbsl(const vctype &a, const vtype &b, const vtype &c) \
+ { \
+ return prefix##_##postfix(a, b, c); \
}
-VBITSELECT_IMPL(uint8_t, uint8x8_t, uint8x8_t, vbsl, u8)
-VBITSELECT_IMPL(int8_t, int8x8_t, uint8x8_t, vbsl, s8)
-VBITSELECT_IMPL(uint16_t, uint16x4_t, uint16x4_t, vbsl, u16)
-VBITSELECT_IMPL(int16_t, int16x4_t, uint16x4_t, vbsl, s16)
-VBITSELECT_IMPL(uint32_t, uint32x2_t, uint32x2_t, vbsl, u32)
-VBITSELECT_IMPL(int32_t, int32x2_t, uint32x2_t, vbsl, s32)
-VBITSELECT_IMPL(float32x2_t, float32x2_t, uint32x2_t, vbsl, f32)
+VBSL_IMPL(uint8x8_t, uint8x8_t, vbsl, u8)
+VBSL_IMPL(uint8x8_t, int8x8_t, vbsl, s8)
+VBSL_IMPL(uint16x4_t, uint16x4_t, vbsl, u16)
+VBSL_IMPL(uint16x4_t, int16x4_t, vbsl, s16)
+VBSL_IMPL(uint32x2_t, uint32x2_t, vbsl, u32)
+VBSL_IMPL(uint32x2_t, int32x2_t, vbsl, s32)
+VBSL_IMPL(uint32x2_t, float32x2_t, vbsl, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(float16x4_t, float16x4_t, uint16x4_t, vbsl, f16)
+VBSL_IMPL(uint16x4_t, float16x4_t, vbsl, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(uint8_t, uint8x16_t, uint8x16_t, vbslq, u8)
-VBITSELECT_IMPL(int8_t, int8x16_t, uint8x16_t, vbslq, s8)
-VBITSELECT_IMPL(uint16_t, uint16x8_t, uint16x8_t, vbslq, u16)
-VBITSELECT_IMPL(int16_t, int16x8_t, uint16x8_t, vbslq, s16)
-VBITSELECT_IMPL(uint32_t, uint32x4_t, uint32x4_t, vbslq, u32)
-VBITSELECT_IMPL(int32_t, int32x4_t, uint32x4_t, vbslq, s32)
-VBITSELECT_IMPL(float32x4_t, float32x4_t, uint32x4_t, vbslq, f32)
+VBSL_IMPL(uint8x16_t, uint8x16_t, vbslq, u8)
+VBSL_IMPL(uint8x16_t, int8x16_t, vbslq, s8)
+VBSL_IMPL(uint16x8_t, uint16x8_t, vbslq, u16)
+VBSL_IMPL(uint16x8_t, int16x8_t, vbslq, s16)
+VBSL_IMPL(uint32x4_t, uint32x4_t, vbslq, u32)
+VBSL_IMPL(uint32x4_t, int32x4_t, vbslq, s32)
+VBSL_IMPL(uint32x4_t, float32x4_t, vbslq, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(float16x8_t, float16x8_t, uint16x8_t, vbslq, f16)
+VBSL_IMPL(uint16x8_t, float16x8_t, vbslq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VBITSELECT_IMPL
+#undef VBSL_IMPL
} // namespace wrapper
} // namespace arm_compute
-#endif /* __ARM_COMPUTE_WRAPPER_BITSELECT_H__ */
+#endif /* __ARM_COMPUTE_WRAPPER_BSL_H__ */
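
The renamed vbsl wrapper maps directly onto the NEON vbsl/vbslq bit-select intrinsics: the first argument is a lane mask, and each output lane is taken from the second argument where the mask is set and from the third otherwise. A minimal sketch of that select pattern, paired with the vcgt wrapper renamed later in this patch (illustrative only, not code taken from the kernels):

// Lane-wise max built from the comparison and select wrappers in this patch.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline float32x4_t lanewise_max(float32x4_t a, float32x4_t b)
{
    const uint32x4_t mask = wrapper::vcgt(a, b); // all-ones where a > b, all-zeros elsewhere
    return wrapper::vbsl(mask, a, b);            // pick a where the mask is set, else b
}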
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
new file mode 100644
index 0000000000..812ac326a8
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_CEQ_H__
+#define __ARM_COMPUTE_WRAPPER_CEQ_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VCEQ_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vceq(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VCEQ_IMPL(uint8x8_t, uint8x8_t, vceq, u8)
+VCEQ_IMPL(uint8x8_t, int8x8_t, vceq, s8)
+VCEQ_IMPL(uint16x4_t, uint16x4_t, vceq, u16)
+VCEQ_IMPL(uint16x4_t, int16x4_t, vceq, s16)
+VCEQ_IMPL(uint32x2_t, uint32x2_t, vceq, u32)
+VCEQ_IMPL(uint32x2_t, int32x2_t, vceq, s32)
+VCEQ_IMPL(uint32x2_t, float32x2_t, vceq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCEQ_IMPL(uint16x4_t, float16x4_t, vceq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VCEQ_IMPL(uint8x16_t, uint8x16_t, vceqq, u8)
+VCEQ_IMPL(uint8x16_t, int8x16_t, vceqq, s8)
+VCEQ_IMPL(uint16x8_t, uint16x8_t, vceqq, u16)
+VCEQ_IMPL(uint16x8_t, int16x8_t, vceqq, s16)
+VCEQ_IMPL(uint32x4_t, uint32x4_t, vceqq, u32)
+VCEQ_IMPL(uint32x4_t, int32x4_t, vceqq, s32)
+VCEQ_IMPL(uint32x4_t, float32x4_t, vceqq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCEQ_IMPL(uint16x8_t, float16x8_t, vceqq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VCEQ_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_CEQ_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
index 5ee7516a4e..c2ed9df1dc 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,32 +30,32 @@ namespace arm_compute
{
namespace wrapper
{
-#define VCGT_IMPL(stype, vtype, rtype, prefix, postfix) \
- inline rtype vgreaterthan(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
+#define VCGT_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vcgt(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
}
-VCGT_IMPL(uint8_t, uint8x8_t, uint8x8_t, vcgt, u8)
-VCGT_IMPL(int8_t, int8x8_t, uint8x8_t, vcgt, s8)
-VCGT_IMPL(uint16_t, uint16x4_t, uint16x4_t, vcgt, u16)
-VCGT_IMPL(int16_t, int16x4_t, uint16x4_t, vcgt, s16)
-VCGT_IMPL(uint32_t, uint32x2_t, uint32x2_t, vcgt, u32)
-VCGT_IMPL(int32_t, int32x2_t, uint32x2_t, vcgt, s32)
-VCGT_IMPL(float32x2_t, float32x2_t, uint32x2_t, vcgt, f32)
+VCGT_IMPL(uint8x8_t, uint8x8_t, vcgt, u8)
+VCGT_IMPL(uint8x8_t, int8x8_t, vcgt, s8)
+VCGT_IMPL(uint16x4_t, uint16x4_t, vcgt, u16)
+VCGT_IMPL(uint16x4_t, int16x4_t, vcgt, s16)
+VCGT_IMPL(uint32x2_t, uint32x2_t, vcgt, u32)
+VCGT_IMPL(uint32x2_t, int32x2_t, vcgt, s32)
+VCGT_IMPL(uint32x2_t, float32x2_t, vcgt, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(float16x4_t, float16x4_t, uint16x4_t, vcgt, f16)
+VCGT_IMPL(uint16x4_t, float16x4_t, vcgt, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(uint8_t, uint8x16_t, uint8x16_t, vcgtq, u8)
-VCGT_IMPL(int8_t, int8x16_t, uint8x16_t, vcgtq, s8)
-VCGT_IMPL(uint16_t, uint16x8_t, uint16x8_t, vcgtq, u16)
-VCGT_IMPL(int16_t, int16x8_t, uint16x8_t, vcgtq, s16)
-VCGT_IMPL(uint32_t, uint32x4_t, uint32x4_t, vcgtq, u32)
-VCGT_IMPL(int32_t, int32x4_t, uint32x4_t, vcgtq, s32)
-VCGT_IMPL(float32x4_t, float32x4_t, uint32x4_t, vcgtq, f32)
+VCGT_IMPL(uint8x16_t, uint8x16_t, vcgtq, u8)
+VCGT_IMPL(uint8x16_t, int8x16_t, vcgtq, s8)
+VCGT_IMPL(uint16x8_t, uint16x8_t, vcgtq, u16)
+VCGT_IMPL(uint16x8_t, int16x8_t, vcgtq, s16)
+VCGT_IMPL(uint32x4_t, uint32x4_t, vcgtq, u32)
+VCGT_IMPL(uint32x4_t, int32x4_t, vcgtq, s32)
+VCGT_IMPL(uint32x4_t, float32x4_t, vcgtq, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(float16x8_t, float16x8_t, uint16x8_t, vcgtq, f16)
+VCGT_IMPL(uint16x8_t, float16x8_t, vcgtq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VCGT_IMPL
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/clt.h b/arm_compute/core/NEON/wrapper/intrinsics/clt.h
new file mode 100644
index 0000000000..a187c216d7
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/clt.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_CLT_H__
+#define __ARM_COMPUTE_WRAPPER_CLT_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VCLT_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vclt(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VCLT_IMPL(uint8x8_t, uint8x8_t, vclt, u8)
+VCLT_IMPL(uint8x8_t, int8x8_t, vclt, s8)
+VCLT_IMPL(uint16x4_t, uint16x4_t, vclt, u16)
+VCLT_IMPL(uint16x4_t, int16x4_t, vclt, s16)
+VCLT_IMPL(uint32x2_t, uint32x2_t, vclt, u32)
+VCLT_IMPL(uint32x2_t, int32x2_t, vclt, s32)
+VCLT_IMPL(uint32x2_t, float32x2_t, vclt, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCLT_IMPL(uint16x4_t, float16x4_t, vclt, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VCLT_IMPL(uint8x16_t, uint8x16_t, vcltq, u8)
+VCLT_IMPL(uint8x16_t, int8x16_t, vcltq, s8)
+VCLT_IMPL(uint16x8_t, uint16x8_t, vcltq, u16)
+VCLT_IMPL(uint16x8_t, int16x8_t, vcltq, s16)
+VCLT_IMPL(uint32x4_t, uint32x4_t, vcltq, u32)
+VCLT_IMPL(uint32x4_t, int32x4_t, vcltq, s32)
+VCLT_IMPL(uint32x4_t, float32x4_t, vcltq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCLT_IMPL(uint16x8_t, float16x8_t, vcltq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VCLT_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_CLT_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index d00d3303f1..97af983e62 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -26,14 +26,16 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/add.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/and.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/bitselect.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/bsl.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/ceq.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/clt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/combine.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/inv.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/load.h"
@@ -44,7 +46,10 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/orr.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/pmax.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/pmin.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/orr.h b/arm_compute/core/NEON/wrapper/intrinsics/orr.h
new file mode 100644
index 0000000000..d82dc56a6d
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/orr.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_ORR_H__
+#define __ARM_COMPUTE_WRAPPER_ORR_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VORR_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vorr(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VORR_IMPL(uint8_t, uint8x8_t, vorr, u8)
+VORR_IMPL(int8_t, int8x8_t, vorr, s8)
+VORR_IMPL(uint16_t, uint16x4_t, vorr, u16)
+VORR_IMPL(int16_t, int16x4_t, vorr, s16)
+VORR_IMPL(uint32_t, uint32x2_t, vorr, u32)
+VORR_IMPL(int32_t, int32x2_t, vorr, s32)
+VORR_IMPL(uint64_t, uint64x1_t, vorr, u64)
+VORR_IMPL(int64_t, int64x1_t, vorr, s64)
+
+VORR_IMPL(uint8_t, uint8x16_t, vorrq, u8)
+VORR_IMPL(int8_t, int8x16_t, vorrq, s8)
+VORR_IMPL(uint16_t, uint16x8_t, vorrq, u16)
+VORR_IMPL(int16_t, int16x8_t, vorrq, s16)
+VORR_IMPL(uint32_t, uint32x4_t, vorrq, u32)
+VORR_IMPL(int32_t, int32x4_t, vorrq, s32)
+VORR_IMPL(uint64_t, uint64x2_t, vorrq, u64)
+VORR_IMPL(int64_t, int64x2_t, vorrq, s64)
+
+#undef VORR_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_ORR_H__ */
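
vorr wraps the bitwise OR intrinsics; applied to the all-ones/all-zeros masks produced by the comparison wrappers above, it acts as a per-lane logical OR. A hedged example (not taken from the library) composing a less-or-equal mask from vclt and vceq:

// a <= b is (a < b) OR (a == b); vorr merges the two lane masks.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline uint32x4_t lanewise_le(float32x4_t a, float32x4_t b)
{
    return wrapper::vorr(wrapper::vclt(a, b), wrapper::vceq(a, b));
}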
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
new file mode 100644
index 0000000000..7f701f89c4
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_PMAX_H__
+#define __ARM_COMPUTE_WRAPPER_PMAX_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPMAX_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vpmax(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPMAX_IMPL(uint8_t, uint8x8_t, vpmax, u8)
+VPMAX_IMPL(int8_t, int8x8_t, vpmax, s8)
+VPMAX_IMPL(uint16_t, uint16x4_t, vpmax, u16)
+VPMAX_IMPL(int16_t, int16x4_t, vpmax, s16)
+VPMAX_IMPL(uint32_t, uint32x2_t, vpmax, u32)
+VPMAX_IMPL(int32_t, int32x2_t, vpmax, s32)
+VPMAX_IMPL(float, float32x2_t, vpmax, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPMAX_IMPL(float16_t, float16x4_t, vpmax, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPMAX_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_PMAX_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
new file mode 100644
index 0000000000..52d5eb17a0
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_PMIN_H__
+#define __ARM_COMPUTE_WRAPPER_PMIN_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPMIN_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vpmin(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPMIN_IMPL(uint8_t, uint8x8_t, vpmin, u8)
+VPMIN_IMPL(int8_t, int8x8_t, vpmin, s8)
+VPMIN_IMPL(uint16_t, uint16x4_t, vpmin, u16)
+VPMIN_IMPL(int16_t, int16x4_t, vpmin, s16)
+VPMIN_IMPL(uint32_t, uint32x2_t, vpmin, u32)
+VPMIN_IMPL(int32_t, int32x2_t, vpmin, s32)
+VPMIN_IMPL(float, float32x2_t, vpmin, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPMIN_IMPL(float16_t, float16x4_t, vpmin, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPMIN_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_PMIN_H__ */
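
vpmax and vpmin wrap the pairwise max/min intrinsics, which compare adjacent lanes; folding a register with them repeatedly is a common way to reduce it to a single scalar. A sketch under that assumption, using the pre-existing vgetlow/vgethigh wrappers; it is not lifted from NEReductionOperationKernel:

// Horizontal max of a 128-bit float vector by pairwise folding.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline float horizontal_max(float32x4_t v)
{
    float32x2_t m = wrapper::vpmax(wrapper::vgetlow(v), wrapper::vgethigh(v)); // 4 lanes -> 2
    m             = wrapper::vpmax(m, m);                                      // 2 lanes -> 1 (duplicated)
    return vget_lane_f32(m, 0);
}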
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 70727424b3..619234d306 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1033,6 +1033,21 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul
return tiled_shape;
}
+/** Calculate the reduced shape of a tensor given an axis
+ *
+ * @param[in] input Input tensor info
+ * @param[in] axis Axis on which to perform reduction
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis)
+{
+ TensorShape output_shape{ input };
+ output_shape.set(axis, 1);
+
+ return output_shape;
+}
+
/** Calculate the upsampled shape of a tensor
*
* @param[in] input Input tensor info
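
A quick worked example of the new helper (illustrative; the shape_calculator namespace is the one the surrounding header already lives in): reducing along an axis collapses only that dimension to 1.

// e.g. an (8, 4, 2) shape reduced on axis 1 becomes (8, 1, 2)
const TensorShape in(8U, 4U, 2U);
const TensorShape out = arm_compute::misc::shape_calculator::compute_reduced_shape(in, 1);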
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 2e94030e53..2daef70cef 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -28,6 +28,7 @@
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
new file mode 100644
index 0000000000..87d77a5e13
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEARGMINMAXLAYER_H__
+#define __ARM_COMPUTE_NEARGMINMAXLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+ * This function calls the following NEON kernels:
+ *
+ * -# @ref NEReductionOperationKernel
+ * -# @ref NEFillBorderKernel
+ *
+ */
+class NEArgMinMaxLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Input source tensor. Data types supported: F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[out] output Output source tensor. Data types supported: U32.
+ * @param[in] op Operation to perform: min or max
+ */
+ void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMaxLayer
+ *
+ * @param[in] input Input source tensor info. Data types supported: F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[in] output Output source tensor info. Data types supported: U32.
+ * @param[in] op Operation to perform: min or max
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEReductionOperationKernel _reduction_kernel;
+ NEFillBorderKernel _fill_border_kernel;
+ bool _run_fill_border;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEARGMINMAXLAYER_H__ */
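
For context, a minimal usage sketch of the new function, not part of the patch. It assumes Tensor, TensorInfo, TensorShape and DataType from the library's public runtime headers, and that ReductionOperation exposes an ARG_IDX_MAX enumerator (the enum itself is not shown in this diff):

#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input, output;
    // 8 elements per row, 4 rows; reducing axis 0 leaves one index per row.
    input.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::U32));

    NEArgMinMaxLayer argmax;
    argmax.configure(&input, 0, &output, ReductionOperation::ARG_IDX_MAX); // ARG_IDX_MAX assumed

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill the input ...
    argmax.run(); // output now holds, per row, the index of that row's maximum
    return 0;
}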