author    Michalis Spyrou <michalis.spyrou@arm.com>   2019-01-03 11:10:25 +0000
committer Michalis Spyrou <michalis.spyrou@arm.com>   2019-01-10 16:24:26 +0000
commit    aea14c63e2efeda9d5f7492099389d439c65204f (patch)
tree      176a6181bbf00e4df078d5da0a17dd44f248958e /arm_compute
parent    c10bc0b5db5169a6ccea02a1aaefe34f082709e5 (diff)
download  ComputeLibrary-aea14c63e2efeda9d5f7492099389d439c65204f.tar.gz
COMPMID-1764 NEON: Implement ArgMax/ArgMin
Change-Id: Ibe23aa90b36ffd8553d1d1c35fada5d300fab829
Reviewed-on: https://review.mlplatform.org/475
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/bsl.h (renamed from arm_compute/core/NEON/wrapper/intrinsics/bitselect.h) | 52
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/ceq.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/cgt.h (renamed from arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h) | 42
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/clt.h | 64
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h | 9
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/orr.h | 60
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmax.h | 53
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/pmin.h | 53
-rw-r--r--  arm_compute/core/utils/misc/ShapeCalculator.h | 15
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h | 1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 78
11 files changed, 442 insertions, 49 deletions
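
In short: this patch adds an Arm NEON ArgMax/ArgMin reduction. Rather than returning the extreme value itself, the new NEArgMinMaxLayer writes, for every position along the reduced axis, the U32 index of the largest or smallest element. For example, reducing the row [3, 9, 1, 7] along its only axis gives 1 for ArgMax and 2 for ArgMin.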
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/bitselect.h b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
index 8223f6d463..9831b4b842 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/bitselect.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -14,15 +14,15 @@
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT SELECT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_WRAPPER_BITSELECT_H__
-#define __ARM_COMPUTE_WRAPPER_BITSELECT_H__
+#ifndef __ARM_COMPUTE_WRAPPER_BSL_H__
+#define __ARM_COMPUTE_WRAPPER_BSL_H__
#include <arm_neon.h>
@@ -30,35 +30,35 @@ namespace arm_compute
{
namespace wrapper
{
-#define VBITSELECT_IMPL(stype, vtype, ctype, prefix, postfix) \
- inline vtype vbitselect(const ctype &a, const vtype &b, const vtype &c) \
- { \
- return prefix##_##postfix(a, b, c); \
+#define VBSL_IMPL(vctype, vtype, prefix, postfix) \
+ inline vtype vbsl(const vctype &a, const vtype &b, const vtype &c) \
+ { \
+ return prefix##_##postfix(a, b, c); \
}
-VBITSELECT_IMPL(uint8_t, uint8x8_t, uint8x8_t, vbsl, u8)
-VBITSELECT_IMPL(int8_t, int8x8_t, uint8x8_t, vbsl, s8)
-VBITSELECT_IMPL(uint16_t, uint16x4_t, uint16x4_t, vbsl, u16)
-VBITSELECT_IMPL(int16_t, int16x4_t, uint16x4_t, vbsl, s16)
-VBITSELECT_IMPL(uint32_t, uint32x2_t, uint32x2_t, vbsl, u32)
-VBITSELECT_IMPL(int32_t, int32x2_t, uint32x2_t, vbsl, s32)
-VBITSELECT_IMPL(float32x2_t, float32x2_t, uint32x2_t, vbsl, f32)
+VBSL_IMPL(uint8x8_t, uint8x8_t, vbsl, u8)
+VBSL_IMPL(uint8x8_t, int8x8_t, vbsl, s8)
+VBSL_IMPL(uint16x4_t, uint16x4_t, vbsl, u16)
+VBSL_IMPL(uint16x4_t, int16x4_t, vbsl, s16)
+VBSL_IMPL(uint32x2_t, uint32x2_t, vbsl, u32)
+VBSL_IMPL(uint32x2_t, int32x2_t, vbsl, s32)
+VBSL_IMPL(uint32x2_t, float32x2_t, vbsl, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(float16x4_t, float16x4_t, uint16x4_t, vbsl, f16)
+VBSL_IMPL(uint16x4_t, float16x4_t, vbsl, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(uint8_t, uint8x16_t, uint8x16_t, vbslq, u8)
-VBITSELECT_IMPL(int8_t, int8x16_t, uint8x16_t, vbslq, s8)
-VBITSELECT_IMPL(uint16_t, uint16x8_t, uint16x8_t, vbslq, u16)
-VBITSELECT_IMPL(int16_t, int16x8_t, uint16x8_t, vbslq, s16)
-VBITSELECT_IMPL(uint32_t, uint32x4_t, uint32x4_t, vbslq, u32)
-VBITSELECT_IMPL(int32_t, int32x4_t, uint32x4_t, vbslq, s32)
-VBITSELECT_IMPL(float32x4_t, float32x4_t, uint32x4_t, vbslq, f32)
+VBSL_IMPL(uint8x16_t, uint8x16_t, vbslq, u8)
+VBSL_IMPL(uint8x16_t, int8x16_t, vbslq, s8)
+VBSL_IMPL(uint16x8_t, uint16x8_t, vbslq, u16)
+VBSL_IMPL(uint16x8_t, int16x8_t, vbslq, s16)
+VBSL_IMPL(uint32x4_t, uint32x4_t, vbslq, u32)
+VBSL_IMPL(uint32x4_t, int32x4_t, vbslq, s32)
+VBSL_IMPL(uint32x4_t, float32x4_t, vbslq, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VBITSELECT_IMPL(float16x8_t, float16x8_t, uint16x8_t, vbslq, f16)
+VBSL_IMPL(uint16x8_t, float16x8_t, vbslq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#undef VBITSELECT_IMPL
+#undef VBSL_IMPL
} // namespace wrapper
} // namespace arm_compute
-#endif /* __ARM_COMPUTE_WRAPPER_BITSELECT_H__ */
+#endif /* __ARM_COMPUTE_WRAPPER_BSL_H__ */
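
The renamed vbsl wrapper maps directly onto the NEON vbsl/vbslq bit-select intrinsics: the first argument is a lane mask, and each output lane is taken from the second argument where the mask is set and from the third otherwise. A minimal sketch of that select pattern, paired with the vcgt wrapper renamed later in this patch (illustrative only, not code taken from the kernels):

// Lane-wise max built from the comparison and select wrappers in this patch.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline float32x4_t lanewise_max(float32x4_t a, float32x4_t b)
{
    const uint32x4_t mask = wrapper::vcgt(a, b); // all-ones where a > b, all-zeros elsewhere
    return wrapper::vbsl(mask, a, b);            // pick a where the mask is set, else b
}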
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
new file mode 100644
index 0000000000..812ac326a8
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_CEQ_H__
+#define __ARM_COMPUTE_WRAPPER_CEQ_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VCEQ_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vceq(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VCEQ_IMPL(uint8x8_t, uint8x8_t, vceq, u8)
+VCEQ_IMPL(uint8x8_t, int8x8_t, vceq, s8)
+VCEQ_IMPL(uint16x4_t, uint16x4_t, vceq, u16)
+VCEQ_IMPL(uint16x4_t, int16x4_t, vceq, s16)
+VCEQ_IMPL(uint32x2_t, uint32x2_t, vceq, u32)
+VCEQ_IMPL(uint32x2_t, int32x2_t, vceq, s32)
+VCEQ_IMPL(uint32x2_t, float32x2_t, vceq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCEQ_IMPL(uint16x4_t, float16x4_t, vceq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VCEQ_IMPL(uint8x16_t, uint8x16_t, vceqq, u8)
+VCEQ_IMPL(uint8x16_t, int8x16_t, vceqq, s8)
+VCEQ_IMPL(uint16x8_t, uint16x8_t, vceqq, u16)
+VCEQ_IMPL(uint16x8_t, int16x8_t, vceqq, s16)
+VCEQ_IMPL(uint32x4_t, uint32x4_t, vceqq, u32)
+VCEQ_IMPL(uint32x4_t, int32x4_t, vceqq, s32)
+VCEQ_IMPL(uint32x4_t, float32x4_t, vceqq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCEQ_IMPL(uint16x8_t, float16x8_t, vceqq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VCEQ_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_CEQ_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
index 5ee7516a4e..c2ed9df1dc 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,32 +30,32 @@ namespace arm_compute
{
namespace wrapper
{
-#define VCGT_IMPL(stype, vtype, rtype, prefix, postfix) \
- inline rtype vgreaterthan(const vtype &a, const vtype &b) \
- { \
- return prefix##_##postfix(a, b); \
+#define VCGT_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vcgt(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
}
-VCGT_IMPL(uint8_t, uint8x8_t, uint8x8_t, vcgt, u8)
-VCGT_IMPL(int8_t, int8x8_t, uint8x8_t, vcgt, s8)
-VCGT_IMPL(uint16_t, uint16x4_t, uint16x4_t, vcgt, u16)
-VCGT_IMPL(int16_t, int16x4_t, uint16x4_t, vcgt, s16)
-VCGT_IMPL(uint32_t, uint32x2_t, uint32x2_t, vcgt, u32)
-VCGT_IMPL(int32_t, int32x2_t, uint32x2_t, vcgt, s32)
-VCGT_IMPL(float32x2_t, float32x2_t, uint32x2_t, vcgt, f32)
+VCGT_IMPL(uint8x8_t, uint8x8_t, vcgt, u8)
+VCGT_IMPL(uint8x8_t, int8x8_t, vcgt, s8)
+VCGT_IMPL(uint16x4_t, uint16x4_t, vcgt, u16)
+VCGT_IMPL(uint16x4_t, int16x4_t, vcgt, s16)
+VCGT_IMPL(uint32x2_t, uint32x2_t, vcgt, u32)
+VCGT_IMPL(uint32x2_t, int32x2_t, vcgt, s32)
+VCGT_IMPL(uint32x2_t, float32x2_t, vcgt, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(float16x4_t, float16x4_t, uint16x4_t, vcgt, f16)
+VCGT_IMPL(uint16x4_t, float16x4_t, vcgt, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(uint8_t, uint8x16_t, uint8x16_t, vcgtq, u8)
-VCGT_IMPL(int8_t, int8x16_t, uint8x16_t, vcgtq, s8)
-VCGT_IMPL(uint16_t, uint16x8_t, uint16x8_t, vcgtq, u16)
-VCGT_IMPL(int16_t, int16x8_t, uint16x8_t, vcgtq, s16)
-VCGT_IMPL(uint32_t, uint32x4_t, uint32x4_t, vcgtq, u32)
-VCGT_IMPL(int32_t, int32x4_t, uint32x4_t, vcgtq, s32)
-VCGT_IMPL(float32x4_t, float32x4_t, uint32x4_t, vcgtq, f32)
+VCGT_IMPL(uint8x16_t, uint8x16_t, vcgtq, u8)
+VCGT_IMPL(uint8x16_t, int8x16_t, vcgtq, s8)
+VCGT_IMPL(uint16x8_t, uint16x8_t, vcgtq, u16)
+VCGT_IMPL(uint16x8_t, int16x8_t, vcgtq, s16)
+VCGT_IMPL(uint32x4_t, uint32x4_t, vcgtq, u32)
+VCGT_IMPL(uint32x4_t, int32x4_t, vcgtq, s32)
+VCGT_IMPL(uint32x4_t, float32x4_t, vcgtq, f32)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-VCGT_IMPL(float16x8_t, float16x8_t, uint16x8_t, vcgtq, f16)
+VCGT_IMPL(uint16x8_t, float16x8_t, vcgtq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VCGT_IMPL
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/clt.h b/arm_compute/core/NEON/wrapper/intrinsics/clt.h
new file mode 100644
index 0000000000..a187c216d7
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/clt.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_CLT_H__
+#define __ARM_COMPUTE_WRAPPER_CLT_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VCLT_IMPL(votype, vtype, prefix, postfix) \
+ inline votype vclt(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VCLT_IMPL(uint8x8_t, uint8x8_t, vclt, u8)
+VCLT_IMPL(uint8x8_t, int8x8_t, vclt, s8)
+VCLT_IMPL(uint16x4_t, uint16x4_t, vclt, u16)
+VCLT_IMPL(uint16x4_t, int16x4_t, vclt, s16)
+VCLT_IMPL(uint32x2_t, uint32x2_t, vclt, u32)
+VCLT_IMPL(uint32x2_t, int32x2_t, vclt, s32)
+VCLT_IMPL(uint32x2_t, float32x2_t, vclt, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCLT_IMPL(uint16x4_t, float16x4_t, vclt, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VCLT_IMPL(uint8x16_t, uint8x16_t, vcltq, u8)
+VCLT_IMPL(uint8x16_t, int8x16_t, vcltq, s8)
+VCLT_IMPL(uint16x8_t, uint16x8_t, vcltq, u16)
+VCLT_IMPL(uint16x8_t, int16x8_t, vcltq, s16)
+VCLT_IMPL(uint32x4_t, uint32x4_t, vcltq, u32)
+VCLT_IMPL(uint32x4_t, int32x4_t, vcltq, s32)
+VCLT_IMPL(uint32x4_t, float32x4_t, vcltq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCLT_IMPL(uint16x8_t, float16x8_t, vcltq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VCLT_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_CLT_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index d00d3303f1..97af983e62 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -26,14 +26,16 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/add.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/and.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/bitselect.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/bsl.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/ceq.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/clt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/combine.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h"
-#include "arm_compute/core/NEON/wrapper/intrinsics/greaterthan.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/inv.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/load.h"
@@ -44,7 +46,10 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/movn.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/orr.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/padd.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/pmax.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/pmin.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/orr.h b/arm_compute/core/NEON/wrapper/intrinsics/orr.h
new file mode 100644
index 0000000000..d82dc56a6d
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/orr.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_ORR_H__
+#define __ARM_COMPUTE_WRAPPER_ORR_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VORR_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vorr(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VORR_IMPL(uint8_t, uint8x8_t, vorr, u8)
+VORR_IMPL(int8_t, int8x8_t, vorr, s8)
+VORR_IMPL(uint16_t, uint16x4_t, vorr, u16)
+VORR_IMPL(int16_t, int16x4_t, vorr, s16)
+VORR_IMPL(uint32_t, uint32x2_t, vorr, u32)
+VORR_IMPL(int32_t, int32x2_t, vorr, s32)
+VORR_IMPL(uint64_t, uint64x1_t, vorr, u64)
+VORR_IMPL(int64_t, int64x1_t, vorr, s64)
+
+VORR_IMPL(uint8_t, uint8x16_t, vorrq, u8)
+VORR_IMPL(int8_t, int8x16_t, vorrq, s8)
+VORR_IMPL(uint16_t, uint16x8_t, vorrq, u16)
+VORR_IMPL(int16_t, int16x8_t, vorrq, s16)
+VORR_IMPL(uint32_t, uint32x4_t, vorrq, u32)
+VORR_IMPL(int32_t, int32x4_t, vorrq, s32)
+VORR_IMPL(uint64_t, uint64x2_t, vorrq, u64)
+VORR_IMPL(int64_t, int64x2_t, vorrq, s64)
+
+#undef VORR_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_ORR_H__ */
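
vorr wraps the bitwise OR intrinsics; applied to the all-ones/all-zeros masks produced by the comparison wrappers above, it acts as a per-lane logical OR. A hedged example (not taken from the library) composing a less-or-equal mask from vclt and vceq:

// a <= b is (a < b) OR (a == b); vorr merges the two lane masks.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline uint32x4_t lanewise_le(float32x4_t a, float32x4_t b)
{
    return wrapper::vorr(wrapper::vclt(a, b), wrapper::vceq(a, b));
}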
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
new file mode 100644
index 0000000000..7f701f89c4
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_PMAX_H__
+#define __ARM_COMPUTE_WRAPPER_PMAX_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPMAX_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vpmax(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPMAX_IMPL(uint8_t, uint8x8_t, vpmax, u8)
+VPMAX_IMPL(int8_t, int8x8_t, vpmax, s8)
+VPMAX_IMPL(uint16_t, uint16x4_t, vpmax, u16)
+VPMAX_IMPL(int16_t, int16x4_t, vpmax, s16)
+VPMAX_IMPL(uint32_t, uint32x2_t, vpmax, u32)
+VPMAX_IMPL(int32_t, int32x2_t, vpmax, s32)
+VPMAX_IMPL(float, float32x2_t, vpmax, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPMAX_IMPL(float16_t, float16x4_t, vpmax, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPMAX_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_PMAX_H__ */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
new file mode 100644
index 0000000000..52d5eb17a0
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_PMIN_H__
+#define __ARM_COMPUTE_WRAPPER_PMIN_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VPMIN_IMPL(stype, vtype, prefix, postfix) \
+ inline vtype vpmin(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+
+VPMIN_IMPL(uint8_t, uint8x8_t, vpmin, u8)
+VPMIN_IMPL(int8_t, int8x8_t, vpmin, s8)
+VPMIN_IMPL(uint16_t, uint16x4_t, vpmin, u16)
+VPMIN_IMPL(int16_t, int16x4_t, vpmin, s16)
+VPMIN_IMPL(uint32_t, uint32x2_t, vpmin, u32)
+VPMIN_IMPL(int32_t, int32x2_t, vpmin, s32)
+VPMIN_IMPL(float, float32x2_t, vpmin, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VPMIN_IMPL(float16_t, float16x4_t, vpmin, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VPMIN_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_PMIN_H__ */
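
vpmax and vpmin wrap the pairwise max/min intrinsics, which compare adjacent lanes; folding a register with them repeatedly is a common way to reduce it to a single scalar. A sketch under that assumption, using the pre-existing vgetlow/vgethigh wrappers; it is not lifted from NEReductionOperationKernel:

// Horizontal max of a 128-bit float vector by pairwise folding.
#include <arm_neon.h>
#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h"

using namespace arm_compute;

inline float horizontal_max(float32x4_t v)
{
    float32x2_t m = wrapper::vpmax(wrapper::vgetlow(v), wrapper::vgethigh(v)); // 4 lanes -> 2
    m             = wrapper::vpmax(m, m);                                      // 2 lanes -> 1 (duplicated)
    return vget_lane_f32(m, 0);
}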
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 70727424b3..619234d306 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1033,6 +1033,21 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul
return tiled_shape;
}
+/** Calculate the reduced shape of a tensor given an axis
+ *
+ * @param[in] input Input tensor info
+ * @param[in] axis Axis on which to perform reduction
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis)
+{
+ TensorShape output_shape{ input };
+ output_shape.set(axis, 1);
+
+ return output_shape;
+}
+
/** Calculate the upsampled shape of a tensor
*
* @param[in] input Input tensor info
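
A quick worked example of the new helper (illustrative; the shape_calculator namespace is the one the surrounding header already lives in): reducing along an axis collapses only that dimension to 1.

// e.g. an (8, 4, 2) shape reduced on axis 1 becomes (8, 1, 2)
const TensorShape in(8U, 4U, 2U);
const TensorShape out = arm_compute::misc::shape_calculator::compute_reduced_shape(in, 1);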
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 2e94030e53..2daef70cef 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -28,6 +28,7 @@
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
new file mode 100644
index 0000000000..87d77a5e13
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEARGMINMAXLAYER_H__
+#define __ARM_COMPUTE_NEARGMINMAXLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+ * This function calls the following NEON kernels:
+ *
+ * -# @ref NEReductionOperationKernel
+ * -# @ref NEFillBorderKernel
+ *
+ */
+class NEArgMinMaxLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Input source tensor. Data types supported: F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[out] output Output source tensor. Data types supported: U32.
+ * @param[in] op Operation to perform: min or max
+ */
+ void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMaxLayer
+ *
+ * @param[in] input Input source tensor info. Data types supported: F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[in] output Output source tensor info. Data types supported: U32.
+ * @param[in] op Operation to perform: min or max
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEReductionOperationKernel _reduction_kernel;
+ NEFillBorderKernel _fill_border_kernel;
+ bool _run_fill_border;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEARGMINMAXLAYER_H__ */
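
For context, a minimal usage sketch of the new function, not part of the patch. It assumes Tensor, TensorInfo, TensorShape and DataType from the library's public runtime headers, and that ReductionOperation exposes an ARG_IDX_MAX enumerator (the enum itself is not shown in this diff):

#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input, output;
    // 8 elements per row, 4 rows; reducing axis 0 leaves one index per row.
    input.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::U32));

    NEArgMinMaxLayer argmax;
    argmax.configure(&input, 0, &output, ReductionOperation::ARG_IDX_MAX); // ARG_IDX_MAX assumed

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill the input ...
    argmax.run(); // output now holds, per row, the index of that row's maximum
    return 0;
}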