author    Adnan AlSinan <adnan.alsinan@arm.com>  2022-04-06 16:19:31 +0100
committer Adnan AlSinan <adnan.alsinan@arm.com>  2022-04-13 11:55:45 +0000
commit    9104cd559222b98f2b21f14d4fd561ed4a4e9bc2 (patch)
tree      628b30de762e8e1dc3d21c5dcb76a92212fa00af
parent    16c5697085c256c19fb8ba4bef6188d61f30a88b (diff)
download  ComputeLibrary-9104cd559222b98f2b21f14d4fd561ed4a4e9bc2.tar.gz
Add support for int8 CpuPool3d
- Add implementation for the CPU pooling 3d layer.
- NDHWC data layout support.
- Support QASYMM8/QASYMM8_SIGNED.
- Add Pooling helper file for Pool3d/2d common functions.

Resolves COMPMID-4668

Change-Id: Iadf042036b076099c2353d6e2fe9fc623bc263d8
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7387
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
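For orientation, a minimal usage sketch of the quantized path this patch enables (not part of the commit; the tensor shape and quantization parameters are hypothetical, and the Pooling3dLayerInfo fields follow the struct in arm_compute/core/Types.h):

    #include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical NDHWC shape: C=8, W=H=D=16, N=1
        TensorInfo src_info(TensorShape(8U, 16U, 16U, 16U, 1U), 1, DataType::QASYMM8, QuantizationInfo(0.2f, 10));
        src_info.set_data_layout(DataLayout::NDHWC);

        Tensor src{}, dst{};
        src.allocator()->init(src_info);

        Pooling3dLayerInfo pool_info{};
        pool_info.pool_type       = PoolingType::AVG;
        pool_info.pool_size       = Size3D(3, 3, 3);
        pool_info.stride          = Size3D(1, 1, 1);
        pool_info.padding         = Padding3D(1, 1, 1);
        pool_info.exclude_padding = true; // required for quantized AVG, see the kernel change below

        NEPooling3dLayer pool{};
        pool.configure(&src, &dst, pool_info); // dst info is inferred during configure

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized data, then:
        pool.run();
        return 0;
    }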
-rw-r--r--  Android.bp                                              |   2
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPooling3dLayer.h   |   6
-rw-r--r--  docs/user_guide/operator_list.dox                       |   2
-rw-r--r--  filelist.json                                           |   8
-rw-r--r--  src/core/helpers/PoolingHelpers.h                       | 202
-rw-r--r--  src/cpu/kernels/CpuPool3dKernel.cpp                     |  17
-rw-r--r--  src/cpu/kernels/CpuPool3dKernel.h                       |   4
-rw-r--r--  src/cpu/kernels/pool2d/neon/fp16.cpp                    |  10
-rw-r--r--  src/cpu/kernels/pool2d/neon/fp32.cpp                    |  10
-rw-r--r--  src/cpu/kernels/pool2d/neon/nchw/all.cpp                |  14
-rw-r--r--  src/cpu/kernels/pool2d/neon/quantized.h                 | 161
-rw-r--r--  src/cpu/kernels/pool3d/list.h                           |   2
-rw-r--r--  src/cpu/kernels/pool3d/neon/impl.cpp                    |  62
-rw-r--r--  src/cpu/kernels/pool3d/neon/impl.h                      |   2
-rw-r--r--  src/cpu/kernels/pool3d/neon/qasymm8.cpp                 |  34
-rw-r--r--  src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp          |  34
-rw-r--r--  src/cpu/kernels/pool3d/neon/quantized.h                 | 390
-rw-r--r--  src/cpu/operators/CpuPool3d.h                           |   2
-rw-r--r--  tests/validation/NEON/Pooling3dLayer.cpp                |  78
-rw-r--r--  tests/validation/fixtures/Pooling3dLayerFixture.h       |  37
20 files changed, 855 insertions, 222 deletions
diff --git a/Android.bp b/Android.bp
index a440e79ffd..691e46e5ac 100644
--- a/Android.bp
+++ b/Android.bp
@@ -512,6 +512,8 @@ cc_library_static {
"src/cpu/kernels/pool3d/neon/fp16.cpp",
"src/cpu/kernels/pool3d/neon/fp32.cpp",
"src/cpu/kernels/pool3d/neon/impl.cpp",
+ "src/cpu/kernels/pool3d/neon/qasymm8.cpp",
+ "src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp",
"src/cpu/kernels/range/generic/neon/fp16.cpp",
"src/cpu/kernels/range/generic/neon/fp32.cpp",
"src/cpu/kernels/range/generic/neon/impl.cpp",
diff --git a/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
index 7b31f916f6..4c5eb58e05 100644
--- a/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h
@@ -64,10 +64,12 @@ public:
* |:--------------|:--------------|
* |F16 |F16 |
* |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
*
* @note Source tensor is padded with -inf for MAX pooling and 0 otherwise
*
- * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
* @param[out] output Destination tensor.
* @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*/
@@ -75,7 +77,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer
*
*
- * @param[in] input Source tensor info. Data types supported: F16/F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor info.
* @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*
diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox
index ee337d46ea..c0888f1775 100644
--- a/docs/user_guide/operator_list.dox
+++ b/docs/user_guide/operator_list.dox
@@ -2316,6 +2316,8 @@ where N = batches, C = channels, H = height, W = width, D = depth
<tr><th>src<th>dst
<tr><td>F16<td>F16
<tr><td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
</table>
<tr>
<td>CLPooling3dLayer
diff --git a/filelist.json b/filelist.json
index 44e71c7e69..d02a6fc0c1 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1803,9 +1803,11 @@
"src/runtime/NEON/functions/NEPooling3dLayer.cpp"
],
"neon": {
- "common":[ "src/cpu/kernels/pool3d/neon/impl.cpp" ],
- "fp16": [ "src/cpu/kernels/pool3d/neon/fp16.cpp" ],
- "fp32": [ "src/cpu/kernels/pool3d/neon/fp32.cpp" ]
+ "common": [ "src/cpu/kernels/pool3d/neon/impl.cpp" ],
+ "fp16": [ "src/cpu/kernels/pool3d/neon/fp16.cpp" ],
+ "fp32": [ "src/cpu/kernels/pool3d/neon/fp32.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/pool3d/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp" ]
}
}
},
diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h
new file mode 100644
index 0000000000..079629ee6a
--- /dev/null
+++ b/src/core/helpers/PoolingHelpers.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H
+#define SRC_CORE_HELPERS_POOLINGHELPERS_H
+
+#include "src/core/NEON/NEAsymm.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace
+{
+
+inline float calculate_avg_scale_pool3d(bool exclude_padding, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int pool_size_z, const int upper_bound_w,
+ const int upper_bound_h, const int upper_bound_d, const int pad_x, const int pad_y, const int pad_z, const int stride_x, const int stride_y, const int stride_z)
+{
+ // Based on NDHWC
+ int start_x = id[1] * stride_x - pad_x;
+ int start_y = id[2] * stride_y - pad_y;
+ int start_z = id[3] * stride_z - pad_z;
+
+ const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
+ const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
+ const int end_z = std::min(start_z + pool_size_z, upper_bound_d);
+ if(exclude_padding)
+ {
+ start_x = std::max(0, start_x);
+ start_y = std::max(0, start_y);
+ start_z = std::max(0, start_z);
+ }
+ return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z));
+}
+
+inline float calculate_avg_scale_pool2d(bool exclude_padding, DataLayout data_layout, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
+ const int pad_x, const int pad_y, const int stride_x, const int stride_y)
+{
+ const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ int start_x = id[idx_width] * stride_x - pad_x;
+ int start_y = id[idx_height] * stride_y - pad_y;
+
+ const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
+ const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
+ if(exclude_padding)
+ {
+ start_x = std::max(0, start_x);
+ start_y = std::max(0, start_y);
+ }
+ return 1.f / ((end_y - start_y) * (end_x - start_x));
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type
+quantize(float val, const UniformQuantizationInfo &info)
+{
+ return quantize_qasymm8_signed(val, info);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type
+quantize(float val, const UniformQuantizationInfo &info)
+{
+ return quantize_qasymm8(val, info);
+}
+
+template <typename T>
+inline T vcvtq_q32_f32(float32x4_t values);
+
+template <>
+inline uint32x4_t vcvtq_q32_f32(float32x4_t values)
+{
+ return vcvtq_u32_f32(values);
+}
+
+template <>
+inline int32x4_t vcvtq_q32_f32(float32x4_t values)
+{
+ return vcvtq_s32_f32(values);
+}
+
+template <typename T>
+inline float32x4_t vcvtq_f32_q32(T values);
+
+template <>
+inline float32x4_t vcvtq_f32_q32(uint32x4_t values)
+{
+ return vcvtq_f32_u32(values);
+}
+
+template <>
+inline float32x4_t vcvtq_f32_q32(int32x4_t values)
+{
+ return vcvtq_f32_s32(values);
+}
+
+template <typename Tout>
+inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset);
+
+template <>
+inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
+{
+ const float new_scale = quant_rescale / scale_pooling;
+ return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset));
+}
+
+template <>
+inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
+{
+ const float new_scale = quant_rescale / scale_pooling;
+ return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset));
+}
+
+template <typename Tin, typename Tout>
+inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo);
+
+template <>
+inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x4_t acc =
+ {
+ {
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))),
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))),
+ }
+ };
+ return vquantize(acc, requant_qinfo);
+}
+
+template <>
+inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x4_t acc =
+ {
+ {
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))),
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))),
+ }
+ };
+ return vquantize_signed(acc, requant_qinfo);
+}
+
+template <typename T>
+inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo);
+
+template <>
+inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x2_t acc =
+ {
+ {
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))),
+ }
+ };
+ return vquantize(acc, requant_qinfo);
+}
+
+template <>
+inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x2_t acc =
+ {
+ {
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))),
+ }
+ };
+ return vquantize_signed(acc, requant_qinfo);
+}
+
+} // namespace
+} // namespace cpu
+} // namespace arm_compute
+#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */
+
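To make the averaging-scale helper above concrete, a standalone re-statement of calculate_avg_scale_pool3d for a single output point (hypothetical numbers: 3x3x3 window, stride 1, pad 1, input well inside bounds):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int pool = 3, stride = 1, pad = 1;
        const int upper_bound = 16; // input dim when exclude_padding == true

        for(const bool exclude_padding : { true, false })
        {
            int start = 0 * stride - pad;                       // first output element -> -1
            const int ub  = upper_bound + (exclude_padding ? 0 : pad);
            const int end = std::min(start + pool, ub);         // 2
            if(exclude_padding) { start = std::max(0, start); } // clamp to the real data
            const int n   = end - start;                        // 2 or 3 per axis
            std::printf("exclude_padding=%d -> scale = 1/%d\n", exclude_padding, n * n * n);
        }
        return 0; // prints 1/8 (only real samples) and 1/27 (padded zeros count)
    }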
diff --git a/src/cpu/kernels/CpuPool3dKernel.cpp b/src/cpu/kernels/CpuPool3dKernel.cpp
index 3321967d2f..1305f7c5e8 100644
--- a/src/cpu/kernels/CpuPool3dKernel.cpp
+++ b/src/cpu/kernels/CpuPool3dKernel.cpp
@@ -44,11 +44,20 @@ using namespace misc::shape_calculator;
static const std::vector<CpuPool3dKernel::Pooling3dKernel> available_kernels =
{
{
+ "neon_qu8_ndhwc_poolMxNxD",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_q8_pool3d)
+ },
+ {
+ "neon_qs8_ndhwc_poolMxNxD",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_q8_signed_pool3d)
+ },
+ {
"neon_fp16_ndhwc_poolMxNxD",
[](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16 && data.isa.fp16); },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_pool3d)
},
-
{
"neon_fp32_ndhwc_poolMxNxD",
[](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
@@ -61,7 +70,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_layout() != DataLayout::NDHWC, "Only NDHWC layout supported");
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((!is_data_type_float(src->data_type())) && (!pool_info.exclude_padding
+ && (pool_info.pool_type == PoolingType::AVG)),
+ "Exclude padding is unsupported for non-float types for Avg op");
const auto data_layout = src->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
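One consequence of the new check above: for quantized types, AVG pooling must exclude padding from the average. A hedged sketch of the observable behaviour (src_info/dst_info are assumed to be QASYMM8 NDHWC tensor infos built as in the sketch near the top):

    Pooling3dLayerInfo info{};
    info.pool_type       = PoolingType::AVG;
    info.pool_size       = Size3D(3, 3, 3);
    info.padding         = Padding3D(1, 1, 1);
    info.exclude_padding = false; // padding counted in the average

    const Status s = NEPooling3dLayer::validate(&src_info, &dst_info, info);
    // For QASYMM8/QASYMM8_SIGNED sources, s.error_code() != ErrorCode::OK;
    // flipping exclude_padding to true makes the same configuration valid.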
diff --git a/src/cpu/kernels/CpuPool3dKernel.h b/src/cpu/kernels/CpuPool3dKernel.h
index f762cfca9a..437f2af7e4 100644
--- a/src/cpu/kernels/CpuPool3dKernel.h
+++ b/src/cpu/kernels/CpuPool3dKernel.h
@@ -51,8 +51,10 @@ public:
* |:--------------|:--------------|
* |F16 |F16 |
* |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
*
- * @param[in] src Source tensor info. Data types supported: F16/F32.
+ * @param[in] src Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
* @param[out] dst Destination tensor info. Data types supported: Same as @p src.
* @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*/
diff --git a/src/cpu/kernels/pool2d/neon/fp16.cpp b/src/cpu/kernels/pool2d/neon/fp16.cpp
index 72f63af3be..13e21b1e70 100644
--- a/src/cpu/kernels/pool2d/neon/fp16.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -199,8 +199,8 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
const float16x8_t scale_v = vdupq_n_f16(scale);
// Perform pooling
@@ -260,8 +260,8 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float16_t scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float16_t scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
for(int y = pool_start_y; y < pool_end_y; ++y)
{
diff --git a/src/cpu/kernels/pool2d/neon/fp32.cpp b/src/cpu/kernels/pool2d/neon/fp32.cpp
index e4261f746d..1ed199be8d 100644
--- a/src/cpu/kernels/pool2d/neon/fp32.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp32.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -193,8 +193,8 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
const float32x4_t scale_v = vdupq_n_f32(scale);
// Perform pooling
@@ -258,8 +258,8 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
for(int y = pool_start_y; y < pool_end_y; ++y)
{
diff --git a/src/cpu/kernels/pool2d/neon/nchw/all.cpp b/src/cpu/kernels/pool2d/neon/nchw/all.cpp
index 10cbfc56a1..77f63c6f77 100644
--- a/src/cpu/kernels/pool2d/neon/nchw/all.cpp
+++ b/src/cpu/kernels/pool2d/neon/nchw/all.cpp
@@ -124,7 +124,7 @@ void pooling3_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
const float16x4_t scale_v = vdup_n_f16(scale);
// Perform pooling
@@ -288,7 +288,7 @@ void pooling2_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
const float16x4_t scale_v = vdup_n_f16(scale);
@@ -343,7 +343,7 @@ void poolingMxN_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float16_t scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ const float16_t scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
// Perform pooling
@@ -430,7 +430,7 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h,
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h,
pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
// Perform pooling
@@ -538,7 +538,7 @@ void pooling2_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
@@ -618,7 +618,7 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
@@ -687,7 +687,7 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
// Calculate scale
- float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
diff --git a/src/cpu/kernels/pool2d/neon/quantized.h b/src/cpu/kernels/pool2d/neon/quantized.h
index 386e043984..a2cd3991be 100644
--- a/src/cpu/kernels/pool2d/neon/quantized.h
+++ b/src/cpu/kernels/pool2d/neon/quantized.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/PoolingHelpers.h"
#include <arm_neon.h>
namespace arm_compute
@@ -37,148 +38,6 @@ namespace arm_compute
namespace cpu
{
template <typename T>
-inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type
-quantize(float val, const UniformQuantizationInfo &info)
-{
- return quantize_qasymm8_signed(val, info);
-}
-
-template <typename T>
-inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type
-quantize(float val, const UniformQuantizationInfo &info)
-{
- return quantize_qasymm8(val, info);
-}
-
-template <typename T>
-inline T vcvtq_q32_f32(float32x4_t values);
-
-template <>
-inline uint32x4_t vcvtq_q32_f32(float32x4_t values)
-{
- return vcvtq_u32_f32(values);
-}
-
-template <>
-inline int32x4_t vcvtq_q32_f32(float32x4_t values)
-{
- return vcvtq_s32_f32(values);
-}
-
-template <typename T>
-inline float32x4_t vcvtq_f32_q32(T values);
-
-template <>
-inline float32x4_t vcvtq_f32_q32(uint32x4_t values)
-{
- return vcvtq_f32_u32(values);
-}
-
-template <>
-inline float32x4_t vcvtq_f32_q32(int32x4_t values)
-{
- return vcvtq_f32_s32(values);
-}
-
-template <typename Tout>
-inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset);
-
-template <>
-inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
-{
- const float new_scale = quant_rescale / scale_pooling;
- return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset));
-}
-
-template <>
-inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
-{
- const float new_scale = quant_rescale / scale_pooling;
- return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset));
-}
-
-template <typename Tin, typename Tout>
-inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo);
-
-template <>
-inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
-{
- const float32x4x4_t acc =
- {
- {
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))),
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))),
- }
- };
- return vquantize(acc, requant_qinfo);
-}
-
-template <>
-inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
-{
- const float32x4x4_t acc =
- {
- {
- vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))),
- vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))),
- vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))),
- vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))),
- }
- };
- return vquantize_signed(acc, requant_qinfo);
-}
-
-template <typename T>
-inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo);
-
-template <>
-inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
-{
- const float32x4x2_t acc =
- {
- {
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))),
- }
- };
- return vquantize(acc, requant_qinfo);
-}
-
-template <>
-inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
-{
- const float32x4x2_t acc =
- {
- {
- vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))),
- vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))),
- }
- };
- return vquantize_signed(acc, requant_qinfo);
-}
-
-inline float calculate_avg_scale(bool exclude_padding, DataLayout data_layout, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
- const int pad_x, const int pad_y, const int stride_x, const int stride_y)
-{
- const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- int start_x = id[idx_width] * stride_x - pad_x;
- int start_y = id[idx_height] * stride_y - pad_y;
-
- const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
- const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
- if(exclude_padding)
- {
- start_x = std::max(0, start_x);
- start_y = std::max(0, start_y);
- }
- return 1.f / ((end_y - start_y) * (end_x - start_x));
-}
-
-template <typename T>
void poolingMxN_q8_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
ARM_COMPUTE_UNUSED(dst1);
@@ -250,8 +109,8 @@ void poolingMxN_q8_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, P
q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
// Perform pooling
for(int y = pool_start_y; y < pool_end_y; ++y)
@@ -352,8 +211,8 @@ void poolingMxN_q8_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, P
q32_t res = static_cast<q32_t>(0.f);
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
// Perform pooling
for(int y = pool_start_y; y < pool_end_y; ++y)
@@ -531,8 +390,8 @@ void pooling2_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *ds
Iterator out(dst0, window);
/** SIMD vector types */
- using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
- using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
+ using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
+ using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
using q16_t = typename wrapper::traits::promote_t<T>;
using q16x4_t = typename wrapper::traits::neon_vector<q16_t, 4>::type;
using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
@@ -867,8 +726,8 @@ void poolingMxN_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *
q32_t sres = 0;
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
+ pool_stride_y);
// Perform pooling
for(int y = 0; y < pool_size_y; ++y)
diff --git a/src/cpu/kernels/pool3d/list.h b/src/cpu/kernels/pool3d/list.h
index ece780eb0b..3426360f93 100644
--- a/src/cpu/kernels/pool3d/list.h
+++ b/src/cpu/kernels/pool3d/list.h
@@ -31,6 +31,8 @@ namespace cpu
#define DECLARE_POOLING_KERNEL(func_name) \
void func_name(const ITensor *src0, ITensor *dst0, Pooling3dLayerInfo &, const Window &window)
+DECLARE_POOLING_KERNEL(neon_q8_pool3d);
+DECLARE_POOLING_KERNEL(neon_q8_signed_pool3d);
DECLARE_POOLING_KERNEL(neon_fp16_pool3d);
DECLARE_POOLING_KERNEL(neon_fp32_pool3d);
diff --git a/src/cpu/kernels/pool3d/neon/impl.cpp b/src/cpu/kernels/pool3d/neon/impl.cpp
index bb3999b104..2b089f3079 100644
--- a/src/cpu/kernels/pool3d/neon/impl.cpp
+++ b/src/cpu/kernels/pool3d/neon/impl.cpp
@@ -22,11 +22,10 @@
* SOFTWARE.
*/
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
+#include "src/core/helpers/PoolingHelpers.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/cpu/kernels/pool3d/neon/quantized.h"
#include "src/cpu/kernels/pool3d/neon/impl.h"
@@ -36,27 +35,6 @@ namespace cpu
{
namespace
{
-inline float calculate_avg_scale(bool exclude_padding, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int pool_size_z, const int upper_bound_w,
- const int upper_bound_h, const int upper_bound_d, const int pad_x, const int pad_y, const int pad_z, const int stride_x, const int stride_y, const int stride_z)
-{
- // Based on NDHWC
- int start_x = id[1] * stride_x - pad_x;
- int start_y = id[2] * stride_y - pad_y;
- int start_z = id[3] * stride_z - pad_z;
-
- const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
- const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
- const int end_z = std::min(start_z + pool_size_z, upper_bound_d);
- if(exclude_padding)
- {
- start_x = std::max(0, start_x);
- start_y = std::max(0, start_y);
- start_z = std::max(0, start_z);
- }
- return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z));
-}
-
-
template <typename T>
void max_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
const int window_start_x, const int window_end_x, const int window_step_x)
@@ -227,9 +205,9 @@ void avg_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3d
const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
- pool_pad_top, pool_pad_front, pool_stride_x,
- pool_stride_y, pool_stride_z);
+ const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
+ pool_pad_top, pool_pad_front, pool_stride_x,
+ pool_stride_y, pool_stride_z);
const vector_type scale_v = wrapper::vdup_n(static_cast<T>(scale), tag_type());
int x_off = window_start_x;
@@ -354,9 +332,9 @@ void l2_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dL
const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
// Calculate scale
- const float scale = calculate_avg_scale(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
- pool_pad_top, pool_pad_front, pool_stride_x,
- pool_stride_y, pool_stride_z);
+ const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
+ pool_pad_top, pool_pad_front, pool_stride_x,
+ pool_stride_y, pool_stride_z);
int x_off = window_start_x;
@@ -452,9 +430,33 @@ void poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLaye
}
}
+template <typename T>
+void poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window)
+{
+ constexpr int window_step_x = 16;
+ Window window_out = window;
+
+ // Needed to handle loop left-over
+ window_out.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ switch(pool_info.pool_type)
+ {
+ case PoolingType::MAX:
+ max_poolingMxNxD_q8_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_step_x);
+ break;
+ case PoolingType::AVG:
+ avg_poolingMxNxD_q8_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_step_x);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Pool operation not supported");
+ }
+}
+
template void poolingMxNxD_fp_neon_ndhwc<float>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
template void poolingMxNxD_fp_neon_ndhwc<float16_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+template void poolingMxNxD_q8_neon_ndhwc<uint8_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
+template void poolingMxNxD_q8_neon_ndhwc<int8_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
} // namespace cpu
} // namespace arm_compute
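Structurally, the q8 path mirrors the fp path: the templated kernel body stays in impl.cpp, is explicitly instantiated for uint8_t and int8_t at the bottom of the file, and the new single-type translation units below only wrap those instantiations so filelist.json can compile them in or out independently. A minimal standalone sketch of that pattern (hypothetical names, not the library's exact code):

    #include <cstdint>

    // impl.h: declaration only
    template <typename T> void pool3d_impl(const T *in, T *out, int n);

    // impl.cpp: definition plus explicit instantiations
    template <typename T>
    void pool3d_impl(const T *in, T *out, int n)
    {
        for(int i = 0; i < n; ++i) { out[i] = in[i]; } // placeholder kernel body
    }
    template void pool3d_impl<uint8_t>(const uint8_t *, uint8_t *, int);
    template void pool3d_impl<int8_t>(const int8_t *, int8_t *, int);

    // qasymm8.cpp: thin per-type entry point, registered with the kernel's selector
    void neon_q8_entry(const uint8_t *in, uint8_t *out, int n)
    {
        pool3d_impl<uint8_t>(in, out, n);
    }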
diff --git a/src/cpu/kernels/pool3d/neon/impl.h b/src/cpu/kernels/pool3d/neon/impl.h
index 829a9bd192..7ad8c8eb05 100644
--- a/src/cpu/kernels/pool3d/neon/impl.h
+++ b/src/cpu/kernels/pool3d/neon/impl.h
@@ -37,6 +37,8 @@ namespace cpu
template <typename T>
void poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
+template <typename T>
+void poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
} // namespace cpu
} // namespace arm_compute
#endif //define SRC_CORE_POOLING_3D_LAYER_IMPL_H
diff --git a/src/cpu/kernels/pool3d/neon/qasymm8.cpp b/src/cpu/kernels/pool3d/neon/qasymm8.cpp
new file mode 100644
index 0000000000..650a815e76
--- /dev/null
+++ b/src/cpu/kernels/pool3d/neon/qasymm8.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/kernels/pool3d/neon/impl.h"
+namespace arm_compute
+{
+namespace cpu
+{
+void neon_q8_pool3d(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window)
+{
+ return poolingMxNxD_q8_neon_ndhwc<uint8_t>(src, dst0, pool_info, window);
+}
+} // namespace cpu
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp b/src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp
new file mode 100644
index 0000000000..374b2435ea
--- /dev/null
+++ b/src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/kernels/pool3d/neon/impl.h"
+namespace arm_compute
+{
+namespace cpu
+{
+void neon_q8_signed_pool3d(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window)
+{
+ return poolingMxNxD_q8_neon_ndhwc<int8_t>(src, dst0, pool_info, window);
+}
+} // namespace cpu
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/cpu/kernels/pool3d/neon/quantized.h b/src/cpu/kernels/pool3d/neon/quantized.h
new file mode 100644
index 0000000000..ac14f5eafa
--- /dev/null
+++ b/src/cpu/kernels/pool3d/neon/quantized.h
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
+#define SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/PoolingHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+template <typename T>
+void avg_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
+ const int window_step_x)
+
+{
+ using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
+ using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
+ using q16_t = typename wrapper::traits::promote_t<T>;
+ using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
+ using q32_t = typename wrapper::traits::promote_t<q16_t>;
+ using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;
+
+ int pool_stride_x = static_cast<int>(pool_info.stride.width);
+ int pool_stride_y = static_cast<int>(pool_info.stride.height);
+ int pool_stride_z = static_cast<int>(pool_info.stride.depth);
+
+ const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
+ const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
+ const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
+
+ const int pool_pad_top = static_cast<int>(pool_info.padding.top);
+ const int pool_pad_bottom = static_cast<int>(pool_info.padding.bottom);
+ const int pool_pad_left = static_cast<int>(pool_info.padding.left);
+ const int pool_pad_right = static_cast<int>(pool_info.padding.right);
+ const int pool_pad_front = static_cast<int>(pool_info.padding.front);
+ const int pool_pad_back = static_cast<int>(pool_info.padding.back);
+
+ const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
+ const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
+ const int upper_bound_d = src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);
+
+ const int input_dim_c = src->info()->dimension(0);
+ const int input_dim_w = src->info()->dimension(1);
+ const int input_dim_h = src->info()->dimension(2);
+ const int input_dim_d = src->info()->dimension(3);
+
+ const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
+ const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
+ const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
+ const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);
+
+ const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
+
+ const int window_end_x = input_dim_c;
+ const int window_start_x = 0;
+
+ Iterator out(dst0, window_out);
+
+ const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
+ const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo dst_qinfo = dst0->info()->quantization_info().uniform();
+
+ const float quant_rescale = dst_qinfo.scale / src_qinfo.scale;
+ // "new_offset" doesn't have to consider the "half_scale_v" in its computation
+ // With a requantization performed in a single step there won't be uncertainties introduced
+ const int32_t new_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / quant_rescale);
+
+ execute_window_loop(window_out, [&](const Coordinates & id)
+ {
+ // Computing the theoretical input starting/ending points
+ const int in_idx_width = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
+ const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
+ const int in_idx_depth = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;
+
+ const int pool_start_x = std::max(0, -in_idx_width);
+ const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
+ const int pool_start_y = std::max(0, -in_idx_height);
+ const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
+
+ const int pool_start_z = std::max(0, -in_idx_depth);
+ const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
+
+ // The end of width to consider in calculation should exclude PAD_X, PAD_Y and PAD_Z
+ const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
+ const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
+ const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
+
+ // Calculate scale
+ const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
+ pool_pad_top, pool_pad_front, pool_stride_x, pool_stride_y, pool_stride_z);
+
+ const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
+
+ int x_off = window_start_x;
+
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // C
+ {
+ q32x4_t vres1 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
+ q32x4_t vres2 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
+ q32x4_t vres3 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
+ q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
+
+ // Perform pooling
+ for(int z = pool_start_z; z < pool_end_z; ++z)
+ {
+ const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
+ const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
+
+ const q16x8_t data_q16 = wrapper::vmovl(wrapper::vgetlow(data));
+ const q16x8_t data2_q16 = wrapper::vmovl(wrapper::vgethigh(data));
+ vres1 = wrapper::vadd(vres1, wrapper::vmovl(wrapper::vgetlow(data_q16)));
+ vres2 = wrapper::vadd(vres2, wrapper::vmovl(wrapper::vgethigh(data_q16)));
+ vres3 = wrapper::vadd(vres3, wrapper::vmovl(wrapper::vgetlow(data2_q16)));
+ vres4 = wrapper::vadd(vres4, wrapper::vmovl(wrapper::vgethigh(data2_q16)));
+ }
+ }
+ }
+
+ if(src_qinfo != dst_qinfo)
+ {
+ const float32x4x4_t vres =
+ {
+ {
+ vcvtq_f32_q32(vres1),
+ vcvtq_f32_q32(vres2),
+ vcvtq_f32_q32(vres3),
+ vcvtq_f32_q32(vres4),
+ }
+ };
+ const auto requantized_dst = vrequantize_pooling_with_scale<q8x16_t>(vres, quant_rescale, scale, new_offset);
+ // Store result
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, wrapper::vgetlow(requantized_dst));
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, wrapper::vgethigh(requantized_dst));
+ }
+ else
+ {
+ const float32x4_t scale_v = vdupq_n_f32(scale);
+ // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
+ vres1 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres1), scale_v));
+ vres2 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres2), scale_v));
+ vres3 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres3), scale_v));
+ vres4 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres4), scale_v));
+
+ const q8x8_t res1 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres1), wrapper::vmovn(vres2)));
+ const q8x8_t res2 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres3), wrapper::vmovn(vres4)));
+ // Store result
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, res1);
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, res2);
+ }
+ }
+
+ // Left-overs loop
+ for(; x_off < window_end_x; ++x_off)
+ {
+ q32_t res = static_cast<q32_t>(0.f);
+
+ // Perform pooling
+ for(int z = pool_start_z; z < pool_end_z; ++z)
+ {
+ const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
+ const T data = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
+ res += data;
+ }
+ }
+ }
+
+ if(src_qinfo != dst_qinfo)
+ {
+ const float res_f = static_cast<float>(res);
+ const float new_scale = quant_rescale / scale;
+ const auto requantized_dst = quantize<T>(res_f, UniformQuantizationInfo(new_scale, new_offset));
+
+ // Store result
+ *(reinterpret_cast<T *>(out.ptr()) + x_off) = requantized_dst;
+ }
+ else
+ {
+ // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
+ res = static_cast<T>(0.5f + static_cast<float>(res) * scale);
+
+ // Store result
+ *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
+ }
+ }
+ },
+ out);
+}
+
+template <typename T>
+void max_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
+ const int window_step_x)
+
+{
+ using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
+ using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
+
+ const int window_half_step_x = window_step_x / 2;
+
+ int pool_stride_x = static_cast<int>(pool_info.stride.width);
+ int pool_stride_y = static_cast<int>(pool_info.stride.height);
+ int pool_stride_z = static_cast<int>(pool_info.stride.depth);
+
+ const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
+ const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
+ const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
+
+ const int pool_pad_top = static_cast<int>(pool_info.padding.top);
+ const int pool_pad_left = static_cast<int>(pool_info.padding.left);
+ const int pool_pad_front = static_cast<int>(pool_info.padding.front);
+
+ const int input_dim_c = src->info()->dimension(0);
+ const int input_dim_w = src->info()->dimension(1);
+ const int input_dim_h = src->info()->dimension(2);
+ const int input_dim_d = src->info()->dimension(3);
+
+ const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
+ const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
+ const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
+ const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);
+
+ const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
+
+ const int window_end_x = input_dim_c;
+ const int window_start_x = 0;
+
+ Iterator out(dst0, window_out);
+
+ const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo dst_qinfo = dst0->info()->quantization_info().uniform();
+
+ const float requant_scale = dst_qinfo.scale / src_qinfo.scale;
+ const int32_t requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
+ const UniformQuantizationInfo requant_qinfo = UniformQuantizationInfo(requant_scale, requant_offset);
+
+ execute_window_loop(window_out, [&](const Coordinates & id)
+ {
+ // Computing the theoretical input starting/ending points
+ const int in_idx_width = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
+ const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
+ const int in_idx_depth = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;
+
+ const int pool_start_x = std::max(0, -in_idx_width);
+ const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
+ const int pool_start_y = std::max(0, -in_idx_height);
+ const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
+
+ const int pool_start_z = std::max(0, -in_idx_depth);
+ const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
+
+ // The end of width to consider in calculation should exclude PAD_X, PAD_Y and PAD_Z
+ const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
+ const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
+ const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
+
+ const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
+
+ int x_off = window_start_x;
+
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // C
+ {
+ q8x16_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_128_tag{});
+
+ // Perform pooling
+ for(int z = pool_start_z; z < pool_end_z; ++z)
+ {
+ const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
+ const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
+
+ vres = wrapper::vmax(vres, data);
+ }
+ }
+ }
+
+ // Store result
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(vres), wrapper::vgethigh(vres),
+ requant_qinfo) :
+ vres);
+ }
+
+ // Leftovers using half the window step
+ for(; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
+ {
+ q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
+
+ // Perform pooling
+ for(int z = pool_start_z; z < pool_end_z; ++z)
+ {
+ const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
+ const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in_ptr_x) + x_off);
+
+ vres = wrapper::vmax(vres, data);
+ }
+ }
+ }
+
+ // Store result
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
+ (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
+ }
+
+ // Left-overs loop
+ for(; x_off < window_end_x; ++x_off)
+ {
+ T res = std::numeric_limits<T>::min();
+
+ for(int z = pool_start_z; z < pool_end_z; ++z)
+ {
+ const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
+ const T data = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
+
+ res = std::max(res, data);
+ }
+ }
+ }
+
+ // Store result
+ if(src_qinfo != dst_qinfo)
+ {
+ const float res_f = static_cast<float>(res);
+ *(reinterpret_cast<T *>(out.ptr()) + x_off) = quantize<T>(res_f, requant_qinfo);
+ }
+ else
+ {
+ *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
+ }
+ }
+ },
+ out);
+}
+
+} // namespace cpu
+} // namespace arm_compute
+
+#endif // SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
\ No newline at end of file
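The single-step requantization constants used by the AVG path above can be checked with plain arithmetic: with a real average r = src_scale * (sum_q * scale - src_offset), the destination value r / dst_scale + dst_offset equals sum_q * scale / quant_rescale + new_offset, which is exactly what vquantize computes with new_scale = quant_rescale / scale. A small numeric check (a sketch, not library code; quantization infos taken from the test dataset further down, the 2x2x2 window and its sum are hypothetical):

    #include <cstdio>

    int main()
    {
        const float src_scale = 0.2f; const int src_offset = 10;
        const float dst_scale = 0.1f; const int dst_offset = 5;

        const float quant_rescale = dst_scale / src_scale;                                  // 0.5
        const int   new_offset    = dst_offset - static_cast<int>(src_offset / quant_rescale); // -15

        // Hypothetical 2x2x2 valid window (scale = 1/8) whose raw quantized values sum to 88
        const float scale = 1.0f / 8.0f;
        const float sum_q = 88.0f;

        // Requantize in one step, as vrequantize_pooling_with_scale does:
        const float new_scale = quant_rescale / scale;                                      // 4.0
        const int   q_out     = static_cast<int>(sum_q / new_scale + 0.5f) + new_offset;

        // Reference: dequantize, average, re-quantize in two steps
        const float real_avg  = src_scale * (sum_q * scale - src_offset);                   // 0.2
        const int   q_ref     = static_cast<int>(real_avg / dst_scale + 0.5f) + dst_offset; // 7

        std::printf("%d == %d\n", q_out, q_ref);                                            // 7 == 7
        return 0;
    }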
diff --git a/src/cpu/operators/CpuPool3d.h b/src/cpu/operators/CpuPool3d.h
index fc73cf0e0e..8a73f8a0af 100644
--- a/src/cpu/operators/CpuPool3d.h
+++ b/src/cpu/operators/CpuPool3d.h
@@ -47,7 +47,7 @@ public:
/** Set the src and dst tensors.
*
*
- * @param[in] src Source tensor info. Data types supported: F16/F32.
+ * @param[in] src Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED.
* @param[out] dst Destination tensor info. Data types supported: same as @p src.
* @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo.
*/
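As a usage sketch of the quantized path documented above (shapes, strides and quantization parameters are made up for illustration; CpuPool3d accepts only the NDHWC layout):

    #include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void pool3d_qasymm8_sketch()
    {
        // NDHWC: ACL's TensorShape lists dimensions fastest-moving first,
        // so this is (C=16, W=32, H=32, D=8, N=1).
        TensorInfo src_info(TensorShape(16U, 32U, 32U, 8U, 1U), 1, DataType::QASYMM8);
        src_info.set_data_layout(DataLayout::NDHWC);
        src_info.set_quantization_info(QuantizationInfo(0.2f, 10));

        Tensor src, dst;
        src.allocator()->init(src_info);

        const Pooling3dLayerInfo pool_info(PoolingType::MAX, Size3D(3, 3, 3), Size3D(2, 2, 1), Padding3D(1, 1, 1));

        NEPooling3dLayer pool;
        pool.configure(&src, &dst, pool_info); // dst info is auto-initialized here

        src.allocator()->allocate();
        dst.allocator()->allocate();
        pool.run();
    }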
diff --git a/tests/validation/NEON/Pooling3dLayer.cpp b/tests/validation/NEON/Pooling3dLayer.cpp
index ae5ca466b3..07054462f5 100644
--- a/tests/validation/NEON/Pooling3dLayer.cpp
+++ b/tests/validation/NEON/Pooling3dLayer.cpp
@@ -55,12 +55,43 @@ const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datase
framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
framework::dataset::make("ExcludePadding", { true, false }));
+const auto Pooling3dLayerDatasetQASYMM8Small = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+ framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true }));
+
+const auto Pooling3dLayerDatasetQASYMM8Large = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+ framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 0) })),
+ framework::dataset::make("ExcludePadding", { true }));
+
using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>;
constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */
+
+const auto qasymm8_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, 10) });
+const auto qasymm8_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo",
+{
+ QuantizationInfo(.2f, 10), // Same qinfo
+ QuantizationInfo(.1f, 5), // Multiplier <= 1
+ QuantizationInfo(2.f, 3) // Multiplier > 1
+});
+
+const auto qasymm8_signed_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, -10) });
+const auto qasymm8_signed_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo",
+{
+ QuantizationInfo(.2f, -10), // Same qinfo
+ QuantizationInfo(.1f, -5), // Multiplier <= 1
+ QuantizationInfo(2.f, -3) // Multiplier > 1
+});
+
} //namespace
TEST_SUITE(NEON)
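The three OutputQuantInfo entries above are chosen to exercise the no-requantization path plus the multiplier <= 1 and multiplier > 1 requantization paths. When src_qinfo != dst_qinfo, the kernel folds both qinfos into a single requant_qinfo (as in the scalar leftovers path of the kernel), which is mathematically a dequantize-then-quantize. A scalar model (helper name and values are illustrative):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Requantization between two affine qinfos: dequantize with the input
    // scale/offset, quantize with the output scale/offset, saturate to uint8.
    uint8_t requantize(uint8_t v, float in_scale, int32_t in_offset,
                       float out_scale, int32_t out_offset)
    {
        const float   real = (static_cast<int32_t>(v) - in_offset) * in_scale;                 // dequantize
        const int32_t q    = static_cast<int32_t>(std::lround(real / out_scale)) + out_offset; // quantize
        return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));          // saturate
    }
    // e.g. requantize(40, 0.2f, 10, 0.1f, 5) == 65: (40 - 10) * 0.2 = 6.0; 6.0 / 0.1 + 5 = 65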
@@ -280,8 +311,49 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::Datas
TEST_SUITE_END() // GlobalPooling
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+
+template <typename T>
+using NEPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<Tensor, Accessor, NEPooling3dLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Small,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ qasymm8_in_qinfo_dataset),
+ qasymm8_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Large,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ qasymm8_in_qinfo_dataset),
+ qasymm8_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Small,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+ qasymm8_signed_in_qinfo_dataset),
+ qasymm8_signed_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_s);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
TEST_SUITE_END() // Pooling3dLayer
TEST_SUITE_END() // NEON
} // namespace validation
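For the AVG cases in the datasets above, the reference implementation dequantizes the window, averages in floating point, and quantizes with the output qinfo; the 1-LSB tolerances (tolerance_qasymm8/tolerance_qasymm8_s) absorb the rounding differences against the NEON path. A scalar sketch of that reference behaviour (helper name and parameters are illustrative):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Quantized AVG pooling, reference-style: dequantize every window
    // element, average in float, quantize with the output qinfo.
    uint8_t avg_pool_ref(const uint8_t *window, int count,
                         float in_scale, int32_t in_offset,
                         float out_scale, int32_t out_offset)
    {
        float acc = 0.f;
        for(int i = 0; i < count; ++i)
        {
            acc += (static_cast<int32_t>(window[i]) - in_offset) * in_scale; // dequantize
        }
        const float   avg = acc / static_cast<float>(count);
        const int32_t q   = static_cast<int32_t>(std::lround(avg / out_scale)) + out_offset;
        return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));
    }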
diff --git a/tests/validation/fixtures/Pooling3dLayerFixture.h b/tests/validation/fixtures/Pooling3dLayerFixture.h
index c1b3519e80..563f1dcced 100644
--- a/tests/validation/fixtures/Pooling3dLayerFixture.h
+++ b/tests/validation/fixtures/Pooling3dLayerFixture.h
@@ -46,10 +46,10 @@ class Pooling3dLayerValidationGenericFixture : public framework::Fixture
{
public:
template <typename...>
- void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type)
+ void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo())
{
- _target = compute_target(shape, pool_info, data_type);
- _reference = compute_reference(shape, pool_info, data_type);
+ _target = compute_target(shape, pool_info, data_type, input_qinfo, output_qinfo);
+ _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo);
}
protected:
@@ -68,17 +68,17 @@ protected:
}
else // data type is quantized_asymmetric
{
- ARM_COMPUTE_ERROR("Passed Type Not Supported");
+ library->fill_tensor_uniform(tensor, 0);
}
}
TensorType compute_target(TensorShape shape, Pooling3dLayerInfo info,
- DataType data_type)
+ DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
{
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type, 1, QuantizationInfo(), DataLayout::NDHWC);
+ TensorType src = create_tensor<TensorType>(shape, data_type, 1, input_qinfo, DataLayout::NDHWC);
const TensorShape dst_shape = misc::shape_calculator::compute_pool3d_shape((src.info()->tensor_shape()), info);
- TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NDHWC);
+ TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, DataLayout::NDHWC);
// Create and configure function
FunctionType pool_layer;
@@ -103,17 +103,17 @@ protected:
return dst;
}
- SimpleTensor<T> compute_reference(TensorShape shape, Pooling3dLayerInfo info, DataType data_type)
+ SimpleTensor<T> compute_reference(TensorShape shape, Pooling3dLayerInfo info, DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
{
// Create reference
- SimpleTensor<T> src(shape, data_type, 1, QuantizationInfo(), DataLayout::NDHWC);
+ SimpleTensor<T> src(shape, data_type, 1, input_qinfo, DataLayout::NDHWC);
// Fill reference
fill(src);
- return reference::pooling_3d_layer<T>(src, info);
+ return reference::pooling_3d_layer<T>(src, info, output_qinfo);
}
- TensorType _target{};
- SimpleTensor<T> _reference{};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -129,6 +129,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class Pooling3dLayerValidationQuantizedFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, PoolingType pool_type, Size3D pool_size, Size3D stride, Padding3D padding, bool exclude_padding, DataType data_type,
+ QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo())
+ {
+ Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type, pool_size, stride, padding, exclude_padding),
+ data_type, input_qinfo, output_qinfo);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class Pooling3dLayerGlobalValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
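For orientation, one combined-dataset tuple from the QASYMM8 RunSmall case maps onto the new quantized fixture's setup() in this order (values are illustrative; assumes the arm_compute::test::validation context of the test files above):

    // One RunSmall combination, expanded by hand:
    Pooling3dLayerValidationQuantizedFixture<Tensor, Accessor, NEPooling3dLayer, uint8_t> fixture;
    fixture.setup(TensorShape(16U, 16U, 16U, 4U, 1U),  // 5D NDHWC shape (C, W, H, D, N)
                  PoolingType::AVG,                    // PoolingType
                  Size3D(3, 3, 3),                     // PoolingSize
                  Size3D(1, 1, 1),                     // Stride
                  Padding3D(0, 0, 0),                  // Padding
                  true,                                // ExcludePadding
                  DataType::QASYMM8,                   // DataType
                  QuantizationInfo(0.2f, 10),          // InputQuantInfo
                  QuantizationInfo(0.1f, 5));          // OutputQuantInfo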