aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2019-08-15 15:00:37 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-08-20 17:02:40 +0100
commit7c2d92c477e893b797d8db0a1bb1beffe7c26a63 (patch)
tree61a3c42893c8366f60359792543f29bbe15a8b65
parentd460c29db6fb51cafba7b5591cacc2ce4a7f4592 (diff)
downloadComputeLibrary-7c2d92c477e893b797d8db0a1bb1beffe7c26a63.tar.gz
COMPMID-2590: Avg_Pooling2d Fails on CL NHWC FP16
For large tensors with large pooling sizes, accumulation on FP16 gets saturated. This patch provides accumulation on F32 to overcome this issue. Change-Id: I5b2985c801970c8f2cb0053442296cc69f854186 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/1762 Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl19
1 files changed, 9 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 7d15d100e9..6b2da0b87f 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@
#define DIV_OP(x, y) (x * (1.f / y))
#define SQRT_OP(x) sqrt((x))
-#define DIV_OP_NHWC(x, y) (x * (VEC_DATA_TYPE(DATA_TYPE, 8))(1.f / y))
+#define DIV_OP_NHWC(x, y) (x * (VEC_DATA_TYPE(float, 8))(1.f / y))
#if STRIDE_X == 1
#define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output)
@@ -485,14 +485,14 @@ __kernel void pooling_layer_MxN_nchw(
}
#endif // defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)
-DATA_TYPE calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
- const int pad_x, const int pad_y, const int stride_x, const int stride_y)
+float calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
+ const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
int start_x = get_global_id(1) * stride_x - pad_x;
#if defined(DST_DEPTH)
int start_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
#else /* defined(DST_DEPTH) */
- int start_y = get_global_id(2) * stride_y - pad_y;
+ int start_y = get_global_id(2) * stride_y - pad_y;
#endif /* defined(DST_DEPTH) */
#if !defined(EXCLUDE_PADDING)
@@ -553,9 +553,8 @@ __kernel void pooling_layer_MxN_nhwc(
Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
#endif /* defined(DST_DEPTH) */
- VEC_DATA_TYPE(DATA_TYPE, 8)
- vdata = INITIAL_VALUE;
- DATA_TYPE sdata = INITIAL_VALUE;
+ VEC_DATA_TYPE(float, 8)
+ vdata = INITIAL_VALUE;
const int idx_width = get_global_id(1) * STRIDE_X;
#if defined(DST_DEPTH)
@@ -584,7 +583,7 @@ __kernel void pooling_layer_MxN_nhwc(
// Raise to power of 2 for L2 Pooling
data0 *= data0;
#endif /* defined(POOL_L2) */
- vdata = POOL_OP(vdata, data0);
+ vdata = POOL_OP(vdata, CONVERT(data0, float8));
}
}
@@ -599,5 +598,5 @@ __kernel void pooling_layer_MxN_nhwc(
#endif /* defined(POOL_L2) */
// Store result
- vstore8(vdata, 0, (__global DATA_TYPE *)output.ptr);
+ vstore8(CONVERT(vdata, VEC_DATA_TYPE(DATA_TYPE, 8)), 0, (__global DATA_TYPE *)output.ptr);
}