aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/direct_convolution1x1.cl
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2017-08-14 11:26:37 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commitdef665a1a2e92baa1cfb192b65425b91ff6046b3 (patch)
tree03271d3e78190d81709618f40191d105b7c94917 /src/core/CL/cl_kernels/direct_convolution1x1.cl
parentfc2817dc0436ef2d5064df0a061aafd3d324d894 (diff)
downloadComputeLibrary-def665a1a2e92baa1cfb192b65425b91ff6046b3.tar.gz
COMPMID-474 - Add support for QS8/QS16 DirectConvolution CL
Change-Id: I537e4acbc02c8d880ff8630ea62223e0f1a1dda3 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/82875 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/direct_convolution1x1.cl')
-rw-r--r--src/core/CL/cl_kernels/direct_convolution1x1.cl25
1 files changed, 21 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution1x1.cl b/src/core/CL/cl_kernels/direct_convolution1x1.cl
index ec0551b018..66c618e033 100644
--- a/src/core/CL/cl_kernels/direct_convolution1x1.cl
+++ b/src/core/CL/cl_kernels/direct_convolution1x1.cl
@@ -23,6 +23,23 @@
*/
#include "helpers.h"
+#if defined(FIXED_POINT_POSITION)
+#include "fixed_point.h"
+
+#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE_PROMOTED, 8)
+#define MUL_OP(a, b) MUL_SAT_OP_EXPAND(CONVERT((a), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), CONVERT((b), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), DATA_TYPE_PROMOTED, 8, FIXED_POINT_POSITION)
+
+// There is no need to have a larger intermediate type for qs32 because all the arguments are already promoted
+MULQ_SAT_IMPL(qs32x8, qs32x8)
+
+#else /* FIXED_POINT_POSITION */
+
+#define ADD_OP(a, b) ((a) + (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define CONVERT_SAT(a, b) ((a))
+
+#endif /* FIXED_POINT_POSITION */
+
#if STRIDE_X == 3
#define INPUT_PIXEL_STR(data_size) extract_input_stride3_##data_size
#define INPUT_PIXEL(data_size) INPUT_PIXEL_STR(data_size)
@@ -165,7 +182,7 @@ __kernel void direct_convolution1x1(
Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
#endif /* defined(HAS_BIAS) */
- VEC_DATA_TYPE(DATA_TYPE, 8)
+ VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)
pixels = 0;
const uint z_index = get_global_id(2);
@@ -177,15 +194,15 @@ __kernel void direct_convolution1x1(
DATA_TYPE weight = *(__global DATA_TYPE *)weights.ptr;
VEC_DATA_TYPE(DATA_TYPE, 8)
input_pixel = INPUT_PIXEL(DATA_SIZE)((__global DATA_TYPE *)src.ptr);
- pixels += weight * input_pixel;
+ pixels = ADD_OP(pixels, MUL_OP((VEC_DATA_TYPE(DATA_TYPE, 8))weight, input_pixel));
src.ptr += src_stride_z;
weights.ptr += weights_stride_z;
}
#ifdef HAS_BIAS
- pixels += (VEC_DATA_TYPE(DATA_TYPE, 8)) * ((__global DATA_TYPE *)(vector_offset(&biases, z_index)));
+ pixels = ADD_OP(pixels, (VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)) * ((__global DATA_TYPE *)(vector_offset(&biases, z_index))));
#endif /* defined(HAS_BIAS) */
- vstore8(pixels, 0, (__global DATA_TYPE *)dst.ptr);
+ vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE, 8)), 0, (__global DATA_TYPE *)dst.ptr);
}
#endif // defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) \ No newline at end of file