From da37e2fa661f85090bd4a3b93c07178df268bd8c Mon Sep 17 00:00:00 2001 From: steniu01 Date: Thu, 29 Jun 2017 10:14:58 +0100 Subject: COMPMID-431 Port CLDepthConvert to use 8-bit and 16-bit fixed point Change-Id: Iedea9e985427e6242f34a5362615f79c0526d5bd Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79786 Reviewed-by: Georgios Pinitas Tested-by: Kaizen --- src/core/CL/cl_kernels/depth_convert.cl | 49 ++++++++++++++++++++++++++++----- src/core/CL/cl_kernels/fixed_point.h | 30 ++++++++++++++++++++ 2 files changed, 72 insertions(+), 7 deletions(-) (limited to 'src/core/CL/cl_kernels') diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl index 3a1c7ca2c5..a9b7284c83 100644 --- a/src/core/CL/cl_kernels/depth_convert.cl +++ b/src/core/CL/cl_kernels/depth_convert.cl @@ -23,24 +23,47 @@ */ #include "helpers.h" +#if defined(FIXED_POINT_POSITION) + +#include "fixed_point.h" + +#ifdef SATURATE +#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) +#define CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type##_sat(x, fixed_point_position) +#else /* SATURATE */ +#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) +#define CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position) +#endif /* SATURATE */ + +#define CONVERT_UP(x, in_type, out_type, fixed_point_position) CONVERT_UP1(x, in_type, out_type, fixed_point_position) +#define CONVERT_UP1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position) + +#else /* FIXED_POINT_POSITION */ + #ifdef SATURATE #define CONVERT_DOWN(x, type) CONVERT_SAT(x, type) #else /* SATURATE */ #define CONVERT_DOWN(x, type) CONVERT(x, type) #endif /* SATURATE */ +#define CONVERT_UP(x, type) CONVERT(x, type) + +#endif /* FIXED_POINT_POSITION */ + /** This function performs a down-scaling depth conversion. * * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short * - * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32 + * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3 + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F16, F32 * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32 + * @param[out] out_ptr Pointer to the destination image. Supported data types: QS8, U8, QS16, U16, S16, U32, S32 * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) @@ -60,7 +83,12 @@ __kernel void convert_depth_down( // Load data VEC_DATA_TYPE(DATA_TYPE_IN, 16) in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); + +#if defined(FIXED_POINT_POSITION) + vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr); +#else /* FIXED_POINT_POSITION */ vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); +#endif /* FIXED_POINT_POSITION */ } /** This function performs a up-scaling depth conversion. @@ -68,13 +96,15 @@ __kernel void convert_depth_down( * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short * - * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32 + * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3 + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8, QS8, U16, S16, QS16, U32 or S32 * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32 + * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32, S32, F16 or F32 * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) @@ -92,7 +122,12 @@ __kernel void convert_depth_up( Image out = CONVERT_TO_IMAGE_STRUCT(out); // Load data - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - in_data = CONVERT(vload16(0, (__global DATA_TYPE_IN *)in.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); - vstore16(in_data << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); + VEC_DATA_TYPE(DATA_TYPE_IN, 16) + in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); + +#if defined(FIXED_POINT_POSITION) + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr); +#else /* FIXED_POINT_POSITION */ + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); +#endif /* FIXED_POINT_POSITION */ } diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h index 9fd3a6f899..5d340c4e95 100644 --- a/src/core/CL/cl_kernels/fixed_point.h +++ b/src/core/CL/cl_kernels/fixed_point.h @@ -381,4 +381,34 @@ INVSQRTQ_IMPL(qs8, qs8x16, 16) #define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position)) #define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position) +#define floatx16 float16 +#define float16_TYPE float16 + +#define CONVERTQ_DOWN_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \ + { \ + return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \ + } + +CONVERTQ_DOWN_IMPL(float16, qs8x16) +CONVERTQ_DOWN_IMPL(float16, qs16x16) + +#define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position) \ + { \ + return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \ + } + +CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16) +CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16) + +#define CONVERTQ_UP_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \ + { \ + return CONVERT(a, out_type) / (1 << fixed_point_position); \ + } + +CONVERTQ_UP_IMPL(qs8x16, float16) +CONVERTQ_UP_IMPL(qs16x16, float16) + #endif // ARM_COMPUTE_FIXED_POINT_H -- cgit v1.2.1