diff options
Diffstat (limited to 'src/core/CL/cl_kernels/dequantization_layer.cl')
-rw-r--r-- | src/core/CL/cl_kernels/dequantization_layer.cl | 35 |
1 files changed, 12 insertions, 23 deletions
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl index 5826847a5e..7550b4ba76 100644 --- a/src/core/CL/cl_kernels/dequantization_layer.cl +++ b/src/core/CL/cl_kernels/dequantization_layer.cl @@ -90,13 +90,13 @@ __kernel void dequantization_layer( #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET) #if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -113,13 +113,11 @@ __kernel void dequantization_layer( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nchw( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw( VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = scale[get_global_id(2)]; - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = offset[get_global_id(2)]; - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]); #endif // defined(LAST_ACCESSED_X) } -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nhwc( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc( input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; scale -= max(xi - (int)LAST_ACCESSED_X, 0); - offset -= max(xi - (int)LAST_ACCESSED_X, 0); // Load data VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = VLOAD(VEC_SIZE)(0, &scale[xi]); - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = VLOAD(VEC_SIZE)(0, &offset[xi]); - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]); #endif // defined(LAST_ACCESSED_X) } #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) |