diff options
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/CLHelpers.cpp | 5 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/dequantization_layer.cl | 35 | ||||
-rw-r--r-- | src/core/CL/kernels/CLDequantizationLayerKernel.cpp | 3 |
3 files changed, 13 insertions, 30 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 26660ce215..17274d38ad 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -40,7 +40,6 @@ std::string get_cl_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -76,7 +75,6 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "ushort"; case DataType::S8: case DataType::QSYMM8: @@ -124,7 +122,6 @@ std::string get_cl_select_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -161,7 +158,6 @@ std::string get_data_size_from_data_type(const DataType &dt) case DataType::QSYMM8: case DataType::QASYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return "8"; case DataType::U16: case DataType::S16: @@ -306,7 +302,6 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt) case DataType::QASYMM8: case DataType::QSYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>(); case DataType::U16: case DataType::S16: diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl index 5826847a5e..7550b4ba76 100644 --- a/src/core/CL/cl_kernels/dequantization_layer.cl +++ b/src/core/CL/cl_kernels/dequantization_layer.cl @@ -90,13 +90,13 @@ __kernel void dequantization_layer( #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET) #if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -113,13 +113,11 @@ __kernel void dequantization_layer( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nchw( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw( VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = scale[get_global_id(2)]; - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = offset[get_global_id(2)]; - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]); #endif // defined(LAST_ACCESSED_X) } -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nhwc( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc( input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; scale -= max(xi - (int)LAST_ACCESSED_X, 0); - offset -= max(xi - (int)LAST_ACCESSED_X, 0); // Load data VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = VLOAD(VEC_SIZE)(0, &scale[xi]); - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = VLOAD(VEC_SIZE)(0, &offset[xi]); - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]); #endif // defined(LAST_ACCESSED_X) } #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 3ec0b87636..60659faaaf 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -40,7 +40,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { @@ -144,7 +144,6 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu { unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters _kernel.setArg(idx++, _input->quantization().scale->cl_buffer()); - _kernel.setArg(idx++, _input->quantization().offset->cl_buffer()); } do |