From 8217c8e4f488eb32733c481ab3a4d905069479f1 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 11 Nov 2019 18:24:22 +0000 Subject: COMPMID-2895: Remove QASYMM8_PER_CHANNEL data type Change-Id: I2d1b77370f8eceeaeae95306b4db5d90ababb76f Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2266 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/CLHelpers.cpp | 5 --- src/core/CL/cl_kernels/dequantization_layer.cl | 35 +++++++----------- .../CL/kernels/CLDequantizationLayerKernel.cpp | 3 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 41 ++++++++-------------- src/core/Utils.cpp | 4 --- 5 files changed, 28 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 26660ce215..17274d38ad 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -40,7 +40,6 @@ std::string get_cl_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -76,7 +75,6 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "ushort"; case DataType::S8: case DataType::QSYMM8: @@ -124,7 +122,6 @@ std::string get_cl_select_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -161,7 +158,6 @@ std::string get_data_size_from_data_type(const DataType &dt) case DataType::QSYMM8: case DataType::QASYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return "8"; case DataType::U16: case DataType::S16: @@ -306,7 +302,6 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt) case DataType::QASYMM8: case DataType::QSYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return device.getInfo(); case DataType::U16: case DataType::S16: diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl index 5826847a5e..7550b4ba76 100644 --- a/src/core/CL/cl_kernels/dequantization_layer.cl +++ b/src/core/CL/cl_kernels/dequantization_layer.cl @@ -90,13 +90,13 @@ __kernel void dequantization_layer( #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET) #if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -113,13 +113,11 @@ __kernel void dequantization_layer( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nchw( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw( VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = scale[get_global_id(2)]; - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = offset[get_global_id(2)]; - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]); #endif // defined(LAST_ACCESSED_X) } -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nhwc( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc( input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; scale -= max(xi - (int)LAST_ACCESSED_X, 0); - offset -= max(xi - (int)LAST_ACCESSED_X, 0); // Load data VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = VLOAD(VEC_SIZE)(0, &scale[xi]); - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = VLOAD(VEC_SIZE)(0, &offset[xi]); - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]); #endif // defined(LAST_ACCESSED_X) } #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 3ec0b87636..60659faaaf 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -40,7 +40,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { @@ -144,7 +144,6 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu { unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters _kernel.setArg(idx++, _input->quantization().scale->cl_buffer()); - _kernel.setArg(idx++, _input->quantization().offset->cl_buffer()); } do diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 5abd6a122d..f555df3828 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -43,7 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { @@ -160,10 +160,9 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win } template -void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) +void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) { - const std::vector scale = input->info()->quantization_info().scale(); - const std::vector offset = input->info()->quantization_info().offset(); + const auto scale = input->info()->quantization_info().scale(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -179,14 +178,14 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor * execute_window_loop(win, [&](const Coordinates & id) { - const auto in_ptr = reinterpret_cast(in.ptr()); + const auto in_ptr = reinterpret_cast(in.ptr()); const auto out_ptr = reinterpret_cast(out.ptr()); int x = window_start_x; for(; x <= (window_end_x - window_step_x); x += window_step_x) { const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]); + const auto vdeq = vdequantize(vin, scale[id.z()]); store_result(reinterpret_cast(out_ptr + x), vdeq); } @@ -194,18 +193,17 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor * // Compute left-over elements for(; x < window_end_x; ++x) { - uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()], offset[id.z()])); + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()])); } }, in, out); } template -void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) +void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) { - const std::vector scale = input->info()->quantization_info().scale(); - const std::vector offset = input->info()->quantization_info().offset(); + const auto scale = input->info()->quantization_info().scale(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -221,7 +219,7 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * execute_window_loop(win, [&](const Coordinates &) { - const auto in_ptr = reinterpret_cast(in.ptr()); + const auto in_ptr = reinterpret_cast(in.ptr()); const auto out_ptr = reinterpret_cast(out.ptr()); int x = window_start_x; @@ -236,17 +234,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15] } }; - const int32x4x4_t voffset = - { - { - offset[x + 0], offset[x + 1], offset[x + 2], offset[x + 3], - offset[x + 4], offset[x + 5], offset[x + 6], offset[x + 7], - offset[x + 8], offset[x + 9], offset[x + 10], offset[x + 11], - offset[x + 12], offset[x + 13], offset[x + 14], offset[x + 15] - } - }; const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, vscale, voffset); + const auto vdeq = vdequantize(vin, vscale); store_result(reinterpret_cast(out_ptr + x), vdeq); } @@ -254,8 +243,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * // Compute left-over elements for(; x < window_end_x; ++x) { - uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[x], offset[x])); + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[x])); } }, in, out); @@ -353,8 +342,8 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window case DataType::QASYMM8: run_dequantization_qasymm8(input, output, window); break; - case DataType::QASYMM8_PER_CHANNEL: - input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qasymm8_per_channel_nhwc(input, output, window) : run_dequantization_qasymm8_per_channel_nchw(input, output, window); + case DataType::QSYMM8_PER_CHANNEL: + input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc(input, output, window) : run_dequantization_qsymm8_per_channel_nchw(input, output, window); break; case DataType::QSYMM8: run_dequantization_qsymm8(input, output, window); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 6d276d1322..9f1255dcaf 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -160,7 +160,6 @@ const std::string &arm_compute::string_from_data_type(DataType dt) { DataType::SIZET, "SIZET" }, { DataType::QSYMM8, "QSYMM8" }, { DataType::QSYMM8_PER_CHANNEL, "QSYMM8_PER_CHANNEL" }, - { DataType::QASYMM8_PER_CHANNEL, "QASYMM8_PER_CHANNEL" }, { DataType::QASYMM8, "QASYMM8" }, { DataType::QASYMM8_SIGNED, "QASYMM8_SIGNED" }, { DataType::QSYMM16, "QSYMM16" }, @@ -287,7 +286,6 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: // Needs conversion to 32 bit, otherwise interpreted as ASCII values ss << uint32_t(value.get()); converted_string = ss.str(); @@ -446,7 +444,6 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: print_consecutive_elements_impl(s, ptr, n, stream_width, element_delim); break; case DataType::S8: @@ -485,7 +482,6 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return max_consecutive_elements_display_width_impl(s, ptr, n); case DataType::S8: case DataType::QASYMM8_SIGNED: -- cgit v1.2.1