From 8217c8e4f488eb32733c481ab3a4d905069479f1 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 11 Nov 2019 18:24:22 +0000 Subject: COMPMID-2895: Remove QASYMM8_PER_CHANNEL data type Change-Id: I2d1b77370f8eceeaeae95306b4db5d90ababb76f Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2266 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../core/CL/kernels/CLDequantizationLayerKernel.h | 4 +-- arm_compute/core/NEON/NEAsymm.h | 17 +++++---- .../NEON/kernels/NEDequantizationLayerKernel.h | 4 +-- arm_compute/core/QuantizationInfo.h | 15 -------- arm_compute/core/Types.h | 39 ++++++++++---------- arm_compute/core/Utils.h | 5 --- .../runtime/CL/functions/CLDequantizationLayer.h | 4 +-- .../runtime/NEON/functions/NEDequantizationLayer.h | 4 +-- src/core/CL/CLHelpers.cpp | 5 --- src/core/CL/cl_kernels/dequantization_layer.cl | 35 +++++++----------- .../CL/kernels/CLDequantizationLayerKernel.cpp | 3 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 41 ++++++++-------------- src/core/Utils.cpp | 4 --- tests/AssetsLibrary.h | 2 +- tests/Utils.h | 1 - tests/datasets/DatatypeDataset.h | 2 +- tests/validation/Helpers.cpp | 9 ----- tests/validation/Helpers.h | 9 ----- .../fixtures/DequantizationLayerFixture.h | 12 +++---- tests/validation/reference/DequantizationLayer.cpp | 6 ++-- utils/TypePrinter.h | 3 -- utils/Utils.h | 1 - 22 files changed, 72 insertions(+), 153 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h index 830d7518ce..739e2d45d2 100644 --- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h @@ -48,13 +48,13 @@ public: ~CLDequantizationLayerKernel() = default; /** Set the input, output, min and max. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor. Data types supported: F16/F32. */ void configure(const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data types supported: F16/F32. * * @return a status diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h index a3bd7e28f0..c75a58046b 100644 --- a/arm_compute/core/NEON/NEAsymm.h +++ b/arm_compute/core/NEON/NEAsymm.h @@ -325,23 +325,22 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs return vdequantized_input; } -/** Dequantize following an asymmetric quantization scheme a neon vector holding 16 quantized values. +/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values. * - * @param[in] qv Input values to be dequantized. - * @param[in] vscale Vector containing quantization scaling factors. - * @param[in] voffset Vector containing quantization offset. + * @param[in] qv Input values to be dequantized. + * @param[in] vscale Vector containing quantization scaling factors. * * @return Dequantized values in a neon vector */ -inline float32x4x4_t vdequantize(const uint8x16_t &qv, const float32x4x4_t vscale, const int32x4x4_t voffset) +inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale) { const float32x4x4_t vdequantized_input = { { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[0])), vscale.val[0]), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[1])), vscale.val[1]), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[2])), vscale.val[2]), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[3])), vscale.val[3]), + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]), + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]), + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]), + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]), } }; return vdequantized_input; diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h index 3e7feda650..7e65384677 100644 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -52,13 +52,13 @@ public: ~NEDequantizationLayerKernel() = default; /** Set input, output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data types supported: F16/F32. * * @return a status diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index 949ee66b7c..ebd9b677da 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -265,21 +265,6 @@ inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &q return quantized; } -/** Quantize a value given a 8-bit asymmetric per channel quantization scheme - * - * @param[in] value Value to quantize - * @param[in] qinfo Quantization information to use for quantizing - * @param[in] channel_id channel index into the scale vector of quantization info - * - * @return Quantized value - */ -inline int8_t quantize_qasymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0) -{ - int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP); - quantized = std::max(0, std::min(quantized, 255)); - return quantized; -} - /** Dequantize a value given a 8-bit asymmetric quantization scheme * * @param[in] value Value to dequantize diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 9551cc6547..851292f1e1 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -73,26 +73,25 @@ enum class Format /** Available data types */ enum class DataType { - UNKNOWN, /**< Unknown data type */ - U8, /**< unsigned 8-bit number */ - S8, /**< signed 8-bit number */ - QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ - QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ - QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ - QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ - QASYMM8_PER_CHANNEL, /**< quantized, asymmetric per channel fixed-point 8-bit number */ - U16, /**< unsigned 16-bit number */ - S16, /**< signed 16-bit number */ - QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ - QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ - U32, /**< unsigned 32-bit number */ - S32, /**< signed 32-bit number */ - U64, /**< unsigned 64-bit number */ - S64, /**< signed 64-bit number */ - F16, /**< 16-bit floating-point number */ - F32, /**< 32-bit floating-point number */ - F64, /**< 64-bit floating-point number */ - SIZET /**< size_t */ + UNKNOWN, /**< Unknown data type */ + U8, /**< unsigned 8-bit number */ + S8, /**< signed 8-bit number */ + QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ + QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ + QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ + QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ + U16, /**< unsigned 16-bit number */ + S16, /**< signed 16-bit number */ + QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ + QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ + U32, /**< unsigned 32-bit number */ + S32, /**< signed 32-bit number */ + U64, /**< unsigned 64-bit number */ + S64, /**< signed 64-bit number */ + F16, /**< 16-bit floating-point number */ + F32, /**< 32-bit floating-point number */ + F64, /**< 64-bit floating-point number */ + SIZET /**< size_t */ }; /** Available Sampling Policies */ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index a6e1ea1a89..366d5dcc68 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -116,7 +116,6 @@ inline size_t data_size_from_type(DataType data_type) case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return 1; case DataType::U16: case DataType::S16: @@ -537,7 +536,6 @@ inline DataType get_promoted_data_type(DataType dt) case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: case DataType::QSYMM16: case DataType::QASYMM16: case DataType::F16: @@ -1029,7 +1027,6 @@ inline bool is_data_type_quantized(DataType dt) case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: case DataType::QSYMM16: case DataType::QASYMM16: return true; @@ -1050,7 +1047,6 @@ inline bool is_data_type_quantized_asymmetric(DataType dt) { case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: - case DataType::QASYMM8_PER_CHANNEL: case DataType::QASYMM16: return true; default: @@ -1088,7 +1084,6 @@ inline bool is_data_type_quantized_per_channel(DataType dt) switch(dt) { case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return true; default: return false; diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h index c519311fb1..f3e507a267 100644 --- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h @@ -40,13 +40,13 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. - * Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data type supported: F16/F32. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index 88c8777a68..4031ae01ed 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -39,13 +39,13 @@ class NEDequantizationLayer : public INESimpleFunctionNoBorder public: /** Configure the kernel. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data type supported: F16/F32. * * @return a status diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 26660ce215..17274d38ad 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -40,7 +40,6 @@ std::string get_cl_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -76,7 +75,6 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "ushort"; case DataType::S8: case DataType::QSYMM8: @@ -124,7 +122,6 @@ std::string get_cl_select_type_from_data_type(const DataType &dt) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return "uchar"; case DataType::S8: case DataType::QSYMM8: @@ -161,7 +158,6 @@ std::string get_data_size_from_data_type(const DataType &dt) case DataType::QSYMM8: case DataType::QASYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return "8"; case DataType::U16: case DataType::S16: @@ -306,7 +302,6 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt) case DataType::QASYMM8: case DataType::QSYMM8: case DataType::QSYMM8_PER_CHANNEL: - case DataType::QASYMM8_PER_CHANNEL: return device.getInfo(); case DataType::U16: case DataType::S16: diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl index 5826847a5e..7550b4ba76 100644 --- a/src/core/CL/cl_kernels/dequantization_layer.cl +++ b/src/core/CL/cl_kernels/dequantization_layer.cl @@ -90,13 +90,13 @@ __kernel void dequantization_layer( #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET) #if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -113,13 +113,11 @@ __kernel void dequantization_layer( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nchw( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw( VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = scale[get_global_id(2)]; - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = offset[get_global_id(2)]; - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]); #endif // defined(LAST_ACCESSED_X) } -/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC) +/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC) * * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw( * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] scale Pointer to buffer with the per channel quantized scales - * @param[in] offset Pointer to buffer with the per channel quantized offsets */ __kernel void dequantization_layer_per_channel_nhwc( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output), - __global float *scale, - __global int *offset) + __global float *scale) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); @@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc( input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; scale -= max(xi - (int)LAST_ACCESSED_X, 0); - offset -= max(xi - (int)LAST_ACCESSED_X, 0); // Load data VEC_DATA_TYPE(int, VEC_SIZE) val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE)); - // Create scale and offset vectors + // Create scale vectors const VEC_DATA_TYPE(float, VEC_SIZE) vscale = VLOAD(VEC_SIZE)(0, &scale[xi]); - const VEC_DATA_TYPE(int, VEC_SIZE) - voffset = VLOAD(VEC_SIZE)(0, &offset[xi]); - // Dequantize VEC_DATA_TYPE(float, VEC_SIZE) - res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE)); + res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE)); // Store result VSTORE(VEC_SIZE) (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr); #else // !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]); + *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]); #endif // defined(LAST_ACCESSED_X) } #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 3ec0b87636..60659faaaf 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -40,7 +40,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { @@ -144,7 +144,6 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu { unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters _kernel.setArg(idx++, _input->quantization().scale->cl_buffer()); - _kernel.setArg(idx++, _input->quantization().offset->cl_buffer()); } do diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 5abd6a122d..f555df3828 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -43,7 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { @@ -160,10 +160,9 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win } template -void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) +void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) { - const std::vector scale = input->info()->quantization_info().scale(); - const std::vector offset = input->info()->quantization_info().offset(); + const auto scale = input->info()->quantization_info().scale(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -179,14 +178,14 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor * execute_window_loop(win, [&](const Coordinates & id) { - const auto in_ptr = reinterpret_cast(in.ptr()); + const auto in_ptr = reinterpret_cast(in.ptr()); const auto out_ptr = reinterpret_cast(out.ptr()); int x = window_start_x; for(; x <= (window_end_x - window_step_x); x += window_step_x) { const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]); + const auto vdeq = vdequantize(vin, scale[id.z()]); store_result(reinterpret_cast(out_ptr + x), vdeq); } @@ -194,18 +193,17 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor * // Compute left-over elements for(; x < window_end_x; ++x) { - uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()], offset[id.z()])); + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()])); } }, in, out); } template -void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) +void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) { - const std::vector scale = input->info()->quantization_info().scale(); - const std::vector offset = input->info()->quantization_info().offset(); + const auto scale = input->info()->quantization_info().scale(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -221,7 +219,7 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * execute_window_loop(win, [&](const Coordinates &) { - const auto in_ptr = reinterpret_cast(in.ptr()); + const auto in_ptr = reinterpret_cast(in.ptr()); const auto out_ptr = reinterpret_cast(out.ptr()); int x = window_start_x; @@ -236,17 +234,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15] } }; - const int32x4x4_t voffset = - { - { - offset[x + 0], offset[x + 1], offset[x + 2], offset[x + 3], - offset[x + 4], offset[x + 5], offset[x + 6], offset[x + 7], - offset[x + 8], offset[x + 9], offset[x + 10], offset[x + 11], - offset[x + 12], offset[x + 13], offset[x + 14], offset[x + 15] - } - }; const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, vscale, voffset); + const auto vdeq = vdequantize(vin, vscale); store_result(reinterpret_cast(out_ptr + x), vdeq); } @@ -254,8 +243,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor * // Compute left-over elements for(; x < window_end_x; ++x) { - uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[x], offset[x])); + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[x])); } }, in, out); @@ -353,8 +342,8 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window case DataType::QASYMM8: run_dequantization_qasymm8(input, output, window); break; - case DataType::QASYMM8_PER_CHANNEL: - input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qasymm8_per_channel_nhwc(input, output, window) : run_dequantization_qasymm8_per_channel_nchw(input, output, window); + case DataType::QSYMM8_PER_CHANNEL: + input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc(input, output, window) : run_dequantization_qsymm8_per_channel_nchw(input, output, window); break; case DataType::QSYMM8: run_dequantization_qsymm8(input, output, window); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 6d276d1322..9f1255dcaf 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -160,7 +160,6 @@ const std::string &arm_compute::string_from_data_type(DataType dt) { DataType::SIZET, "SIZET" }, { DataType::QSYMM8, "QSYMM8" }, { DataType::QSYMM8_PER_CHANNEL, "QSYMM8_PER_CHANNEL" }, - { DataType::QASYMM8_PER_CHANNEL, "QASYMM8_PER_CHANNEL" }, { DataType::QASYMM8, "QASYMM8" }, { DataType::QASYMM8_SIGNED, "QASYMM8_SIGNED" }, { DataType::QSYMM16, "QSYMM16" }, @@ -287,7 +286,6 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: // Needs conversion to 32 bit, otherwise interpreted as ASCII values ss << uint32_t(value.get()); converted_string = ss.str(); @@ -446,7 +444,6 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: print_consecutive_elements_impl(s, ptr, n, stream_width, element_delim); break; case DataType::S8: @@ -485,7 +482,6 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return max_consecutive_elements_display_width_impl(s, ptr, n); case DataType::S8: case DataType::QASYMM8_SIGNED: diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h index 280f6ddbd0..f8635ea576 100644 --- a/tests/AssetsLibrary.h +++ b/tests/AssetsLibrary.h @@ -632,7 +632,6 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: { std::uniform_int_distribution distribution_u8(std::numeric_limits::lowest(), std::numeric_limits::max()); fill(tensor, distribution_u8, seed_offset); @@ -640,6 +639,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t } case DataType::S8: case DataType::QSYMM8: + case DataType::QSYMM8_PER_CHANNEL: case DataType::QASYMM8_SIGNED: { std::uniform_int_distribution distribution_s8(std::numeric_limits::lowest(), std::numeric_limits::max()); diff --git a/tests/Utils.h b/tests/Utils.h index 6b3935e526..aff63d3119 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -355,7 +355,6 @@ void store_value_with_data_type(void *ptr, T value, DataType data_type) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: *reinterpret_cast(ptr) = value; break; case DataType::S8: diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h index 9bdb346340..df0ddb3ce5 100644 --- a/tests/datasets/DatatypeDataset.h +++ b/tests/datasets/DatatypeDataset.h @@ -54,7 +54,7 @@ public: QuantizedPerChannelTypes() : ContainerDataset("QuantizedPerChannelTypes", { - DataType::QASYMM8_PER_CHANNEL + DataType::QSYMM8_PER_CHANNEL }) { } diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index 95a5548628..e362e05b81 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -335,15 +335,6 @@ std::pair get_symm_quantized_per_channel_bounds(const QuantizationInfo return std::pair { min_bound, max_bound }; } -std::pair get_asymm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id) -{ - ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower equal than max"); - - const int min_bound = quantize_qasymm8_per_channel(min, quant_info, channel_id); - const int max_bound = quantize_qasymm8_per_channel(max, quant_info, channel_id); - return std::pair { min_bound, max_bound }; -} - template void get_tile(const SimpleTensor &in, SimpleTensor &roi, const Coordinates &coord); template void get_tile(const SimpleTensor &in, SimpleTensor &roi, const Coordinates &coord); template void get_tile(const SimpleTensor &in, SimpleTensor &roi, const Coordinates &coord); diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 2c1df39f14..a0169752de 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -285,15 +285,6 @@ std::pair get_quantized_bounds(const QuantizationInfo &quant_info, flo * @param[in] channel_id Channel id for per channel quantization info. */ std::pair get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0); - -/** Helper function to compute asymmetric quantized min and max bounds - * - * @param[in] quant_info Quantization info to be used for conversion - * @param[in] min Floating point minimum value to be quantized - * @param[in] max Floating point maximum value to be quantized - * @param[in] channel_id Channel id for per channel quantization info. - */ -std::pair get_asymm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0); } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h index c7a818fcc7..f44f8658c2 100644 --- a/tests/validation/fixtures/DequantizationLayerFixture.h +++ b/tests/validation/fixtures/DequantizationLayerFixture.h @@ -101,12 +101,12 @@ protected: switch(src_data_type) { case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: { SimpleTensor src{ shape, src_data_type, 1, _quantization_info }; fill(src); return reference::dequantization_layer(src); } + case DataType::QSYMM8_PER_CHANNEL: case DataType::QSYMM8: { SimpleTensor src{ shape, src_data_type, 1, _quantization_info }; @@ -138,16 +138,14 @@ protected: return QuantizationInfo(1.f / distribution_scale_q16(gen)); case DataType::QSYMM8: return QuantizationInfo(1.f / distribution_scale_q8(gen)); - case DataType::QASYMM8_PER_CHANNEL: + case DataType::QSYMM8_PER_CHANNEL: { - std::vector scale(num_channels); - std::vector offset(num_channels); + std::vector scale(num_channels); for(int32_t i = 0; i < num_channels; ++i) { - scale[i] = 1.f / distribution_scale_q8(gen); - offset[i] = distribution_offset_q8(gen); + scale[i] = 1.f / distribution_offset_q8(gen); } - return QuantizationInfo(scale, offset); + return QuantizationInfo(scale); } case DataType::QASYMM8: return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen)); diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index 69a49a3d6d..16f25c4427 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -65,16 +65,14 @@ SimpleTensor dequantization_layer(const SimpleTensor &src) const int C = src.shape().z(); const int N = src.shape().total_size() / (WH * C); - const std::vector qscales = src.quantization_info().scale(); - const std::vector qoffsets = src.quantization_info().offset(); - const bool has_offsets = src_data_type == DataType::QASYMM8_PER_CHANNEL; + const std::vector qscales = src.quantization_info().scale(); for(int n = 0; n < N; ++n) { for(int c = 0; c < C; ++c) { const size_t idx = n * C * WH + c * WH; - const UniformQuantizationInfo channel_qinfo = { qscales[c], has_offsets ? qoffsets[c] : 0 }; + const UniformQuantizationInfo channel_qinfo = { qscales[c], 0 }; // Dequantize slice for(int s = 0; s < WH; ++s) diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 41bfd1d6c8..ede2ea4b63 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -631,9 +631,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type) case DataType::QSYMM8_PER_CHANNEL: os << "QSYMM8_PER_CHANNEL"; break; - case DataType::QASYMM8_PER_CHANNEL: - os << "QASYMM8_PER_CHANNEL"; - break; case DataType::S8: os << "S8"; break; diff --git a/utils/Utils.h b/utils/Utils.h index 1d59ee5679..752271cc79 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -171,7 +171,6 @@ inline std::string get_typestring(DataType data_type) { case DataType::U8: case DataType::QASYMM8: - case DataType::QASYMM8_PER_CHANNEL: return no_endianness + "u" + support::cpp11::to_string(sizeof(uint8_t)); case DataType::S8: case DataType::QSYMM8: -- cgit v1.2.1