author    Georgios Pinitas <georgios.pinitas@arm.com>  2019-11-11 18:24:22 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com>  2019-11-12 21:22:47 +0000
commit    8217c8e4f488eb32733c481ab3a4d905069479f1 (patch)
tree      f54e10c459d5bc5b847ef0caba075a636d9f1df5 /src
parent    94e0cf960ea6116eb57fa88d9b951f859b52c602 (diff)
download  ComputeLibrary-8217c8e4f488eb32733c481ab3a4d905069479f1.tar.gz
COMPMID-2895: Remove QASYMM8_PER_CHANNEL data type
Change-Id: I2d1b77370f8eceeaeae95306b4db5d90ababb76f
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2266
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
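
For context: QASYMM8_PER_CHANNEL paired a per-channel scale with a per-channel zero-point offset, while the remaining QSYMM8_PER_CHANNEL type is symmetric over signed 8-bit values and carries scales only, which is why the offset buffers and arguments disappear throughout this patch. A minimal scalar sketch of the two schemes (illustrative helper names, not the library's own):

    #include <cstdint>
    #include <vector>

    // Asymmetric dequantization (e.g. QASYMM8): real = scale * (q - offset).
    inline float dequantize_asymmetric(uint8_t q, float scale, int32_t offset)
    {
        return scale * static_cast<float>(static_cast<int32_t>(q) - offset);
    }

    // Symmetric per-channel dequantization (QSYMM8_PER_CHANNEL): real = scale[c] * q.
    // Signed 8-bit input and no zero point, so no per-channel offset buffer is needed.
    inline float dequantize_symmetric_per_channel(int8_t q, const std::vector<float> &scales, size_t channel)
    {
        return scales[channel] * static_cast<float>(q);
    }
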
Diffstat (limited to 'src')
-rw-r--r--  src/core/CL/CLHelpers.cpp                               5
-rw-r--r--  src/core/CL/cl_kernels/dequantization_layer.cl         35
-rw-r--r--  src/core/CL/kernels/CLDequantizationLayerKernel.cpp     3
-rw-r--r--  src/core/NEON/kernels/NEDequantizationLayerKernel.cpp  41
-rw-r--r--  src/core/Utils.cpp                                       4
5 files changed, 28 insertions, 60 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 26660ce215..17274d38ad 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -40,7 +40,6 @@ std::string get_cl_type_from_data_type(const DataType &dt)
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
return "uchar";
case DataType::S8:
case DataType::QSYMM8:
@@ -76,7 +75,6 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt)
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
return "ushort";
case DataType::S8:
case DataType::QSYMM8:
@@ -124,7 +122,6 @@ std::string get_cl_select_type_from_data_type(const DataType &dt)
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
return "uchar";
case DataType::S8:
case DataType::QSYMM8:
@@ -161,7 +158,6 @@ std::string get_data_size_from_data_type(const DataType &dt)
case DataType::QSYMM8:
case DataType::QASYMM8:
case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QASYMM8_PER_CHANNEL:
return "8";
case DataType::U16:
case DataType::S16:
@@ -306,7 +302,6 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt)
case DataType::QASYMM8:
case DataType::QSYMM8:
case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QASYMM8_PER_CHANNEL:
return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
case DataType::U16:
case DataType::S16:
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 5826847a5e..7550b4ba76 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -90,13 +90,13 @@ __kernel void dequantization_layer(
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET)
#if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST)
-/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW)
+/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW)
*
* @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char
* @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -113,13 +113,11 @@ __kernel void dequantization_layer(
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
* @param[in] scale Pointer to buffer with the per channel quantized scales
- * @param[in] offset Pointer to buffer with the per channel quantized offsets
*/
__kernel void dequantization_layer_per_channel_nchw(
TENSOR3D_DECLARATION(input),
TENSOR3D_DECLARATION(output),
- __global float *scale,
- __global int *offset)
+ __global float *scale)
{
// Get pixels pointer
Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
@@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw(
VEC_DATA_TYPE(int, VEC_SIZE)
val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
- // Create scale and offset vectors
+ // Create scale vectors
const VEC_DATA_TYPE(float, VEC_SIZE)
vscale = scale[get_global_id(2)];
- const VEC_DATA_TYPE(int, VEC_SIZE)
- voffset = offset[get_global_id(2)];
-
// Dequantize
VEC_DATA_TYPE(float, VEC_SIZE)
- res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE));
+ res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE));
// Store result
VSTORE(VEC_SIZE)
(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr);
#else // !defined(LAST_ACCESSED_X)
- *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]);
+ *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]);
#endif // defined(LAST_ACCESSED_X)
}
-/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC)
+/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC)
*
* @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char
* @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw(
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
* @param[in] scale Pointer to buffer with the per channel quantized scales
- * @param[in] offset Pointer to buffer with the per channel quantized offsets
*/
__kernel void dequantization_layer_per_channel_nhwc(
TENSOR3D_DECLARATION(input),
TENSOR3D_DECLARATION(output),
- __global float *scale,
- __global int *offset)
+ __global float *scale)
{
// Get pixels pointer
Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
@@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc(
input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
scale -= max(xi - (int)LAST_ACCESSED_X, 0);
- offset -= max(xi - (int)LAST_ACCESSED_X, 0);
// Load data
VEC_DATA_TYPE(int, VEC_SIZE)
val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
- // Create scale and offset vectors
+ // Create scale vectors
const VEC_DATA_TYPE(float, VEC_SIZE)
vscale = VLOAD(VEC_SIZE)(0, &scale[xi]);
- const VEC_DATA_TYPE(int, VEC_SIZE)
- voffset = VLOAD(VEC_SIZE)(0, &offset[xi]);
-
// Dequantize
VEC_DATA_TYPE(float, VEC_SIZE)
- res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE));
+ res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE));
// Store result
VSTORE(VEC_SIZE)
(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr);
#else // !defined(LAST_ACCESSED_X)
- *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]);
+ *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]);
#endif // defined(LAST_ACCESSED_X)
}
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST)
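
After this change both per-channel CL kernels compute out = scale[channel] * (float)in, with the scale index taken from get_global_id(2) (the channel plane) in NCHW and from the x coordinate in NHWC. A rough host-side C++ reference of the NCHW case, assuming densely packed data (function and parameter names are illustrative, not part of the library):

    #include <cstdint>
    #include <vector>

    // Reference loop for the NCHW per-channel kernel above: every element of channel c
    // is multiplied by scale[c]; there is no offset subtraction any more.
    void dequantize_per_channel_nchw_ref(const int8_t *in, float *out,
                                         const std::vector<float> &scale,
                                         int width, int height, int channels)
    {
        const int plane = width * height;
        for(int c = 0; c < channels; ++c)
        {
            for(int i = 0; i < plane; ++i)
            {
                out[c * plane + i] = scale[c] * static_cast<float>(in[c * plane + i]);
            }
        }
    }
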
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 3ec0b87636..60659faaaf 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -40,7 +40,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
@@ -144,7 +144,6 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
{
unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters
_kernel.setArg(idx++, _input->quantization().scale->cl_buffer());
- _kernel.setArg(idx++, _input->quantization().offset->cl_buffer());
}
do
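
With the relaxed validation above, a per-channel input for this kernel is now described as QSYMM8_PER_CHANNEL with a scale-only QuantizationInfo, and only the scale buffer is passed as the extra kernel argument. A hedged sketch of such a tensor descriptor, assuming a QuantizationInfo constructor that accepts a vector of per-channel scales (consistent with the scale() accessor used in the NEON kernel below):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"

    #include <vector>

    using namespace arm_compute;

    // One scale per channel and no offsets: symmetric per-channel quantization.
    const std::vector<float> per_channel_scales{ 0.02f, 0.015f, 0.03f };

    // Descriptor for a 3-channel source tensor; QASYMM8_PER_CHANNEL would now be rejected.
    const TensorInfo src_info(TensorShape(16U, 16U, 3U), 1, DataType::QSYMM8_PER_CHANNEL,
                              QuantizationInfo(per_channel_scales));
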
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 5abd6a122d..f555df3828 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -43,7 +43,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
@@ -160,10 +160,9 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
}
template <typename T>
-void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
+void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
{
- const std::vector<float> scale = input->info()->quantization_info().scale();
- const std::vector<int32_t> offset = input->info()->quantization_info().offset();
+ const auto scale = input->info()->quantization_info().scale();
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
@@ -179,14 +178,14 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *
execute_window_loop(win, [&](const Coordinates & id)
{
- const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
+ const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
const auto out_ptr = reinterpret_cast<T *>(out.ptr());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
const auto vin = wrapper::vloadq(in_ptr + x);
- const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]);
+ const auto vdeq = vdequantize(vin, scale[id.z()]);
store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
}
@@ -194,18 +193,17 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- uint8_t val = *(in_ptr + x);
- *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()], offset[id.z()]));
+ int8_t val = *(in_ptr + x);
+ *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()]));
}
},
in, out);
}
template <typename T>
-void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
+void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
{
- const std::vector<float> scale = input->info()->quantization_info().scale();
- const std::vector<int32_t> offset = input->info()->quantization_info().offset();
+ const auto scale = input->info()->quantization_info().scale();
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
@@ -221,7 +219,7 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
+ const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
const auto out_ptr = reinterpret_cast<T *>(out.ptr());
int x = window_start_x;
@@ -236,17 +234,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15]
}
};
- const int32x4x4_t voffset =
- {
- {
- offset[x + 0], offset[x + 1], offset[x + 2], offset[x + 3],
- offset[x + 4], offset[x + 5], offset[x + 6], offset[x + 7],
- offset[x + 8], offset[x + 9], offset[x + 10], offset[x + 11],
- offset[x + 12], offset[x + 13], offset[x + 14], offset[x + 15]
- }
- };
const auto vin = wrapper::vloadq(in_ptr + x);
- const auto vdeq = vdequantize(vin, vscale, voffset);
+ const auto vdeq = vdequantize(vin, vscale);
store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
}
@@ -254,8 +243,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- uint8_t val = *(in_ptr + x);
- *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x], offset[x]));
+ int8_t val = *(in_ptr + x);
+ *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x]));
}
},
in, out);
@@ -353,8 +342,8 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window
case DataType::QASYMM8:
run_dequantization_qasymm8<T>(input, output, window);
break;
- case DataType::QASYMM8_PER_CHANNEL:
- input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qasymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qasymm8_per_channel_nchw<T>(input, output, window);
+ case DataType::QSYMM8_PER_CHANNEL:
+ input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qsymm8_per_channel_nchw<T>(input, output, window);
break;
case DataType::QSYMM8:
run_dequantization_qsymm8<T>(input, output, window);
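
The vectorised NCHW path now calls vdequantize(vin, scale[id.z()]) with a single scalar scale per channel plane. Roughly, a symmetric dequantize of 16 int8 lanes amounts to the following NEON sketch (the library's wrappers differ; this only illustrates the math):

    #include <arm_neon.h>

    // Widen 16 signed 8-bit lanes to float and multiply by one per-channel scale;
    // no zero-point subtraction, matching the symmetric scheme used above.
    inline float32x4x4_t vdequantize_qsymm8_sketch(int8x16_t qv, float scale)
    {
        const int16x8_t   lo     = vmovl_s8(vget_low_s8(qv));
        const int16x8_t   hi     = vmovl_s8(vget_high_s8(qv));
        const float32x4_t vscale = vdupq_n_f32(scale);
        float32x4x4_t     out;
        out.val[0] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(lo))), vscale);
        out.val[1] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(lo))), vscale);
        out.val[2] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(hi))), vscale);
        out.val[3] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(hi))), vscale);
        return out;
    }
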
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 6d276d1322..9f1255dcaf 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -160,7 +160,6 @@ const std::string &arm_compute::string_from_data_type(DataType dt)
{ DataType::SIZET, "SIZET" },
{ DataType::QSYMM8, "QSYMM8" },
{ DataType::QSYMM8_PER_CHANNEL, "QSYMM8_PER_CHANNEL" },
- { DataType::QASYMM8_PER_CHANNEL, "QASYMM8_PER_CHANNEL" },
{ DataType::QASYMM8, "QASYMM8" },
{ DataType::QASYMM8_SIGNED, "QASYMM8_SIGNED" },
{ DataType::QSYMM16, "QSYMM16" },
@@ -287,7 +286,6 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
// Needs conversion to 32 bit, otherwise interpreted as ASCII values
ss << uint32_t(value.get<uint8_t>());
converted_string = ss.str();
@@ -446,7 +444,6 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
print_consecutive_elements_impl<uint8_t>(s, ptr, n, stream_width, element_delim);
break;
case DataType::S8:
@@ -485,7 +482,6 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp
{
case DataType::U8:
case DataType::QASYMM8:
- case DataType::QASYMM8_PER_CHANNEL:
return max_consecutive_elements_display_width_impl<uint8_t>(s, ptr, n);
case DataType::S8:
case DataType::QASYMM8_SIGNED: