aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2020-01-30 18:11:13 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2020-02-07 15:17:57 +0000
commit0b18d9740f04cc4e9cb6000a76b9c1dcd8327e24 (patch)
tree8ffd0b1c676b13499314d3396818f0c30c1020f2 /src
parentc971cf1034a61875c8e8c87d48634cbfac3865fd (diff)
downloadComputeLibrary-0b18d9740f04cc4e9cb6000a76b9c1dcd8327e24.tar.gz
COMPMID-2762: Add support for QASYMM8_SIGNED in CLReductionOperation and CLReduceMean
Change-Id: Ib6babd9ad80c57cf21c2f0ee2aab404221088595 Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2670 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/helpers_asymm.h16
-rw-r--r--src/core/CL/cl_kernels/reduction_operation.cl125
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp27
-rw-r--r--src/runtime/CL/functions/CLReduceMean.cpp6
-rw-r--r--src/runtime/CL/functions/CLReductionOperation.cpp12
5 files changed, 154 insertions, 32 deletions
diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h
index 5a7c7126dc..6377dbadb1 100644
--- a/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/src/core/CL/cl_kernels/helpers_asymm.h
@@ -399,15 +399,31 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
QUANTIZE_IMPL(uchar, 1)
QUANTIZE_IMPL(char, 1)
+QUANTIZE_IMPL(uint, 1)
+QUANTIZE_IMPL(int, 1)
QUANTIZE_IMPL(uchar, 4)
QUANTIZE_IMPL(ushort, 4)
QUANTIZE_IMPL(short, 4)
+QUANTIZE_IMPL(uchar, 16)
+QUANTIZE_IMPL(char, 16)
+QUANTIZE_IMPL(ushort, 16)
+QUANTIZE_IMPL(short, 16)
+QUANTIZE_IMPL(uint, 16)
+QUANTIZE_IMPL(int, 16)
DEQUANTIZE_IMPL(uchar, 1)
DEQUANTIZE_IMPL(char, 1)
+DEQUANTIZE_IMPL(uint, 1)
+DEQUANTIZE_IMPL(int, 1)
DEQUANTIZE_IMPL(uchar, 4)
DEQUANTIZE_IMPL(ushort, 4)
DEQUANTIZE_IMPL(short, 4)
+DEQUANTIZE_IMPL(uchar, 16)
+DEQUANTIZE_IMPL(char, 16)
+DEQUANTIZE_IMPL(ushort, 16)
+DEQUANTIZE_IMPL(short, 16)
+DEQUANTIZE_IMPL(uint, 16)
+DEQUANTIZE_IMPL(int, 16)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(1)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2)
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index 0c393345e2..a5fd0b3622 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "helpers.h"
+#include "helpers_asymm.h"
#if defined(FLOAT_DATA_TYPE)
#define ISGREATER(x, y) isgreater(x, y)
@@ -182,28 +183,50 @@ __kernel void reduction_operation_non_parallel_x(
Vector src = CONVERT_TO_VECTOR_STRUCT(src);
Vector output = CONVERT_TO_VECTOR_STRUCT(output);
- DATA_TYPE_PROMOTED res = *((__global DATA_TYPE *)vector_offset(&src, 0));
+ DATA_TYPE_PROMOTED res = CONVERT(*((__global DATA_TYPE *)vector_offset(&src, 0)), DATA_TYPE_PROMOTED);
+
+ // Convert input into F32 in order to perform quantized multiplication
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
for(unsigned int x = 1; x < WIDTH; ++x)
{
- DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
+ DATA_TYPE_PROMOTED in = CONVERT(*((__global DATA_TYPE *)vector_offset(&src, x)), DATA_TYPE_PROMOTED);
#if defined(MIN)
res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
#elif defined(MAX)
- res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
-#else // !(defined(MAX) || defined(MIN))
+ res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
+#elif defined(PROD)
+#if defined(OFFSET) && defined(SCALE)
+ res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
+#else // !(defined(OFFSET) && defined(SCALE))
+ res *= in;
+#endif // defined(OFFSET) && defined(SCALE)
+#else // defined(SUM))
res += in;
-#endif // defined(MAX) || defined(MIN)
+#endif // defined(MAX) || defined(MIN) || defined(PROD)
}
// Store result
#if defined(MEAN)
res /= WIDTH;
#endif // defined(MEAN)
+
+ // Subtract the offsets in case of quantized SUM
+#if defined(SUM) && defined(OFFSET) && defined(SCALE)
+ res -= (WIDTH - 1) * OFFSET;
+#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
+
+ // Re-quantize
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
#if defined(MIN) || defined(MAX)
*((__global DATA_TYPE_PROMOTED *)output.ptr) = res;
-#else // defined(MIN) || defined(MAX)
- *((__global uchar *)output.ptr) = convert_uchar(res);
+#else // !(defined(MIN) || defined(MAX))
+ *((__global DATA_TYPE *)output.ptr) = CONVERT_SAT(res, DATA_TYPE);
#endif // defined(MIN) || defined(MAX)
}
#endif // defined(WIDTH)
@@ -237,6 +260,11 @@ __kernel void reduction_operation_y(
VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ // Convert input into F32 in order to perform quantized multiplication
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
#if defined(SUM_SQUARE)
res *= res;
#endif // defined(SUM_SQUARE)
@@ -248,24 +276,41 @@ __kernel void reduction_operation_y(
#if defined(MIN)
res = select(res, in, ISLESS(in, res));
#elif defined(MAX)
- res = select(res, in, ISGREATER(in, res));
+ res = select(res, in, ISGREATER(in, res));
#else // !(defined(MAX) || defined(MIN))
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
#if defined(PROD)
+
+#if defined(OFFSET) && defined(SCALE)
+ res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#else // !(defined(OFFSET) && defined(SCALE))
res *= in;
+#endif // defined(OFFSET) && defined(SCALE)
+
#else // !defined(PROD)
res += in;
#endif // defined(PROD)
#endif // defined(MAX) || defined(MIN)
}
- // Store result
#if defined(MEAN)
res /= HEIGHT;
#endif // defined(MEAN)
- vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
+
+ // Subtract the offsets in case of quantized SUM
+#if defined(SUM) && defined(OFFSET) && defined(SCALE)
+ res -= (HEIGHT - 1) * OFFSET;
+#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
+
+ // Re-quantize
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
+ // Store result
+ vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
}
#endif // defined(HEIGHT)
@@ -302,6 +347,11 @@ __kernel void reduction_operation_z(
VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ // Convert input into F32 in order to perform quantized multiplication
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
#if defined(COMPLEX)
VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
@@ -323,14 +373,20 @@ __kernel void reduction_operation_z(
#if defined(MIN)
res = select(res, in, ISLESS(in, res));
#elif defined(MAX)
- res = select(res, in, ISGREATER(in, res));
+ res = select(res, in, ISGREATER(in, res));
#else // !(defined(MAX) || defined(MIN))
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
#if defined(PROD)
+
+#if defined(OFFSET) && defined(SCALE)
+ res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#else // !(defined(OFFSET) && defined(SCALE))
res *= in;
-#else //!defined(PROD)
+#endif // defined(OFFSET) && defined(SCALE)
+
+#else // !defined(PROD)
res += in;
#if defined(COMPLEX)
res1 += in1;
@@ -339,11 +395,22 @@ __kernel void reduction_operation_z(
#endif // defined(MAX) || defined(MIN)
}
- // Store result
#if defined(MEAN)
res /= DEPTH;
#endif // defined(MEAN)
- vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
+
+ // Subtract the offsets in case of quantized SUM
+#if defined(SUM) && defined(OFFSET) && defined(SCALE)
+ res -= (DEPTH - 1) * OFFSET;
+#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
+
+ // Re-quantize
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
+ // Store result
+ vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
#if defined(COMPLEX)
vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
#endif // defined(COMPLEX)
@@ -388,6 +455,11 @@ __kernel void reduction_operation_w(
VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
+ // Convert input into F32 in order to perform quantized multiplication
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
#if defined(SUM_SQUARE)
res *= res;
#endif // defined(SUM_SQUARE)
@@ -400,23 +472,40 @@ __kernel void reduction_operation_w(
#if defined(MIN)
res = select(res, in, ISLESS(in, res));
#elif defined(MAX)
- res = select(res, in, ISGREATER(in, res));
+ res = select(res, in, ISGREATER(in, res));
#else // !(defined(MAX) || defined(MIN))
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
#if defined(PROD)
+
+#if defined(OFFSET) && defined(SCALE)
+ res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#else // !(defined(OFFSET) && defined(SCALE))
res *= in;
-#else //!defined(PROD)
+#endif // defined(OFFSET) && defined(SCALE)
+
+#else // !defined(PROD)
res += in;
#endif //defined(PROD)
#endif // defined(MAX) || defined(MIN)
}
- // Store result
#if defined(MEAN)
res /= BATCH;
#endif // defined(MEAN)
- vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
+
+ // Subtract the offsets in case of quantized SUM
+#if defined(SUM) && defined(OFFSET) && defined(SCALE)
+ res -= (BATCH - 1) * OFFSET;
+#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
+
+ // Re-quantize
+#if defined(PROD) && defined(OFFSET) && defined(SCALE)
+ res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
+#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
+
+ // Store result
+ vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
}
#endif /* defined(BATCH) && defined(DEPTH) */
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 91ee83e530..a2a5f2be6d 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
if(input->num_channels() == 1)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
}
else
{
@@ -59,8 +59,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
- ARM_COMPUTE_RETURN_ERROR_ON(op == ReductionOperation::MEAN_SUM && axis == 0 && width == 0 && input->data_type() != DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN, "Not supported reduction operation, use CLArgMinMaxLayer");
+ ARM_COMPUTE_RETURN_ERROR_ON((op == ReductionOperation::MEAN_SUM) && (axis == 0) && (width == 0) && (input->data_type() != DataType::QASYMM8) && (input->data_type() != DataType::QASYMM8_SIGNED));
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN), "Not supported reduction operation, use CLArgMinMaxLayer");
if(output->total_size() != 0)
{
@@ -147,21 +147,30 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
// Set build options
CLBuildOptions build_opts;
- std::string data_type_promoted = get_cl_type_from_data_type(input->info()->data_type());
- if(is_data_type_quantized(input->info()->data_type()))
+ DataType data_type = input->info()->data_type();
+ std::string data_type_promoted{};
+
+ if(is_data_type_quantized(data_type))
+ {
+ data_type_promoted = get_cl_dot8_acc_type_from_data_type(data_type);
+ }
+ else
{
- data_type_promoted = "uint";
+ data_type_promoted = get_cl_type_from_data_type(data_type);
}
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
build_opts.add_option("-DDATA_TYPE_PROMOTED=" + data_type_promoted);
- build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE");
+ build_opts.add_option_if(is_data_type_float(data_type), "-DFLOAT_DATA_TYPE");
build_opts.add_option_if(op == ReductionOperation::SUM_SQUARE, "-DSUM_SQUARE");
build_opts.add_option_if(op == ReductionOperation::MEAN_SUM, "-DMEAN");
+ build_opts.add_option_if(op == ReductionOperation::SUM, "-DSUM");
build_opts.add_option_if(op == ReductionOperation::PROD, "-DPROD");
build_opts.add_option_if(op == ReductionOperation::MIN, "-DMIN");
build_opts.add_option_if(op == ReductionOperation::MAX, "-DMAX");
build_opts.add_option_if(input->info()->num_channels() == 2, "-DCOMPLEX");
+ build_opts.add_option_if(is_data_type_quantized(data_type), "-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().uniform().offset));
+ build_opts.add_option_if(is_data_type_quantized(data_type), "-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().uniform().scale));
switch(op)
{
diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp
index c5de43da35..9920617880 100644
--- a/src/runtime/CL/functions/CLReduceMean.cpp
+++ b/src/runtime/CL/functions/CLReduceMean.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,9 +28,7 @@
#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/helpers/tensor_transform.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
@@ -42,7 +40,7 @@ Status validate_config(const ITensorInfo *input, const Coordinates &reduction_ax
ARM_COMPUTE_UNUSED(keep_dims);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() < 1);
ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions());
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 2f9a38601d..e04982a315 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -271,6 +271,11 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
pixelValue = PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
break;
}
+ case DataType::QASYMM8_SIGNED:
+ {
+ pixelValue = PixelValue(127, input->info()->data_type(), input->info()->quantization_info());
+ break;
+ }
default:
{
ARM_COMPUTE_ERROR("Unsupported DataType");
@@ -298,6 +303,11 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
pixelValue = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
break;
}
+ case DataType::QASYMM8_SIGNED:
+ {
+ pixelValue = PixelValue(-128, input->info()->data_type(), input->info()->quantization_info());
+ break;
+ }
default:
{
ARM_COMPUTE_ERROR("Unsupported DataType");