From 7281834f8554670c9d8ae59069a4d80adec21fa7 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 13 Jul 2017 11:03:35 +0100 Subject: COMPMID-446: Add support for QS8/QS16 CL Arithmetic Add/Sub Change-Id: I84fc457a9c28856a11322944822d2fabaf92e8e4 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80528 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/CL/cl_kernels/arithmetic_op.cl | 10 +++++++--- src/core/CL/kernels/CLArithmeticAdditionKernel.cpp | 15 ++++++++++++--- src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp | 15 ++++++++++++--- 3 files changed, 31 insertions(+), 9 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/arithmetic_op.cl b/src/core/CL/cl_kernels/arithmetic_op.cl index 5102d34552..03414105e6 100644 --- a/src/core/CL/cl_kernels/arithmetic_op.cl +++ b/src/core/CL/cl_kernels/arithmetic_op.cl @@ -23,6 +23,10 @@ */ #include "helpers.h" +#if defined(FIXED_POINT_POSITION) +#include "fixed_point.h" +#endif /* FIXED_POINT_POSITION */ + #ifdef SATURATE #define ADD(x, y) add_sat((x), (y)) #define SUB(x, y) sub_sat((x), (y)) @@ -37,19 +41,19 @@ * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used. * - * @param[in] in1_ptr Pointer to the source image. Supported data types: U8, S16 + * @param[in] in1_ptr Pointer to the source image. 
Supported data types: U8/QS8/QS16/S16/F16/F32 * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16 + * @param[in] in2_ptr Pointer to the source image. Supported data types: U8/QS8 (only if @p in1_ptr is QS8), QS16 (only if @p in1_ptr is QS16), S16/F16/F32 * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16 + * @param[out] out_ptr Pointer to the destination image. 
Supported data types: U8 (only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32 * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp index 0cb0847784..65422c2bbf 100644 --- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp @@ -69,11 +69,16 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen } ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8), "Output can only be U8 if both inputs are U8"); + if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || 
is_data_type_fixed_point(output->info()->data_type())) + { + // Check that all data types are the same and all fixed-point positions are the same + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output); + } _input1 = input1; _input2 = input2; @@ -87,6 +92,10 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + if(is_data_type_fixed_point(input1->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position())); + } // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_add", build_opts)); diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp index 69f9ff17d3..c5183af7d7 100644 --- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp @@ -62,11 +62,16 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL } ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, 
DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8), "Output can only be U8 if both inputs are U8"); + if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) + { + // Check that all data types are the same and all fixed-point positions are the same + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output); + } _input1 = input1; _input2 = input2; @@ -80,6 +85,10 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + if(is_data_type_fixed_point(input1->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position())); + } // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_sub", build_opts)); -- cgit v1.2.1