diff options
-rw-r--r-- | arm_compute/core/CL/kernels/CLDepthConvertKernel.h | 7 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLDepthConvert.h | 8 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/depth_convert.cl | 49 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/fixed_point.h | 30 | ||||
-rw-r--r-- | src/core/CL/kernels/CLDepthConvertKernel.cpp | 25 | ||||
-rw-r--r-- | tests/validation/CL/DepthConvert.cpp | 105 |
6 files changed, 198 insertions, 26 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h index 2c3b1b8b69..b03b90b275 100644 --- a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h @@ -43,14 +43,17 @@ public: * * Valid conversions Input -> Output : * + * - QS8 -> F32 + * - QS16 -> F32 * - U8 -> U16, S16, U32, S32 * - U16 -> U8, U32, S32 * - S16 -> U8, U32, S32 * - U32 -> U8, U16, S16 * - S32 -> U8, U16, S16 + * - F32 -> QS8, QS16 * - * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. - * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] input The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32. + * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32. * @param[in] policy Conversion policy * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h index f11027656d..9a4c63dd6d 100644 --- a/arm_compute/runtime/CL/functions/CLDepthConvert.h +++ b/arm_compute/runtime/CL/functions/CLDepthConvert.h @@ -43,16 +43,20 @@ public: * * Valid conversions Input -> Output : * + * - QS8 -> F32 + * - QS16 -> F32 * - U8 -> U16, S16, U32, S32 * - U16 -> U8, U32, S32 * - S16 -> U8, U32, S32 * - U32 -> U8, U16, S16 * - S32 -> U8, U16, S16 + * - F32 -> QS8, QS16 * - * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. - * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] input The input tensor to convert. Data types supported: QS8/U8/U16/S16/Q16/U32/S32/F32. + * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32. * @param[in] policy Conversion policy. * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * It is not used on fixed point conversion. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); }; diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl index 3a1c7ca2c5..a9b7284c83 100644 --- a/src/core/CL/cl_kernels/depth_convert.cl +++ b/src/core/CL/cl_kernels/depth_convert.cl @@ -23,24 +23,47 @@ */ #include "helpers.h" +#if defined(FIXED_POINT_POSITION) + +#include "fixed_point.h" + +#ifdef SATURATE +#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) +#define CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type##_sat(x, fixed_point_position) +#else /* SATURATE */ +#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) +#define CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position) +#endif /* SATURATE */ + +#define CONVERT_UP(x, in_type, out_type, fixed_point_position) CONVERT_UP1(x, in_type, out_type, fixed_point_position) +#define CONVERT_UP1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position) + +#else /* FIXED_POINT_POSITION */ + #ifdef SATURATE #define CONVERT_DOWN(x, type) CONVERT_SAT(x, type) #else /* SATURATE */ #define CONVERT_DOWN(x, type) CONVERT(x, type) #endif /* SATURATE */ +#define CONVERT_UP(x, type) CONVERT(x, type) + +#endif /* FIXED_POINT_POSITION */ + /** This function performs a down-scaling depth conversion. * * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short * - * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32 + * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3 + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F16, F32 * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32 + * @param[out] out_ptr Pointer to the destination image. Supported data types: QS8, U8, QS16, U16, S16, U32, S32 * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) @@ -60,7 +83,12 @@ __kernel void convert_depth_down( // Load data VEC_DATA_TYPE(DATA_TYPE_IN, 16) in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); + +#if defined(FIXED_POINT_POSITION) + vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr); +#else /* FIXED_POINT_POSITION */ vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); +#endif /* FIXED_POINT_POSITION */ } /** This function performs a up-scaling depth conversion. @@ -68,13 +96,15 @@ __kernel void convert_depth_down( * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short * - * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32 + * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3 + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8, QS8, U16, S16, QS16, U32 or S32 * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32 + * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32, S32, F16 or F32 * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) @@ -92,7 +122,12 @@ __kernel void convert_depth_up( Image out = CONVERT_TO_IMAGE_STRUCT(out); // Load data - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - in_data = CONVERT(vload16(0, (__global DATA_TYPE_IN *)in.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); - vstore16(in_data << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); + VEC_DATA_TYPE(DATA_TYPE_IN, 16) + in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); + +#if defined(FIXED_POINT_POSITION) + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr); +#else /* FIXED_POINT_POSITION */ + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); +#endif /* FIXED_POINT_POSITION */ } diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h index 9fd3a6f899..5d340c4e95 100644 --- a/src/core/CL/cl_kernels/fixed_point.h +++ b/src/core/CL/cl_kernels/fixed_point.h @@ -381,4 +381,34 @@ INVSQRTQ_IMPL(qs8, qs8x16, 16) #define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position)) #define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position) +#define floatx16 float16 +#define float16_TYPE float16 + +#define CONVERTQ_DOWN_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \ + { \ + return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \ + } + +CONVERTQ_DOWN_IMPL(float16, qs8x16) +CONVERTQ_DOWN_IMPL(float16, qs16x16) + +#define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position) \ + { \ + return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \ + } + +CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16) +CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16) + +#define CONVERTQ_UP_IMPL(in_type, out_type) \ + inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \ + { \ + return CONVERT(a, out_type) / (1 << fixed_point_position); \ + } + +CONVERTQ_UP_IMPL(qs8x16, float16) +CONVERTQ_UP_IMPL(qs16x16, float16) + #endif // ARM_COMPUTE_FIXED_POINT_H diff --git a/src/core/CL/kernels/CLDepthConvertKernel.cpp b/src/core/CL/kernels/CLDepthConvertKernel.cpp index 24608bd17c..c43884a509 100644 --- a/src/core/CL/kernels/CLDepthConvertKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertKernel.cpp @@ -40,13 +40,21 @@ using namespace arm_compute; void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); ARM_COMPUTE_ERROR_ON(input == output); ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different"); ARM_COMPUTE_ERROR_ON(shift >= 8); // Check if convertion is supported + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32, + "Only data types supported [in] QS8 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32), + "Only data types supported [in] QS16 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16), + "Only data types supported [in] F32 -> [out] QS8, QS16"); ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32), "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); @@ -67,6 +75,11 @@ void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, && output->info()->data_type() != DataType::S16), "Only data types supported [in] S32 -> [out] U8, U16, S16"); + // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) + set_shape_if_empty(*output->info(), input->info()->tensor_shape()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + // Get data sizes const size_t input_size = data_size_from_type(input->info()->data_type()); const size_t output_size = data_size_from_type(output->info()->data_type()); @@ -83,8 +96,12 @@ void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, { kernel_name += "_up"; } - build_opts.insert("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); diff --git a/tests/validation/CL/DepthConvert.cpp b/tests/validation/CL/DepthConvert.cpp index 340ddf2562..2655f0024a 100644 --- a/tests/validation/CL/DepthConvert.cpp +++ b/tests/validation/CL/DepthConvert.cpp @@ -59,11 +59,11 @@ namespace * * @return Computed output CLtensor. */ -CLTensor compute_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) +CLTensor compute_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position = 0) { // Create tensors - CLTensor src = create_tensor<CLTensor>(shape, dt_in); - CLTensor dst = create_tensor<CLTensor>(shape, dt_out); + CLTensor src = create_tensor<CLTensor>(shape, dt_in, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(shape, dt_out, 1, fixed_point_position); // Create and configure function CLDepthConvert depth_convert; @@ -86,18 +86,19 @@ CLTensor compute_depth_convert(const TensorShape &shape, DataType dt_in, DataTyp } /** Configure and validate region/padding function. * - * @param[in] shape Shape of the input and output tensors. - * @param[in] dt_in Data type of input tensor. - * @param[in] dt_out Data type of the output tensor. - * @param[in] policy Conversion policy. - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in Data type of input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * @param[in] fixed_point_position Fixed point position. * */ -void compute_configure_validate(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) +void compute_configure_validate(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position = 0) { // Create tensors - CLTensor src = create_tensor<CLTensor>(shape, dt_in); - CLTensor dst = create_tensor<CLTensor>(shape, dt_out); + CLTensor src = create_tensor<CLTensor>(shape, dt_in, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(shape, dt_out, 1, fixed_point_position); BOOST_TEST(src.info()->is_resizable()); BOOST_TEST(dst.info()->is_resizable()); @@ -408,6 +409,88 @@ BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ Co } BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE(Quantized_to_F32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) + * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) * boost::unit_test::data::xrange(1, 7, 1), + shape, dt, policy, fixed_point_position) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, dt, DataType::F32, policy, 0, fixed_point_position); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, dt, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, dt, DataType::F32, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, dt, DataType::F32, policy, 0, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) + * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) * boost::unit_test::data::xrange(1, 7, 1), + shape, dt, policy, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, dt, DataType::F32, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, dt, DataType::F32, policy, 0, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(F32_to_Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) + * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) * boost::unit_test::data::xrange(1, 7, 1), + shape, dt, policy, fixed_point_position) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::F32, dt, policy, 0, fixed_point_position); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) + * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) * boost::unit_test::data::xrange(1, 7, 1), + shape, dt, policy, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::F32, dt, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::F32, dt, policy, 0, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) + * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) * boost::unit_test::data::xrange(1, 7, 1), + shape, dt, policy, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::F32, dt, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::F32, dt, policy, 0, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END() #endif /* DOXYGEN_SKIP_THIS */ |