From 4aff98fcfd3c736115f3983dc448c3280e570841 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 28 Aug 2019 16:27:26 +0100 Subject: COMPMID-2247: Extend support of CLBoundingBoxTransform for QUANT16_ASYMM Change-Id: I8af7a382c0bccf55cf7f4a64f46ce9e6cd965afe Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/1833 Comments-Addressed: Arm Jenkins Reviewed-by: Pablo Marquez Tested-by: Arm Jenkins --- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 45 +++++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 045bd02d73..08e5cc6b3b 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -43,21 +43,37 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(boxes); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::F32, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::QASYMM16, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8, DataType::F32, DataType::F16); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[1] != boxes->tensor_shape()[1]); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[0] % 4 != 0); ARM_COMPUTE_RETURN_ERROR_ON(boxes->tensor_shape()[0] != 4); ARM_COMPUTE_RETURN_ERROR_ON(deltas->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2); + const bool is_qasymm16 = boxes->data_type() == DataType::QASYMM16; + if(is_qasymm16) + { + const UniformQuantizationInfo boxes_qinfo = boxes->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8); + } + if(pred_boxes->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape()); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, deltas); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, boxes); ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2); + if(is_qasymm16) + { + const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.offset != 0); + } } ARM_COMPUTE_RETURN_ERROR_ON(info.scale() <= 0); + return Status{}; } } // namespace @@ -70,7 +86,7 @@ CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); - auto_init_if_empty(*pred_boxes->info(), *deltas->info()); + auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info())); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(boxes->info(), pred_boxes->info(), deltas->info(), info)); @@ -83,9 +99,11 @@ void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor * const int img_h = floor(info.img_height() / info.scale() + 0.5f); const int img_w = floor(info.img_width() / info.scale() + 0.5f); + const bool is_quantized = is_data_type_quantized(boxes->info()->data_type()); + // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(deltas->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(boxes->info()->data_type())); build_opts.add_option("-DWEIGHT_X=" + float_to_string_with_full_precision(info.weights()[0])); build_opts.add_option("-DWEIGHT_Y=" + float_to_string_with_full_precision(info.weights()[1])); build_opts.add_option("-DWEIGHT_W=" + float_to_string_with_full_precision(info.weights()[2])); @@ -98,8 +116,23 @@ void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor * build_opts.add_option_if(info.apply_scale(), "-DSCALE_AFTER=" + float_to_string_with_full_precision(info.scale())); build_opts.add_option_if(info.correct_transform_coords(), "-DOFFSET=1"); + if(is_quantized) + { + build_opts.add_option("-DDATA_TYPE_DELTAS=" + get_cl_type_from_data_type(deltas->info()->data_type())); + const UniformQuantizationInfo boxes_qinfo = boxes->info()->quantization_info().uniform(); + const UniformQuantizationInfo deltas_qinfo = deltas->info()->quantization_info().uniform(); + const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->info()->quantization_info().uniform(); + build_opts.add_option("-DOFFSET_BOXES=" + float_to_string_with_full_precision(boxes_qinfo.offset)); + build_opts.add_option("-DSCALE_BOXES=" + float_to_string_with_full_precision(boxes_qinfo.scale)); + build_opts.add_option("-DOFFSET_DELTAS=" + float_to_string_with_full_precision(deltas_qinfo.offset)); + build_opts.add_option("-DSCALE_DELTAS=" + float_to_string_with_full_precision(deltas_qinfo.scale)); + build_opts.add_option("-DOFFSET_PRED_BOXES=" + float_to_string_with_full_precision(pred_boxes_qinfo.offset)); + build_opts.add_option("-DSCALE_PRED_BOXES=" + float_to_string_with_full_precision(pred_boxes_qinfo.scale)); + } + // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bounding_box_transform", build_opts.options())); + const std::string kernel_name = (is_quantized) ? "bounding_box_transform_quantized" : "bounding_box_transform"; + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor const unsigned int num_elems_processed_per_iteration = 4; -- cgit v1.2.1