From ffd31defdb84d4ca1e24e9248d628c0075767302 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 4 Sep 2019 13:38:14 +0100 Subject: COMPMID-2246: NEBoundingBoxTransform support QASYMM16 Change-Id: I0704f71159a3caec4705779cab2ef38aeb33aaca Signed-off-by: Pablo Tello Reviewed-on: https://review.mlplatform.org/c/1864 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena --- .../core/CL/kernels/CLBoundingBoxTransformKernel.h | 4 +- .../NEON/kernels/NEBoundingBoxTransformKernel.h | 10 ++- .../runtime/CL/functions/CLBoundingBoxTransform.h | 4 +- .../NEON/functions/NEBoundingBoxTransform.h | 10 ++- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 4 + .../NEON/kernels/NEBoundingBoxTransformKernel.cpp | 97 ++++++++++++++++++++-- tests/validation/NEON/BoundingBoxTransform.cpp | 17 ++++ 7 files changed, 126 insertions(+), 20 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h index 27ba8f2271..75d3c89102 100644 --- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -52,7 +52,7 @@ public: * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -65,7 +65,7 @@ public: * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h index 0f0a1f6a2d..c2b3862b13 100644 --- a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h +++ b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h @@ -54,9 +54,10 @@ public: /** Set the input and output tensors. * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -66,9 +67,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h index 4424f50597..5ce44899d5 100644 --- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h +++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h @@ -44,7 +44,7 @@ public: * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -56,7 +56,7 @@ public: * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index 1f512b938b..c73371a530 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -41,9 +41,10 @@ class NEBoundingBoxTransform : public INESimpleFunction public: /** Set the input and output tensors. * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -52,9 +53,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 08e5cc6b3b..8fc6f82bd6 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -59,6 +59,10 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8); } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes, deltas); + } if(pred_boxes->total_size() > 0) { diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index cfd3e708e5..5a40b99609 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -40,21 +40,40 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(boxes); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::F32, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::QASYMM16, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8, DataType::F32, DataType::F16); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[1] != boxes->tensor_shape()[1]); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[0] % 4 != 0); ARM_COMPUTE_RETURN_ERROR_ON(boxes->tensor_shape()[0] != 4); ARM_COMPUTE_RETURN_ERROR_ON(deltas->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON(info.scale() <= 0); + + if(boxes->data_type() == DataType::QASYMM16) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(deltas, 1, DataType::QASYMM8); + const UniformQuantizationInfo deltas_qinfo = deltas->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(deltas_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(deltas_qinfo.offset != 0); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes, deltas); + } if(pred_boxes->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, deltas); ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2); + if(pred_boxes->data_type() == DataType::QASYMM16) + { + const UniformQuantizationInfo pred_qinfo = pred_boxes->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(pred_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(pred_qinfo.offset != 0); + } } - ARM_COMPUTE_RETURN_ERROR_ON(info.scale() <= 0); + return Status{}; } @@ -62,7 +81,7 @@ std::pair validate_and_configure_window(ITensorInfo *boxes, ITen { ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes); - auto_init_if_empty(*pred_boxes, *deltas); + auto_init_if_empty(*pred_boxes, deltas->clone()->set_data_type(boxes->data_type()).set_quantization_info(boxes->quantization_info())); const unsigned int num_boxes = boxes->dimension(1); @@ -110,13 +129,70 @@ Status NEBoundingBoxTransformKernel::validate(const ITensorInfo *boxes, const IT return Status{}; } +template <> +void NEBoundingBoxTransformKernel::internal_run(const Window &window, const ThreadInfo &info) +{ + const size_t num_classes = _deltas->info()->tensor_shape()[0] >> 2; + const size_t deltas_width = _deltas->info()->tensor_shape()[0]; + const int img_h = std::floor(_bbinfo.img_height() / _bbinfo.scale() + 0.5f); + const int img_w = std::floor(_bbinfo.img_width() / _bbinfo.scale() + 0.5f); + + const auto scale_after = (_bbinfo.apply_scale() ? _bbinfo.scale() : 1.f); + const auto scale_before = _bbinfo.scale(); + const auto offset = (_bbinfo.correct_transform_coords() ? 1.f : 0.f); + + auto pred_ptr = reinterpret_cast(_pred_boxes->buffer() + _pred_boxes->info()->offset_first_element_in_bytes()); + auto delta_ptr = reinterpret_cast(_deltas->buffer() + _deltas->info()->offset_first_element_in_bytes()); + + const auto boxes_qinfo = _boxes->info()->quantization_info().uniform(); + const auto deltas_qinfo = _deltas->info()->quantization_info().uniform(); + const auto pred_qinfo = _pred_boxes->info()->quantization_info().uniform(); + + Iterator box_it(_boxes, window); + execute_window_loop(window, [&](const Coordinates & id) + { + const auto ptr = reinterpret_cast(box_it.ptr()); + const auto b0 = dequantize_qasymm16(*ptr, boxes_qinfo); + const auto b1 = dequantize_qasymm16(*(ptr + 1), boxes_qinfo); + const auto b2 = dequantize_qasymm16(*(ptr + 2), boxes_qinfo); + const auto b3 = dequantize_qasymm16(*(ptr + 3), boxes_qinfo); + const float width = (b2 / scale_before) - (b0 / scale_before) + 1.f; + const float height = (b3 / scale_before) - (b1 / scale_before) + 1.f; + const float ctr_x = (b0 / scale_before) + 0.5f * width; + const float ctr_y = (b1 / scale_before) + 0.5f * height; + for(size_t j = 0; j < num_classes; ++j) + { + // Extract deltas + const size_t delta_id = id.y() * deltas_width + 4u * j; + const float dx = dequantize_qasymm8(delta_ptr[delta_id], deltas_qinfo) / _bbinfo.weights()[0]; + const float dy = dequantize_qasymm8(delta_ptr[delta_id + 1], deltas_qinfo) / _bbinfo.weights()[1]; + float dw = dequantize_qasymm8(delta_ptr[delta_id + 2], deltas_qinfo) / _bbinfo.weights()[2]; + float dh = dequantize_qasymm8(delta_ptr[delta_id + 3], deltas_qinfo) / _bbinfo.weights()[3]; + // Clip dw and dh + dw = std::min(dw, _bbinfo.bbox_xform_clip()); + dh = std::min(dh, _bbinfo.bbox_xform_clip()); + // Determine the predictions + const float pred_ctr_x = dx * width + ctr_x; + const float pred_ctr_y = dy * height + ctr_y; + const float pred_w = std::exp(dw) * width; + const float pred_h = std::exp(dh) * height; + // Store the prediction into the output tensor + pred_ptr[delta_id] = quantize_qasymm16(scale_after * utility::clamp(pred_ctr_x - 0.5f * pred_w, 0.f, img_w - 1.f), pred_qinfo); + pred_ptr[delta_id + 1] = quantize_qasymm16(scale_after * utility::clamp(pred_ctr_y - 0.5f * pred_h, 0.f, img_h - 1.f), pred_qinfo); + pred_ptr[delta_id + 2] = quantize_qasymm16(scale_after * utility::clamp(pred_ctr_x + 0.5f * pred_w - offset, 0.f, img_w - 1.f), pred_qinfo); + pred_ptr[delta_id + 3] = quantize_qasymm16(scale_after * utility::clamp(pred_ctr_y + 0.5f * pred_h - offset, 0.f, img_h - 1.f), pred_qinfo); + } + }, + box_it); +} + template void NEBoundingBoxTransformKernel::internal_run(const Window &window, const ThreadInfo &info) { const size_t num_classes = _deltas->info()->tensor_shape()[0] >> 2; const size_t deltas_width = _deltas->info()->tensor_shape()[0]; - const int img_h = floor(_bbinfo.img_height() / _bbinfo.scale() + 0.5f); - const int img_w = floor(_bbinfo.img_width() / _bbinfo.scale() + 0.5f); + const int img_h = std::floor(_bbinfo.img_height() / _bbinfo.scale() + 0.5f); + const int img_w = std::floor(_bbinfo.img_width() / _bbinfo.scale() + 0.5f); const auto scale_after = (_bbinfo.apply_scale() ? T(_bbinfo.scale()) : T(1)); const auto scale_before = T(_bbinfo.scale()); @@ -152,8 +228,8 @@ void NEBoundingBoxTransformKernel::internal_run(const Window &window, const Thre // Determine the predictions const T pred_ctr_x = dx * width + ctr_x; const T pred_ctr_y = dy * height + ctr_y; - const T pred_w = T(std::exp(dw)) * width; - const T pred_h = T(std::exp(dh)) * height; + const T pred_w = std::exp(dw) * width; + const T pred_h = std::exp(dh) * height; // Store the prediction into the output tensor pred_ptr[delta_id] = scale_after * utility::clamp(pred_ctr_x - T(0.5f) * pred_w, T(0), T(img_w - 1)); pred_ptr[delta_id + 1] = scale_after * utility::clamp(pred_ctr_y - T(0.5f) * pred_h, T(0), T(img_h - 1)); @@ -175,6 +251,11 @@ void NEBoundingBoxTransformKernel::run(const Window &window, const ThreadInfo &i internal_run(window, info); break; } + case DataType::QASYMM16: + { + internal_run(window, info); + break; + } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { diff --git a/tests/validation/NEON/BoundingBoxTransform.cpp b/tests/validation/NEON/BoundingBoxTransform.cpp index 070ea8b3ba..3f16b4525f 100644 --- a/tests/validation/NEON/BoundingBoxTransform.cpp +++ b/tests/validation/NEON/BoundingBoxTransform.cpp @@ -50,6 +50,8 @@ RelativeTolerance relative_tolerance_f16(half(0.2)); AbsoluteTolerance absolute_tolerance_f16(half(0.02f)); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +constexpr AbsoluteTolerance tolerance_qasymm16(1); + // *INDENT-OFF* // clang-format off const auto BboxInfoDataset = framework::dataset::make("BboxInfo", { BoundingBoxTransformInfo(20U, 20U, 2U, true), @@ -135,6 +137,21 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM16) +template +using NEBoundingBoxTransformQuantizedFixture = BoundingBoxTransformQuantizedFixture; + +FIXTURE_DATA_TEST_CASE(BoundingBox, NEBoundingBoxTransformQuantizedFixture, framework::DatasetMode::ALL, + combine(combine(combine(DeltaDataset, BboxInfoDataset), framework::dataset::make("DataType", { DataType::QASYMM16 })), + framework::dataset::make("DeltasQuantInfo", { QuantizationInfo(0.125f, 0) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm16); +} +TEST_SUITE_END() // QASYMM16 +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // BBoxTransform TEST_SUITE_END() // NEON } // namespace validation -- cgit v1.2.1