From 4aff98fcfd3c736115f3983dc448c3280e570841 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 28 Aug 2019 16:27:26 +0100 Subject: COMPMID-2247: Extend support of CLBoundingBoxTransform for QUANT16_ASYMM Change-Id: I8af7a382c0bccf55cf7f4a64f46ce9e6cd965afe Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/1833 Comments-Addressed: Arm Jenkins Reviewed-by: Pablo Marquez Tested-by: Arm Jenkins --- .../core/CL/kernels/CLBoundingBoxTransformKernel.h | 12 +- .../runtime/CL/functions/CLBoundingBoxTransform.h | 12 +- src/core/CL/CLHelpers.cpp | 4 + src/core/CL/CLKernelLibrary.cpp | 5 + .../cl_kernels/bounding_box_transform_quantized.cl | 110 +++++++++++ src/core/CL/cl_kernels/helpers_asymm.h | 76 +++++++- .../CL/cl_kernels/roi_align_layer_quantized.cl | 24 +-- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 45 ++++- src/core/Utils.cpp | 10 +- tests/validation/CL/BoundingBoxTransform.cpp | 17 ++ tests/validation/Helpers.cpp | 14 ++ tests/validation/Helpers.h | 11 +- .../fixtures/BoundingBoxTransformFixture.h | 205 ++++++++++++++------- tests/validation/reference/ActivationLayer.cpp | 2 +- .../validation/reference/ArithmeticOperations.cpp | 2 +- .../validation/reference/BoundingBoxTransform.cpp | 40 ++-- tests/validation/reference/BoundingBoxTransform.h | 6 +- .../validation/reference/ElementwiseOperations.cpp | 2 +- .../reference/NormalizePlanarYUVLayer.cpp | 4 +- .../reference/PixelWiseMultiplication.cpp | 2 +- tests/validation/reference/PoolingLayer.cpp | 2 +- tests/validation/reference/ROIAlignLayer.cpp | 2 +- tests/validation/reference/Range.cpp | 4 +- tests/validation/reference/ReductionOperation.cpp | 2 +- tests/validation/reference/Scale.cpp | 2 +- tests/validation/reference/SoftmaxLayer.cpp | 4 +- tests/validation/reference/UpsampleLayer.cpp | 2 +- tests/validation/reference/YOLOLayer.cpp | 4 +- 28 files changed, 478 insertions(+), 147 deletions(-) create mode 100644 src/core/CL/cl_kernels/bounding_box_transform_quantized.cl diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h index 6f0abc1888..27ba8f2271 100644 --- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,9 +49,10 @@ public: /** Set the input and output tensors. * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -61,9 +62,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h index 11be4301a0..4424f50597 100644 --- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h +++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,9 +41,10 @@ class CLBoundingBoxTransform : public ICLSimpleFunction public: /** Set the input and output tensors. * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. @@ -52,9 +53,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: F16/F32. + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherise same as @p input * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index bb3cf7fda2..d051810090 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -45,6 +45,7 @@ std::string get_cl_type_from_data_type(const DataType &dt) case DataType::QSYMM8_PER_CHANNEL: return "char"; case DataType::U16: + case DataType::QASYMM16: return "ushort"; case DataType::S16: case DataType::QSYMM16: @@ -80,6 +81,7 @@ std::string get_cl_select_type_from_data_type(const DataType &dt) case DataType::QSYMM8_PER_CHANNEL: return "char"; case DataType::U16: + case DataType::QASYMM16: return "ushort"; case DataType::F16: case DataType::S16: @@ -114,6 +116,7 @@ std::string get_data_size_from_data_type(const DataType &dt) case DataType::U16: case DataType::S16: case DataType::QSYMM16: + case DataType::QASYMM16: case DataType::F16: return "16"; case DataType::U32: @@ -258,6 +261,7 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt) case DataType::U16: case DataType::S16: case DataType::QSYMM16: + case DataType::QASYMM16: return device.getInfo(); case DataType::U32: case DataType::S32: diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 1f530a27c5..4f017b792b 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -161,6 +161,7 @@ const std::map CLKernelLibrary::_kernel_program_map = { "bitwise_xor", "bitwise_op.cl" }, { "bitwise_not", "bitwise_op.cl" }, { "bounding_box_transform", "bounding_box_transform.cl" }, + { "bounding_box_transform_quantized", "bounding_box_transform_quantized.cl" }, { "channel_combine_NV", "channel_combine.cl" }, { "channel_combine_RGB888", "channel_combine.cl" }, { "channel_combine_RGBA8888", "channel_combine.cl" }, @@ -593,6 +594,10 @@ const std::map CLKernelLibrary::_program_source_map = { "bounding_box_transform.cl", #include "./cl_kernels/bounding_box_transform.clembed" + }, + { + "bounding_box_transform_quantized.cl", +#include "./cl_kernels/bounding_box_transform_quantized.clembed" }, { "canny.cl", diff --git a/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl new file mode 100644 index 0000000000..bebad62933 --- /dev/null +++ b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers_asymm.h" + +#if defined(DATA_TYPE) && defined(DATA_TYPE_DELTAS) && defined(WEIGHT_X) && defined(WEIGHT_Y) && defined(WEIGHT_W) && defined(WEIGHT_H) && defined(IMG_WIDTH) && defined(IMG_HEIGHT) && defined(BOX_FIELDS) && defined(SCALE_BEFORE) && defined(OFFSET_BOXES) && defined(SCALE_BOXES) && defined(OFFSET_DELTAS) && defined(SCALE_DELTAS) && defined(OFFSET_PRED_BOXES) && defined(SCALE_PRED_BOXES) // Check for compile time constants + +/** Perform a padded copy of input tensor to the output tensor for quantized data types. Padding values are defined at compile time + * + * @attention The following variables must be passed at compile time: + * -# -DDATA_TYPE= Tensor data type. Supported data types: QASYMM16 for boxes and pred_boxes, QASYMM8 for for deltas + * -# -DWEIGHT{X,Y,W,H}= Weights [wx, wy, ww, wh] for the deltas + * -# -DIMG_WIDTH= Original image width + * -# -DIMG_HEIGHT= Original image height + * -# -DBOX_FIELDS= Number of fields that are used to represent a box in boxes + * + * @param[in] boxes_ptr Pointer to the boxes tensor. Supported data types: QASYMM16 + * @param[in] boxes_stride_x Stride of the boxes tensor in X dimension (in bytes) + * @param[in] boxes_step_x boxes_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] boxes_stride_y Stride of the boxes tensor in Y dimension (in bytes) + * @param[in] boxes_step_y boxes_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] boxes_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] boxes_step_z boxes_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] boxes_offset_first_element_in_bytes The offset of the first element in the boxes tensor + * @param[out] pred_boxes_ptr Pointer to the predicted boxes. Supported data types: same as @p in_ptr + * @param[in] pred_boxes_stride_x Stride of the predicted boxes in X dimension (in bytes) + * @param[in] pred_boxes_step_x pred_boxes_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] pred_boxes_stride_y Stride of the predicted boxes in Y dimension (in bytes) + * @param[in] pred_boxes_step_y pred_boxes_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] pred_boxes_stride_z Stride of the predicted boxes in Z dimension (in bytes) + * @param[in] pred_boxes_step_z pred_boxes_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] pred_boxes_offset_first_element_in_bytes The offset of the first element in the predicted boxes + * @param[in] deltas_ptr Pointer to the deltas tensor. Supported data types: QASYMM8 + * @param[in] deltas_stride_x Stride of the deltas tensor in X dimension (in bytes) + * @param[in] deltas_step_x deltas_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] deltas_stride_y Stride of the deltas tensor in Y dimension (in bytes) + * @param[in] deltas_step_y deltas_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] deltas_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] deltas_step_z deltas_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] deltas_offset_first_element_in_bytes The offset of the first element in the deltas tensor + */ +__kernel void bounding_box_transform_quantized( + VECTOR_DECLARATION(boxes), + IMAGE_DECLARATION(pred_boxes), + IMAGE_DECLARATION(deltas)) +{ + // Get pixels pointer + Vector boxes = CONVERT_TO_VECTOR_STRUCT_NO_STEP(boxes); + Image pred_boxes = CONVERT_TO_IMAGE_STRUCT(pred_boxes); + Image deltas = CONVERT_TO_IMAGE_STRUCT(deltas); + + // Load delta and box values into registers + const float one = 1.f; + const float halfone = 0.5f; + + const int py = get_global_id(1); // box + float4 scale_before = (float4)SCALE_BEFORE; + float4 delta = DEQUANTIZE(vload4(0, (__global DATA_TYPE_DELTAS *)deltas.ptr), OFFSET_DELTAS, SCALE_DELTAS, DATA_TYPE_DELTAS, 4); + float4 box = DEQUANTIZE(vload4(0, (__global DATA_TYPE *)vector_offset(&boxes, BOX_FIELDS * py)), OFFSET_BOXES, SCALE_BOXES, DATA_TYPE, 4) / scale_before; + + // Calculate width and centers of the old boxes + float2 dims = box.s23 - box.s01 + one; + float2 ctr = box.s01 + halfone * dims; + float4 weights = (float4)(WEIGHT_X, WEIGHT_Y, WEIGHT_W, WEIGHT_H); + delta /= weights; + delta.s23 = min(delta.s23, (float)BBOX_XFORM_CLIP); + + // Calculate widths and centers of the new boxes (translation + aspect ratio transformation) + float2 pred_ctr = delta.s01 * dims + ctr; + float2 pred_dims = exp(delta.s23) * dims; + + // Useful vector constant definitions + float4 max_values = (float4)(IMG_WIDTH - 1, IMG_HEIGHT - 1, IMG_WIDTH - 1, IMG_HEIGHT - 1); + float4 sign = (float4)(-1, -1, 1, 1); + float4 min_values = 0; + + // Calculate the coordinates of the new boxes + float4 pred_box = pred_ctr.s0101 + sign * halfone * pred_dims.s0101; +#ifdef OFFSET // Possibly adjust the predicted boxes + pred_box.s23 -= one; +#endif // Possibly adjust the predicted boxes + pred_box = CLAMP(pred_box, min_values, max_values); +#ifdef SCALE_AFTER // Possibly scale the predicted boxes + pred_box *= (float4)SCALE_AFTER; +#endif // Possibly scale the predicted boxes + + // Store them into the output + vstore4(QUANTIZE(pred_box, OFFSET_PRED_BOXES, SCALE_PRED_BOXES, DATA_TYPE, 4), 0, (__global DATA_TYPE *)pred_boxes.ptr); +} +#endif // Check for compile time constants diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h index c314d17993..ad06451f13 100644 --- a/src/core/CL/cl_kernels/helpers_asymm.h +++ b/src/core/CL/cl_kernels/helpers_asymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,67 @@ #include "helpers.h" +#define CONVERT_DOWN_RTE_STR(x, type) (convert_##type##_rte((x))) +#define CONVERT_DOWN_RTE(x, type) CONVERT_DOWN_RTE_STR(x, type) + +/** Quantize a floating-point scalar value to 8-bit asymmetric + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline uchar quantize_qasymm8(float input, float offset, float scale) +{ + float out_f32 = input / scale + offset; + uchar res_u8 = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, int), uchar); + return res_u8; +} + +/** Dequantize a scalar value from 8-bit asymmetric to floating-point + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline float dequantize_qasymm8(uchar input, float offset, float scale) +{ + return ((float)input - offset) * scale; +} + +/** Quantize a vector of values from floating-point + * + * @param[in] type Output data type. + * @param[in] size Size of vector. + * + * @return quantized values + */ +#define QUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(type, size) quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \ + { \ + VEC_DATA_TYPE(float, size) \ + out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \ + VEC_DATA_TYPE(type, size) \ + res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), VEC_DATA_TYPE(type, size)); \ + return res; \ + } + +/** Dequantize a vector of values to floating-point + * + * @param[in] type Input data type. + * @param[in] size Size of vector. + * + * @return dequantized values in floating point + */ +#define DEQUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(float, size) dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ + { \ + return (CONVERT(input, VEC_DATA_TYPE(float, 4)) - offset) * scale; \ + } + /** Correctly-rounded-to-nearest division by a power-of-two. * * @param[in] size Size of vector. @@ -292,6 +353,11 @@ return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \ } +#define QUANTIZE_STR(input, offset, scale, type, size) quantize_##type##size(input, offset, scale) +#define QUANTIZE(input, offset, scale, type, size) QUANTIZE_STR(input, offset, scale, type, size) +#define DEQUANTIZE_STR(input, offset, scale, type, size) dequantize_##type##size(input, offset, scale) +#define DEQUANTIZE(input, offset, scale, type, size) DEQUANTIZE_STR(input, offset, scale, type, size) + #define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) asymm_rounding_divide_by_POW2_##size(x, exponent) #define ASYMM_MULT(a, b, size) asymm_mult##size(a, b) #define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \ @@ -307,6 +373,12 @@ #define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b) #define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) asymm_rescale##size(value, src_integer_bits, dst_integer_bits) +QUANTIZE_IMPL(uchar, 4) +QUANTIZE_IMPL(ushort, 4) + +DEQUANTIZE_IMPL(uchar, 4) +DEQUANTIZE_IMPL(ushort, 4) + ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8) @@ -367,4 +439,4 @@ ASYMM_RESCALE_IMPL(4) ASYMM_RESCALE_IMPL(8) ASYMM_RESCALE_IMPL(16) -#endif // ARM_COMPUTE_HELPERS_ASYMM_H \ No newline at end of file +#endif // ARM_COMPUTE_HELPERS_ASYMM_H diff --git a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl index f9360e98f1..030731b7d3 100644 --- a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl +++ b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "helpers.h" +#include "helpers_asymm.h" // This specifies the value to shift the result of roi_dims / pooled_dims before ceiling. // It is close to the epsilon machine (for a floating point system, x and x+EPS are the same number). @@ -29,26 +29,6 @@ #if defined(DATA_TYPE) && defined(POOLED_DIM_X) && defined(POOLED_DIM_Y) && defined(MAX_DIM_X) && defined(MAX_DIM_Y) && defined(MAX_DIM_Z) && defined(SPATIAL_SCALE) && defined(OFFSET_IN) && defined(OFFSET_OUT) && defined(SCALE_IN) && defined(SCALE_OUT) && defined(OFFSET_ROIS) && defined(SCALE_ROIS) // Check for compile time constants -#define CONVERT_RTE(x, type) (convert_##type##_rte((x))) -#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type) -inline float dequantize_qasymm8(uchar input, float offset, float scale) -{ - return ((float)input - offset) * scale; -} - -inline uchar quantize_qasymm8(float input, float offset, float scale) -{ - float out_f32 = input / scale + offset; - uchar res_u8 = CONVERT_SAT(CONVERT_DOWN(out_f32, int), uchar); - return res_u8; -} - -inline float4 dequantize_qasymm16(ushort4 input, float offset, float scale) -{ - float4 in_f32 = (CONVERT(input, float4) - (float4)(offset)) * (float4)(scale); - return in_f32; -} - /** Performs a roi align on a single output pixel. * * @param[in] input Pointer to input Tensor3D struct. @@ -178,7 +158,7 @@ __kernel void roi_align_layer_quantized( // Load roi parameters // roi is laid out as follows { batch_index, x1, y1, x2, y2 } const ushort roi_batch = *((__global ushort *)offset(&rois, 0, pw)); - float4 roi = dequantize_qasymm16(vload4(0, (__global ushort *)offset(&rois, 1, pw)), OFFSET_ROIS, SCALE_ROIS); + float4 roi = DEQUANTIZE(vload4(0, (__global ushort *)offset(&rois, 1, pw)), OFFSET_ROIS, SCALE_ROIS, ushort, 4); float2 roi_anchor = roi.s01 * convert_float(SPATIAL_SCALE); float2 roi_dims = fmax((roi.s23 - roi.s01) * convert_float(SPATIAL_SCALE), 1.f); diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 045bd02d73..08e5cc6b3b 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -43,21 +43,37 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(boxes); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::F32, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::QASYMM16, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8, DataType::F32, DataType::F16); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[1] != boxes->tensor_shape()[1]); ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[0] % 4 != 0); ARM_COMPUTE_RETURN_ERROR_ON(boxes->tensor_shape()[0] != 4); ARM_COMPUTE_RETURN_ERROR_ON(deltas->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2); + const bool is_qasymm16 = boxes->data_type() == DataType::QASYMM16; + if(is_qasymm16) + { + const UniformQuantizationInfo boxes_qinfo = boxes->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8); + } + if(pred_boxes->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape()); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, deltas); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, boxes); ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2); + if(is_qasymm16) + { + const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.scale != 0.125f); + ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.offset != 0); + } } ARM_COMPUTE_RETURN_ERROR_ON(info.scale() <= 0); + return Status{}; } } // namespace @@ -70,7 +86,7 @@ CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); - auto_init_if_empty(*pred_boxes->info(), *deltas->info()); + auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info())); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(boxes->info(), pred_boxes->info(), deltas->info(), info)); @@ -83,9 +99,11 @@ void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor * const int img_h = floor(info.img_height() / info.scale() + 0.5f); const int img_w = floor(info.img_width() / info.scale() + 0.5f); + const bool is_quantized = is_data_type_quantized(boxes->info()->data_type()); + // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(deltas->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(boxes->info()->data_type())); build_opts.add_option("-DWEIGHT_X=" + float_to_string_with_full_precision(info.weights()[0])); build_opts.add_option("-DWEIGHT_Y=" + float_to_string_with_full_precision(info.weights()[1])); build_opts.add_option("-DWEIGHT_W=" + float_to_string_with_full_precision(info.weights()[2])); @@ -98,8 +116,23 @@ void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor * build_opts.add_option_if(info.apply_scale(), "-DSCALE_AFTER=" + float_to_string_with_full_precision(info.scale())); build_opts.add_option_if(info.correct_transform_coords(), "-DOFFSET=1"); + if(is_quantized) + { + build_opts.add_option("-DDATA_TYPE_DELTAS=" + get_cl_type_from_data_type(deltas->info()->data_type())); + const UniformQuantizationInfo boxes_qinfo = boxes->info()->quantization_info().uniform(); + const UniformQuantizationInfo deltas_qinfo = deltas->info()->quantization_info().uniform(); + const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->info()->quantization_info().uniform(); + build_opts.add_option("-DOFFSET_BOXES=" + float_to_string_with_full_precision(boxes_qinfo.offset)); + build_opts.add_option("-DSCALE_BOXES=" + float_to_string_with_full_precision(boxes_qinfo.scale)); + build_opts.add_option("-DOFFSET_DELTAS=" + float_to_string_with_full_precision(deltas_qinfo.offset)); + build_opts.add_option("-DSCALE_DELTAS=" + float_to_string_with_full_precision(deltas_qinfo.scale)); + build_opts.add_option("-DOFFSET_PRED_BOXES=" + float_to_string_with_full_precision(pred_boxes_qinfo.offset)); + build_opts.add_option("-DSCALE_PRED_BOXES=" + float_to_string_with_full_precision(pred_boxes_qinfo.scale)); + } + // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bounding_box_transform", build_opts.options())); + const std::string kernel_name = (is_quantized) ? "bounding_box_transform_quantized" : "bounding_box_transform"; + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor const unsigned int num_elems_processed_per_iteration = 4; diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index d11788acd3..7ce94e2aa4 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -286,6 +286,7 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const { case DataType::U8: case DataType::QASYMM8: + case DataType::QASYMM8_PER_CHANNEL: // Needs conversion to 32 bit, otherwise interpreted as ASCII values ss << uint32_t(value.get()); converted_string = ss.str(); @@ -296,6 +297,7 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const converted_string = ss.str(); break; case DataType::U16: + case DataType::QASYMM16: ss << value.get(); converted_string = ss.str(); break; @@ -429,14 +431,16 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const { switch(dt) { - case DataType::QASYMM8: case DataType::U8: + case DataType::QASYMM8: + case DataType::QASYMM8_PER_CHANNEL: print_consecutive_elements_impl(s, ptr, n, stream_width, element_delim); break; case DataType::S8: print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; case DataType::U16: + case DataType::QASYMM16: print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; case DataType::S16: @@ -464,12 +468,14 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp { switch(dt) { - case DataType::QASYMM8: case DataType::U8: + case DataType::QASYMM8: + case DataType::QASYMM8_PER_CHANNEL: return max_consecutive_elements_display_width_impl(s, ptr, n); case DataType::S8: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::U16: + case DataType::QASYMM16: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::S16: case DataType::QSYMM16: diff --git a/tests/validation/CL/BoundingBoxTransform.cpp b/tests/validation/CL/BoundingBoxTransform.cpp index b6334b5868..2491e185d8 100644 --- a/tests/validation/CL/BoundingBoxTransform.cpp +++ b/tests/validation/CL/BoundingBoxTransform.cpp @@ -46,6 +46,8 @@ AbsoluteTolerance absolute_tolerance_f32(0.001f); RelativeTolerance relative_tolerance_f16(half(0.2)); AbsoluteTolerance absolute_tolerance_f16(half(0.02f)); +constexpr AbsoluteTolerance tolerance_qasymm16(1); + // *INDENT-OFF* // clang-format off const auto BboxInfoDataset = framework::dataset::make("BboxInfo", { BoundingBoxTransformInfo(20U, 20U, 2U, true), @@ -128,6 +130,21 @@ FIXTURE_DATA_TEST_CASE(BoundingBox, CLBoundingBoxTransformFixture, framewo TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float +template +using CLBoundingBoxTransformQuantizedFixture = BoundingBoxTransformQuantizedFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM16) +FIXTURE_DATA_TEST_CASE(BoundingBox, CLBoundingBoxTransformQuantizedFixture, framework::DatasetMode::ALL, + combine(combine(combine(DeltaDataset, BboxInfoDataset), framework::dataset::make("DataType", { DataType::QASYMM16 })), + framework::dataset::make("DeltasQuantInfo", { QuantizationInfo(1.f / 255.f, 127) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm16); +} +TEST_SUITE_END() // QASYMM16 +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // BBoxTransform TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index a811cabf56..4158793295 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -132,6 +132,7 @@ SimpleTensor convert_from_asymmetric(const SimpleTensor &src) return dst; } +template <> SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info) { SimpleTensor dst{ src.shape(), DataType::QASYMM8, 1, quantization_info }; @@ -144,6 +145,19 @@ SimpleTensor convert_to_asymmetric(const SimpleTensor &src, cons return dst; } +template <> +SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info) +{ + SimpleTensor dst{ src.shape(), DataType::QASYMM16, 1, quantization_info }; + const UniformQuantizationInfo &qinfo = quantization_info.uniform(); + + for(int i = 0; i < src.num_elements(); ++i) + { + dst[i] = quantize_qasymm16(src[i], qinfo); + } + return dst; +} + template <> SimpleTensor convert_to_symmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info) { diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 0d6515b5c5..2ee2dc7aab 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -200,7 +200,16 @@ SimpleTensor convert_from_asymmetric(const SimpleTensor &src); * * @return Quantized tensor. */ -SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info); +template +SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info); + +/** Convert quantized simple tensor into float using tensor quantization information. + * + * @param[in] src Quantized tensor. + * + * @return Float tensor. + */ +SimpleTensor convert_from_asymmetric(const SimpleTensor &src); /** Convert quantized simple tensor into float using tensor quantization information. * diff --git a/tests/validation/fixtures/BoundingBoxTransformFixture.h b/tests/validation/fixtures/BoundingBoxTransformFixture.h index b71da8e97d..5e4c598f73 100644 --- a/tests/validation/fixtures/BoundingBoxTransformFixture.h +++ b/tests/validation/fixtures/BoundingBoxTransformFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,28 +40,117 @@ namespace test { namespace validation { +namespace +{ +std::vector generate_deltas(std::vector &boxes, const TensorShape &image_shape, size_t num_boxes, size_t num_classes, std::mt19937 &gen) +{ + std::vector deltas(num_boxes * 4 * num_classes); + + std::uniform_int_distribution<> dist_x1(0, image_shape[0] - 1); + std::uniform_int_distribution<> dist_y1(0, image_shape[1] - 1); + std::uniform_int_distribution<> dist_w(1, image_shape[0]); + std::uniform_int_distribution<> dist_h(1, image_shape[1]); + + for(size_t i = 0; i < num_boxes; ++i) + { + const float ex_width = boxes[4 * i + 2] - boxes[4 * i] + 1.f; + const float ex_height = boxes[4 * i + 3] - boxes[4 * i + 1] + 1.f; + const float ex_ctr_x = boxes[4 * i] + 0.5f * ex_width; + const float ex_ctr_y = boxes[4 * i + 1] + 0.5f * ex_height; + + for(size_t j = 0; j < num_classes; ++j) + { + const float x1 = dist_x1(gen); + const float y1 = dist_y1(gen); + const float width = dist_w(gen); + const float height = dist_h(gen); + const float ctr_x = x1 + 0.5f * width; + const float ctr_y = y1 + 0.5f * height; + + deltas[4 * num_classes * i + 4 * j] = (ctr_x - ex_ctr_x) / ex_width; + deltas[4 * num_classes * i + 4 * j + 1] = (ctr_y - ex_ctr_y) / ex_height; + deltas[4 * num_classes * i + 4 * j + 2] = log(width / ex_width); + deltas[4 * num_classes * i + 4 * j + 3] = log(height / ex_height); + } + } + return deltas; +} + +std::vector generate_boxes(const TensorShape &image_shape, size_t num_boxes, std::mt19937 &gen) +{ + std::vector boxes(num_boxes * 4); + + std::uniform_int_distribution<> dist_x1(0, image_shape[0] - 1); + std::uniform_int_distribution<> dist_y1(0, image_shape[1] - 1); + std::uniform_int_distribution<> dist_w(1, image_shape[0]); + std::uniform_int_distribution<> dist_h(1, image_shape[1]); + + for(size_t i = 0; i < num_boxes; ++i) + { + boxes[4 * i] = dist_x1(gen); + boxes[4 * i + 1] = dist_y1(gen); + boxes[4 * i + 2] = boxes[4 * i] + dist_w(gen) - 1; + boxes[4 * i + 3] = boxes[4 * i + 1] + dist_h(gen) - 1; + } + return boxes; +} +} // namespace + template -class BoundingBoxTransformFixture : public framework::Fixture +class BoundingBoxTransformGenericFixture : public framework::Fixture { public: + using TDeltas = typename std::conditional::type, uint16_t>::value, uint8_t, T>::type; + template - void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type) + void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) { + const bool is_qasymm16 = data_type == DataType::QASYMM16; + _data_type_deltas = (is_qasymm16) ? DataType::QASYMM8 : data_type; + _boxes_qinfo = (is_qasymm16) ? QuantizationInfo(.125f, 0) : QuantizationInfo(); + std::mt19937 gen_target(library->seed()); - _target = compute_target(deltas_shape, data_type, info, gen_target); + _target = compute_target(deltas_shape, data_type, info, gen_target, deltas_qinfo); std::mt19937 gen_reference(library->seed()); - _reference = compute_reference(deltas_shape, data_type, info, gen_reference); + _reference = compute_reference(deltas_shape, data_type, info, gen_reference, deltas_qinfo); } protected: + template + void fill(U &&tensor, std::vector values) + { + data_type *data_ptr = reinterpret_cast(tensor.data()); + switch(tensor.data_type()) + { + case DataType::QASYMM8: + for(size_t i = 0; i < values.size(); ++i) + { + data_ptr[i] = quantize_qasymm8(values[i], tensor.quantization_info()); + } + break; + case DataType::QASYMM16: + for(size_t i = 0; i < values.size(); ++i) + { + data_ptr[i] = quantize_qasymm16(values[i], tensor.quantization_info()); + } + break; + default: + for(size_t i = 0; i < values.size(); ++i) + { + data_ptr[i] = static_cast(values[i]); + } + } + } + TensorType compute_target(const TensorShape &deltas_shape, DataType data_type, - const BoundingBoxTransformInfo &bbox_info, std::mt19937 &gen) + const BoundingBoxTransformInfo &bbox_info, std::mt19937 &gen, + QuantizationInfo deltas_qinfo) { // Create tensors TensorShape boxes_shape(4, deltas_shape[1]); - TensorType deltas = create_tensor(deltas_shape, data_type); - TensorType boxes = create_tensor(boxes_shape, data_type); + TensorType deltas = create_tensor(deltas_shape, _data_type_deltas, 1, deltas_qinfo); + TensorType boxes = create_tensor(boxes_shape, data_type, 1, _boxes_qinfo); TensorType pred_boxes; // Create and configure function @@ -81,9 +170,11 @@ protected: ARM_COMPUTE_EXPECT(!boxes.info()->is_resizable(), framework::LogLevel::ERRORS); // Fill tensors - TensorShape img_shape(bbox_info.scale() * bbox_info.img_width(), bbox_info.scale() * bbox_info.img_height()); - generate_boxes(AccessorType(boxes), img_shape, boxes_shape[1], gen); - generate_deltas(AccessorType(deltas), AccessorType(boxes), img_shape, deltas_shape[1], deltas_shape[0] / 4, gen); + TensorShape img_shape(bbox_info.scale() * bbox_info.img_width(), bbox_info.scale() * bbox_info.img_height()); + std::vector boxes_vec = generate_boxes(img_shape, boxes_shape[1], gen); + std::vector deltas_vec = generate_deltas(boxes_vec, img_shape, deltas_shape[1], deltas_shape[0] / 4, gen); + fill(AccessorType(boxes), boxes_vec); + fill(AccessorType(deltas), deltas_vec); // Compute function bbox_transform.run(); @@ -93,80 +184,56 @@ protected: SimpleTensor compute_reference(const TensorShape &deltas_shape, DataType data_type, - const BoundingBoxTransformInfo &bbox_info, std::mt19937 &gen) + const BoundingBoxTransformInfo &bbox_info, + std::mt19937 &gen, + QuantizationInfo deltas_qinfo) { // Create reference tensor - TensorShape boxes_shape(4, deltas_shape[1]); - SimpleTensor boxes{ boxes_shape, data_type }; - SimpleTensor deltas{ deltas_shape, data_type }; + TensorShape boxes_shape(4, deltas_shape[1]); + SimpleTensor boxes{ boxes_shape, data_type, 1, _boxes_qinfo }; + SimpleTensor deltas{ deltas_shape, _data_type_deltas, 1, deltas_qinfo }; // Fill reference tensor - TensorShape img_shape(bbox_info.scale() * bbox_info.img_width(), bbox_info.scale() * bbox_info.img_height()); - generate_boxes(boxes, img_shape, boxes_shape[1], gen); - generate_deltas(deltas, boxes, img_shape, deltas_shape[1], deltas_shape[0] / 4, gen); + TensorShape img_shape(bbox_info.scale() * bbox_info.img_width(), bbox_info.scale() * bbox_info.img_height()); + std::vector boxes_vec = generate_boxes(img_shape, boxes_shape[1], gen); + std::vector deltas_vec = generate_deltas(boxes_vec, img_shape, deltas_shape[1], deltas_shape[0] / 4, gen); + fill(boxes, boxes_vec); + fill(deltas, deltas_vec); return reference::bounding_box_transform(boxes, deltas, bbox_info); } - TensorType _target{}; - SimpleTensor _reference{}; + TensorType _target{}; + SimpleTensor _reference{}; + DataType _data_type_deltas{}; + QuantizationInfo _boxes_qinfo{}; private: - template - void generate_deltas(U &&deltas, U &&boxes, const TensorShape &image_shape, size_t num_boxes, size_t num_classes, std::mt19937 &gen) - { - T *deltas_ptr = static_cast(deltas.data()); - T *boxes_ptr = static_cast(boxes.data()); - - std::uniform_int_distribution<> dist_x1(0, image_shape[0] - 1); - std::uniform_int_distribution<> dist_y1(0, image_shape[1] - 1); - std::uniform_int_distribution<> dist_w(1, image_shape[0]); - std::uniform_int_distribution<> dist_h(1, image_shape[1]); - - for(size_t i = 0; i < num_boxes; ++i) - { - const T ex_width = boxes_ptr[4 * i + 2] - boxes_ptr[4 * i] + T(1); - const T ex_height = boxes_ptr[4 * i + 3] - boxes_ptr[4 * i + 1] + T(1); - const T ex_ctr_x = boxes_ptr[4 * i] + T(0.5) * ex_width; - const T ex_ctr_y = boxes_ptr[4 * i + 1] + T(0.5) * ex_height; - - for(size_t j = 0; j < num_classes; ++j) - { - const T x1 = T(dist_x1(gen)); - const T y1 = T(dist_y1(gen)); - const T width = T(dist_w(gen)); - const T height = T(dist_h(gen)); - const T ctr_x = x1 + T(0.5) * width; - const T ctr_y = y1 + T(0.5) * height; - - deltas_ptr[4 * num_classes * i + 4 * j] = (ctr_x - ex_ctr_x) / ex_width; - deltas_ptr[4 * num_classes * i + 4 * j + 1] = (ctr_y - ex_ctr_y) / ex_height; - deltas_ptr[4 * num_classes * i + 4 * j + 2] = log(width / ex_width); - deltas_ptr[4 * num_classes * i + 4 * j + 3] = log(height / ex_height); - } - } - } +}; - template - void generate_boxes(U &&boxes, const TensorShape &image_shape, size_t num_boxes, std::mt19937 &gen) +template +class BoundingBoxTransformFixture : public BoundingBoxTransformGenericFixture +{ +public: + template + void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type) { - T *boxes_ptr = (T *)boxes.data(); + BoundingBoxTransformGenericFixture::setup(deltas_shape, info, data_type, QuantizationInfo()); + } - std::uniform_int_distribution<> dist_x1(0, image_shape[0] - 1); - std::uniform_int_distribution<> dist_y1(0, image_shape[1] - 1); - std::uniform_int_distribution<> dist_w(1, image_shape[0]); - std::uniform_int_distribution<> dist_h(1, image_shape[1]); +private: +}; - for(size_t i = 0; i < num_boxes; ++i) - { - boxes_ptr[4 * i] = dist_x1(gen); - boxes_ptr[4 * i + 1] = dist_y1(gen); - boxes_ptr[4 * i + 2] = boxes_ptr[4 * i] + dist_w(gen) - 1; - boxes_ptr[4 * i + 3] = boxes_ptr[4 * i + 1] + dist_h(gen) - 1; - } +template +class BoundingBoxTransformQuantizedFixture : public BoundingBoxTransformGenericFixture +{ +public: + template + void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) + { + BoundingBoxTransformGenericFixture::setup(deltas_shape, info, data_type, deltas_qinfo); } }; - } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp index f573d12df8..6cdba09c75 100644 --- a/tests/validation/reference/ActivationLayer.cpp +++ b/tests/validation/reference/ActivationLayer.cpp @@ -61,7 +61,7 @@ SimpleTensor activation_layer(const SimpleTensor &src SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor dst_tmp = activation_layer(src_tmp, info); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, dst_qinfo); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, dst_qinfo); return dst; } diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp index abd4f31d72..0ec328ee6a 100644 --- a/tests/validation/reference/ArithmeticOperations.cpp +++ b/tests/validation/reference/ArithmeticOperations.cpp @@ -112,7 +112,7 @@ SimpleTensor arithmetic_operation(ArithmeticOperation op, const SimpleT BroadcastUnroll::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst); - dst = convert_to_asymmetric(dst_tmp, dst.quantization_info()); + dst = convert_to_asymmetric(dst_tmp, dst.quantization_info()); return dst; } else diff --git a/tests/validation/reference/BoundingBoxTransform.cpp b/tests/validation/reference/BoundingBoxTransform.cpp index 55dd165b51..e09bcff1c6 100644 --- a/tests/validation/reference/BoundingBoxTransform.cpp +++ b/tests/validation/reference/BoundingBoxTransform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,16 +36,16 @@ namespace validation { namespace reference { -template -SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info) +template +SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info) { - const DataType boxes_data_type = deltas.data_type(); + const DataType boxes_data_type = boxes.data_type(); SimpleTensor pred_boxes(deltas.shape(), boxes_data_type); - const size_t num_classes = deltas.shape()[0] / 4; - const size_t num_boxes = deltas.shape()[1]; - const T *deltas_ptr = deltas.data(); - T *pred_boxes_ptr = pred_boxes.data(); + const size_t num_classes = deltas.shape()[0] / 4; + const size_t num_boxes = deltas.shape()[1]; + const TDeltas *deltas_ptr = deltas.data(); + T *pred_boxes_ptr = pred_boxes.data(); const int img_h = floor(info.img_height() / info.scale() + 0.5f); const int img_w = floor(info.img_width() / info.scale() + 0.5f); @@ -70,15 +70,15 @@ SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const Simpl for(size_t j = 0; j < num_classes; ++j) { // Extract deltas - const size_t start_delta = i * num_classes * class_fields + class_fields * j; - const T dx = deltas_ptr[start_delta] / T(info.weights()[0]); - const T dy = deltas_ptr[start_delta + 1] / T(info.weights()[1]); - T dw = deltas_ptr[start_delta + 2] / T(info.weights()[2]); - T dh = deltas_ptr[start_delta + 3] / T(info.weights()[3]); + const size_t start_delta = i * num_classes * class_fields + class_fields * j; + const TDeltas dx = deltas_ptr[start_delta] / TDeltas(info.weights()[0]); + const TDeltas dy = deltas_ptr[start_delta + 1] / TDeltas(info.weights()[1]); + TDeltas dw = deltas_ptr[start_delta + 2] / TDeltas(info.weights()[2]); + TDeltas dh = deltas_ptr[start_delta + 3] / TDeltas(info.weights()[3]); // Clip dw and dh - dw = std::min(dw, T(info.bbox_xform_clip())); - dh = std::min(dh, T(info.bbox_xform_clip())); + dw = std::min(dw, TDeltas(info.bbox_xform_clip())); + dh = std::min(dh, TDeltas(info.bbox_xform_clip())); // Determine the predictions const T pred_ctr_x = dx * width + ctr_x; @@ -98,6 +98,16 @@ SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const Simpl template SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info); template SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info); + +template <> +SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info) +{ + SimpleTensor boxes_tmp = convert_from_asymmetric(boxes); + SimpleTensor deltas_tmp = convert_from_asymmetric(deltas); + SimpleTensor pred_boxes_tmp = bounding_box_transform(boxes_tmp, deltas_tmp, info); + SimpleTensor pred_boxes = convert_to_asymmetric(pred_boxes_tmp, boxes.quantization_info()); + return pred_boxes; +} } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/BoundingBoxTransform.h b/tests/validation/reference/BoundingBoxTransform.h index 33ef9d984f..dbe2a147e9 100644 --- a/tests/validation/reference/BoundingBoxTransform.h +++ b/tests/validation/reference/BoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,8 +38,8 @@ namespace validation { namespace reference { -template -SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info); +template +SimpleTensor bounding_box_transform(const SimpleTensor &boxes, const SimpleTensor &deltas, const BoundingBoxTransformInfo &info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp index d5a37a0fae..7b39e18bd9 100644 --- a/tests/validation/reference/ElementwiseOperations.cpp +++ b/tests/validation/reference/ElementwiseOperations.cpp @@ -168,7 +168,7 @@ SimpleTensor arithmetic_operation(ArithmeticOperation op, const SimpleT BroadcastUnroll::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst); - dst = convert_to_asymmetric(dst_tmp, dst.quantization_info()); + dst = convert_to_asymmetric(dst_tmp, dst.quantization_info()); return dst; } else diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.cpp b/tests/validation/reference/NormalizePlanarYUVLayer.cpp index 563e2a7444..ea0e75a3c7 100644 --- a/tests/validation/reference/NormalizePlanarYUVLayer.cpp +++ b/tests/validation/reference/NormalizePlanarYUVLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,7 @@ SimpleTensor normalize_planar_yuv_layer(const SimpleTensor mean_tmp = convert_from_asymmetric(mean); SimpleTensor std_tmp = convert_from_asymmetric(std); SimpleTensor dst_tmp = normalize_planar_yuv_layer(src_tmp, mean_tmp, std_tmp); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); return dst; } diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp index 41a919249e..d9895e5ed9 100644 --- a/tests/validation/reference/PixelWiseMultiplication.cpp +++ b/tests/validation/reference/PixelWiseMultiplication.cpp @@ -160,7 +160,7 @@ SimpleTensor pixel_wise_multiplication(const SimpleTensor &src SimpleTensor src1_tmp = convert_from_asymmetric(src1); SimpleTensor src2_tmp = convert_from_asymmetric(src2); SimpleTensor dst_tmp = pixel_wise_multiplication(src1_tmp, src2_tmp, scale, convert_policy, rounding_policy, qout); - dst = convert_to_asymmetric(dst_tmp, qout); + dst = convert_to_asymmetric(dst_tmp, qout); } else { diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index f4112a486d..34b19ffb4f 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -157,7 +157,7 @@ SimpleTensor pooling_layer(const SimpleTensor &src, c { SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor dst_tmp = pooling_layer(src_tmp, info, output_qinfo); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_qinfo); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_qinfo); return dst; } diff --git a/tests/validation/reference/ROIAlignLayer.cpp b/tests/validation/reference/ROIAlignLayer.cpp index 8ad78ff915..415b483bc0 100644 --- a/tests/validation/reference/ROIAlignLayer.cpp +++ b/tests/validation/reference/ROIAlignLayer.cpp @@ -209,7 +209,7 @@ SimpleTensor roi_align_layer(const SimpleTensor &src, const Si SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor rois_tmp = convert_rois_from_asymmetric(rois); SimpleTensor dst_tmp = roi_align_layer(src_tmp, rois_tmp, pool_info, output_qinfo); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_qinfo); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_qinfo); return dst; } } // namespace reference diff --git a/tests/validation/reference/Range.cpp b/tests/validation/reference/Range.cpp index c24512fa9d..ad1345425a 100644 --- a/tests/validation/reference/Range.cpp +++ b/tests/validation/reference/Range.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ SimpleTensor range(SimpleTensor &dst, float start, const size_ { SimpleTensor dst_tmp{ dst.shape(), DataType::F32, 1 }; generate_range(dst_tmp, start, num_of_elements, step); - return convert_to_asymmetric(dst_tmp, dst.quantization_info()); + return convert_to_asymmetric(dst_tmp, dst.quantization_info()); } generate_range(dst, start, num_of_elements, step); return dst; diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp index fe128cc6ac..965365db9d 100644 --- a/tests/validation/reference/ReductionOperation.cpp +++ b/tests/validation/reference/ReductionOperation.cpp @@ -281,7 +281,7 @@ SimpleTensor reduction_operation(const SimpleTensor &src, cons { SimpleTensor src_f = convert_from_asymmetric(src); SimpleTensor dst_f = reference::reduction_operation(src_f, dst_shape, axis, op); - return convert_to_asymmetric(dst_f, src.quantization_info()); + return convert_to_asymmetric(dst_f, src.quantization_info()); } else { diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 63a2853c66..4405e79263 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -196,7 +196,7 @@ SimpleTensor scale(const SimpleTensor &src, float scale_x, flo SimpleTensor src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info()); SimpleTensor dst_tmp = scale_core(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale); - dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); } else { diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp index f1b94c0a02..fabc62bedb 100644 --- a/tests/validation/reference/SoftmaxLayer.cpp +++ b/tests/validation/reference/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -90,7 +90,7 @@ SimpleTensor softmax_layer(const SimpleTensor &src, float beta, size_t axi SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor dst_tmp = softmax_layer(src_tmp, beta, axis); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_quantization_info); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_quantization_info); return dst; } diff --git a/tests/validation/reference/UpsampleLayer.cpp b/tests/validation/reference/UpsampleLayer.cpp index 8e36ee857e..79d726796a 100644 --- a/tests/validation/reference/UpsampleLayer.cpp +++ b/tests/validation/reference/UpsampleLayer.cpp @@ -93,7 +93,7 @@ SimpleTensor upsample_layer(const SimpleTensor &src, const Siz { SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor dst_tmp = upsample_function(src_tmp, info, policy); - dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); } else { diff --git a/tests/validation/reference/YOLOLayer.cpp b/tests/validation/reference/YOLOLayer.cpp index a12f411680..cf5e256cf9 100644 --- a/tests/validation/reference/YOLOLayer.cpp +++ b/tests/validation/reference/YOLOLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,7 @@ SimpleTensor yolo_layer(const SimpleTensor &src, cons { SimpleTensor src_tmp = convert_from_asymmetric(src); SimpleTensor dst_tmp = yolo_layer(src_tmp, info, num_classes); - SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); return dst; } -- cgit v1.2.1