From 1643a45557fde79ee209f55c507860307ffe627c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 22 Sep 2020 02:24:25 +0100 Subject: COMPMID-3760: Reduce template instantiations for ROIAlign reference Core algorithm for calculating the ROIAlign reference is implemented in single precision floats, thus no reason for specializing for half. Signed-off-by: Georgios Pinitas Change-Id: I75f4edaf47b70ea0cdc7262cb1509fe69a6aa5b7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4010 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio --- tests/validation/reference/ROIAlignLayer.cpp | 84 ++++++++++++++++------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/tests/validation/reference/ROIAlignLayer.cpp b/tests/validation/reference/ROIAlignLayer.cpp index b75415c6cb..2c176de407 100644 --- a/tests/validation/reference/ROIAlignLayer.cpp +++ b/tests/validation/reference/ROIAlignLayer.cpp @@ -40,21 +40,20 @@ namespace reference namespace { /** Average pooling over an aligned window */ -template -inline T roi_align_1x1(const T *input, TensorShape input_shape, - float region_start_x, - float bin_size_x, - int grid_size_x, - float region_end_x, - float region_start_y, - float bin_size_y, - int grid_size_y, - float region_end_y, - int pz) +inline float roi_align_1x1(const float *input, TensorShape input_shape, + float region_start_x, + float bin_size_x, + int grid_size_x, + float region_end_x, + float region_start_y, + float bin_size_y, + int grid_size_y, + float region_end_y, + int pz) { if((region_end_x <= region_start_x) || (region_end_y <= region_start_y)) { - return T(0); + return 0; } else { @@ -85,16 +84,16 @@ inline T roi_align_1x1(const T *input, TensorShape input_shape, const float w4 = ly * lx; const size_t idx1 = coord2index(input_shape, Coordinates(x_low, y_low, pz)); - T data1 = input[idx1]; + float data1 = input[idx1]; const size_t idx2 = coord2index(input_shape, Coordinates(x_high, y_low, pz)); - T data2 = input[idx2]; + float data2 = input[idx2]; const size_t idx3 = coord2index(input_shape, Coordinates(x_low, y_high, pz)); - T data3 = input[idx3]; + float data3 = input[idx3]; const size_t idx4 = coord2index(input_shape, Coordinates(x_high, y_high, pz)); - T data4 = input[idx4]; + float data4 = input[idx4]; avg += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4; } @@ -102,15 +101,22 @@ inline T roi_align_1x1(const T *input, TensorShape input_shape, avg /= grid_size_x * grid_size_y; - return T(avg); + return avg; } } -/** Clamp the value between lower and upper */ -template -T clamp(T value, T lower, T upper) +template +SimpleTensor float_converter(const SimpleTensor &tensor, DataType dst_dt) { - return std::max(lower, std::min(value, upper)); + SimpleTensor dst{ tensor.shape(), dst_dt, 1, QuantizationInfo(), tensor.data_layout() }; +#if defined(_OPENMP) + #pragma omp parallel for +#endif /* _OPENMP */ + for(int i = 0; i < tensor.num_elements(); ++i) + { + dst[i] = tensor[i]; + } + return dst; } SimpleTensor convert_rois_from_asymmetric(SimpleTensor rois) @@ -129,8 +135,9 @@ SimpleTensor convert_rois_from_asymmetric(SimpleTensor rois) return dst; } } // namespace -template -SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) + +template <> +SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) { ARM_COMPUTE_UNUSED(output_qinfo); @@ -138,11 +145,11 @@ SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor(rois.data()); + const auto *rois_ptr = static_cast(rois.data()); - TensorShape input_shape = src.shape(); - TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), src.shape()[2], num_rois); - SimpleTensor dst(output_shape, dst_data_type); + TensorShape input_shape = src.shape(); + TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), src.shape()[2], num_rois); + SimpleTensor dst(output_shape, dst_data_type); // Iterate over every pixel of the input image for(size_t px = 0; px < pool_info.pooled_width(); ++px) @@ -169,10 +176,10 @@ SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor 0) ? pool_info.sampling_ratio() : int(ceil(bin_size_x)); const int roi_bin_grid_y = (pool_info.sampling_ratio() > 0) ? pool_info.sampling_ratio() : int(ceil(bin_size_y)); @@ -180,8 +187,8 @@ SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo); -template SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo); +template <> +SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) +{ + SimpleTensor src_tmp = float_converter(src, DataType::F32); + SimpleTensor rois_tmp = float_converter(rois, DataType::F32); + SimpleTensor dst_tmp = roi_align_layer(src_tmp, rois_tmp, pool_info, output_qinfo); + SimpleTensor dst = float_converter(dst_tmp, DataType::F16); + return dst; +} template <> SimpleTensor roi_align_layer(const SimpleTensor &src, const SimpleTensor &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) -- cgit v1.2.1