From 44b4e974590f1a6a07b235f203006cc9010b37e8 Mon Sep 17 00:00:00 2001 From: George Wort Date: Tue, 8 Jan 2019 11:41:54 +0000 Subject: COMPMID-1794: Add support for NHWC in CLROIAlignLayer Change-Id: If1df8f6c0549c986e607cbceb0977c80b2891b75 Reviewed-on: https://review.mlplatform.org/493 Tested-by: Arm Jenkins Reviewed-by: Isabella Gottardi Reviewed-by: Michele Di Giorgio --- arm_compute/core/utils/misc/ShapeCalculator.h | 22 ++++++++++ src/core/CL/cl_kernels/roi_align_layer.cl | 55 ++++++++++++++++-------- src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 24 ++++++----- tests/validation/CL/ROIAlignLayer.cpp | 36 ++++++++-------- tests/validation/fixtures/ROIAlignLayerFixture.h | 24 +++++++---- 5 files changed, 105 insertions(+), 56 deletions(-) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index f8a6df7fe7..35e21679d2 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -745,6 +745,28 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo return output_shape; } +/** Calculate the output roi align shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] rois Rois tensor info + * @param[in] pool_info Pooling layer info + * + * @return the calculated shape + */ +inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) +{ + TensorShape output_shape{ input.tensor_shape() }; + + const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); + + output_shape.set(idx_width, pool_info.pooled_width()); + output_shape.set(idx_height, pool_info.pooled_height()); + output_shape.set(3, rois.dimension(1)); + + return output_shape; +} + /** Calculate the RNN shape of a tensor * * @param[in] input Input tensor info diff --git a/src/core/CL/cl_kernels/roi_align_layer.cl b/src/core/CL/cl_kernels/roi_align_layer.cl index f52eb18078..a956860be2 100644 --- a/src/core/CL/cl_kernels/roi_align_layer.cl +++ b/src/core/CL/cl_kernels/roi_align_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -75,11 +75,17 @@ inline DATA_TYPE roi_align_1x1(const Tensor3D *input, float region_start_x, const float w2 = hy * lx; const float w3 = ly * hx; const float w4 = ly * lx; - - const DATA_TYPE data1 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_low, pz); - const DATA_TYPE data2 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_low, pz); - const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_high, pz); - const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_high, pz); +#if defined(NHWC) + const DATA_TYPE data1 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_low, y_low); + const DATA_TYPE data2 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_high, y_low); + const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_low, y_high); + const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_high, y_high); +#else // !defined(NHWC) + const DATA_TYPE data1 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_low, pz); + const DATA_TYPE data2 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_low, pz); + const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_high, pz); + const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_high, pz); +#endif // defined(NHWC) sum += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4; } } @@ -133,9 +139,15 @@ __kernel void roi_align_layer( Image rois = CONVERT_TO_IMAGE_STRUCT_NO_STEP(rois); Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output); - const int px = get_global_id(0); - const int py = get_global_id(1); - const int pw = get_global_id(2); +#if defined(NHWC) + const int px = get_global_id(1); + const int py = get_global_id(2); + const int pw = get_global_id(0); +#else // !defined(NHWC) + const int px = get_global_id(0); + const int py = get_global_id(1); + const int pw = get_global_id(2); +#endif // defined(NHWC) // Load roi parameters // roi is laid out as follows { batch_index, x1, y1, x2, y2 } @@ -161,7 +173,7 @@ __kernel void roi_align_layer( const float2 roi_bin_grid = SAMPLING_RATIO; #else // !defined(SAMPLING_RATIO) // Note that we subtract EPS_GRID before ceiling. This is to avoid situations where 1.000001 gets ceiled to 2. - const float2 roi_bin_grid = ceil(bin_size - EPS_GRID); + const float2 roi_bin_grid = ceil(bin_size - EPS_GRID); #endif // defined(SAMPLING_RATIO) // Move input and output pointer across the fourth dimension @@ -169,15 +181,20 @@ __kernel void roi_align_layer( output.ptr += pw * output_stride_w; for(int pz = 0; pz < MAX_DIM_Z; ++pz) { - *(__global DATA_TYPE *)tensor3D_offset(&output, px, py, pz) = (__global DATA_TYPE)roi_align_1x1(&input, - region_start.x, - bin_size.x, - roi_bin_grid.x, - region_end.x, - region_start.y, - bin_size.y, - roi_bin_grid.y, - region_end.y, pz); +#if defined(NHWC) + DATA_TYPE *_output_ptr = (__global DATA_TYPE *)tensor3D_offset(&output, pz, px, py); +#else // !defined(NHWC) + DATA_TYPE *_output_ptr = (__global DATA_TYPE *)tensor3D_offset(&output, px, py, pz); +#endif // defined(NHWC) + *_output_ptr = (__global DATA_TYPE)roi_align_1x1(&input, + region_start.x, + bin_size.x, + roi_bin_grid.x, + region_end.x, + region_start.y, + bin_size.y, + roi_bin_grid.y, + region_end.y, pz); } } #endif // Check for compile time constants diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index 325eeb240f..66d26231d7 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +using namespace arm_compute::misc::shape_calculator; namespace arm_compute { @@ -47,18 +50,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *rois, ITe ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC, DataLayout::NCHW); ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0)); if(output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || (output->dimension(1) != pool_info.pooled_height())); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(2)); - ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(1) != output->dimension(3)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info), output->tensor_shape()); } - return Status{}; } @@ -67,8 +67,9 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output auto inizialitation if not yet initialized - TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->dimension(2), rois->dimension(1)); + const TensorShape output_shape = compute_roi_align_shape(*input, *rois, pool_info); auto_init_if_empty((*output), output_shape, 1, input->data_type()); + output->set_data_layout(input->data_layout()); // Configure kernel window const unsigned int num_elems_processed_per_iteration = 1; @@ -107,12 +108,13 @@ void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *r CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_SIZE=" + get_data_size_from_data_type(input->info()->data_type())); - build_opts.add_option("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(Window::DimX))); - build_opts.add_option("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(Window::DimY))); - build_opts.add_option("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(Window::DimZ))); + build_opts.add_option("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH)))); + build_opts.add_option("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT)))); + build_opts.add_option("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL)))); build_opts.add_option("-DPOOLED_DIM_X=" + support::cpp11::to_string(pool_info.pooled_width())); build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height())); build_opts.add_option("-DSPATIAL_SCALE=" + float_to_string_with_full_precision(pool_info.spatial_scale())); + build_opts.add_option_if(input->info()->data_layout() == DataLayout::NHWC, "-DNHWC"); build_opts.add_option_if(pool_info.sampling_ratio() > 0, "-DSAMPLING_RATIO=" + support::cpp11::to_string(pool_info.sampling_ratio())); // Create kernel @@ -137,7 +139,7 @@ void CLROIAlignLayerKernel::run(const Window &window, cl::CommandQueue &queue) Window slice_rois = slice; // Parallelize spatially and across the fourth dimension of the output tensor (also across ROITensor) slice_rois.set_dimension_step(Window::DimX, _rois->info()->dimension(0)); - slice.set(Window::DimZ, window[3]); + slice.set(get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL), window[3]); // Set arguments unsigned int idx = 0; diff --git a/tests/validation/CL/ROIAlignLayer.cpp b/tests/validation/CL/ROIAlignLayer.cpp index 926a3de68d..566e1985b3 100644 --- a/tests/validation/CL/ROIAlignLayer.cpp +++ b/tests/validation/CL/ROIAlignLayer.cpp @@ -58,26 +58,26 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/rois TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching depth size input/output - TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching number of rois and output batch size + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching number of rois and output batch size TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Invalid number of values per ROIS - TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching height and width input/output + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching height and width input/output }), - framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 3U), 1, DataType::F32), - TensorInfo(TensorShape(5, 3U), 1, DataType::F16), - TensorInfo(TensorShape(5, 3U), 1, DataType::F32), + framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 4U), 1, DataType::F32), + TensorInfo(TensorShape(5, 4U), 1, DataType::F16), + TensorInfo(TensorShape(5, 4U), 1, DataType::F32), TensorInfo(TensorShape(5, 4U), 1, DataType::F32), TensorInfo(TensorShape(5, 10U), 1, DataType::F32), - TensorInfo(TensorShape(4, 3U), 1, DataType::F32), + TensorInfo(TensorShape(4, 4U), 1, DataType::F32), TensorInfo(TensorShape(5, 4U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 7U, 3U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 7U, 3U, 3U), 1, DataType::F16), - TensorInfo(TensorShape(7U, 7U, 4U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 7U, 2U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 7U, 3U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(5U, 5U, 2U, 4U), 1, DataType::F32), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F16), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 3U, 4U), 1, DataType::F32), })), framework::dataset::make("PoolInfo", { ROIPoolingLayerInfo(7U, 7U, 1./8), ROIPoolingLayerInfo(7U, 7U, 1./8), @@ -100,15 +100,17 @@ using CLROIAlignLayerFixture = ROIAlignLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(datasets::SmallROIDataset(), - framework::dataset::make("DataType", { DataType::F32 }))) + framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32); } FIXTURE_DATA_TEST_CASE(SmallROIAlignLayerHalf, CLROIAlignLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(datasets::SmallROIDataset(), - framework::dataset::make("DataType", { DataType::F16 }))) + framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, relative_tolerance_f16, .02f, absolute_tolerance_f16); diff --git a/tests/validation/fixtures/ROIAlignLayerFixture.h b/tests/validation/fixtures/ROIAlignLayerFixture.h index c029fbae8a..dfbb478a41 100644 --- a/tests/validation/fixtures/ROIAlignLayerFixture.h +++ b/tests/validation/fixtures/ROIAlignLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,9 +46,9 @@ class ROIAlignLayerFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type) + void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout) { - _target = compute_target(input_shape, data_type, pool_info, rois_shape); + _target = compute_target(input_shape, data_type, data_layout, pool_info, rois_shape); _reference = compute_reference(input_shape, data_type, pool_info, rois_shape); } @@ -60,7 +60,7 @@ protected: } template - void generate_rois(U &&rois, const TensorShape &shape, const ROIPoolingLayerInfo &pool_info, TensorShape rois_shape) + void generate_rois(U &&rois, const TensorShape &shape, const ROIPoolingLayerInfo &pool_info, TensorShape rois_shape, DataLayout data_layout = DataLayout::NCHW) { const size_t values_per_roi = rois_shape.x(); const size_t num_rois = rois_shape.y(); @@ -73,8 +73,8 @@ protected: const float roi_scale = pool_info.spatial_scale(); // Calculate distribution bounds - const auto scaled_width = static_cast((shape.x() / roi_scale) / pool_width); - const auto scaled_height = static_cast((shape.y() / roi_scale) / pool_height); + const auto scaled_width = static_cast((shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)] / roi_scale) / pool_width); + const auto scaled_height = static_cast((shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)] / roi_scale) / pool_height); const auto min_width = static_cast(pool_width / roi_scale); const auto min_height = static_cast(pool_height / roi_scale); @@ -101,13 +101,19 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, + TensorType compute_target(TensorShape input_shape, DataType data_type, + DataLayout data_layout, const ROIPoolingLayerInfo &pool_info, const TensorShape rois_shape) { + if(data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - TensorType src = create_tensor(input_shape, data_type); + TensorType src = create_tensor(input_shape, data_type, 1, QuantizationInfo(), data_layout); TensorType rois_tensor = create_tensor(rois_shape, data_type); TensorType dst; @@ -130,7 +136,7 @@ protected: // Fill tensors fill(AccessorType(src)); - generate_rois(AccessorType(rois_tensor), input_shape, pool_info, rois_shape); + generate_rois(AccessorType(rois_tensor), input_shape, pool_info, rois_shape, data_layout); // Compute function roi_align_layer.run(); -- cgit v1.2.1