From 3112e33d8d0b987e85107390a0350bd5988f5f01 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Nov 2018 15:44:55 +0000 Subject: COMPMID-1451 Change PriorBox output to NCHW Output of PriorBox should be independent of the input data layout and should always be in NCHW format Change-Id: Ie80cd4e51c78945b158c0db1af1923bdf8d7ea7b --- .../core/NEON/kernels/NEPriorBoxLayerKernel.h | 9 +- src/core/CL/CLKernelLibrary.cpp | 1 - src/core/CL/cl_kernels/prior_box_layer.cl | 117 ------------- src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 82 ++------- src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 189 +++++---------------- tests/validation/fixtures/PriorBoxLayerFixture.h | 3 +- 6 files changed, 54 insertions(+), 347 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h index ac61b61142..338e9c55b6 100644 --- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h @@ -82,20 +82,13 @@ private: * @param[in] width Input width. * @param[in] height Input height. */ - template void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); /** Function to calculate prior boxes. * * @param[in] window Input region on which to execute the kernel. */ - template void calculate_prior_boxes(const Window &window); - /** Common signature for all the specialised PriorBox functions - * - * @param[in] window Input region on which to execute the kernel.. 
- */ - using PriorBoxFunction = void (NEPriorBoxLayerKernel::*)(const Window &window); - PriorBoxFunction _func; + const ITensor *_input1; const ITensor *_input2; ITensor *_output; diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index ff4803e79c..33e66705e3 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -355,7 +355,6 @@ const std::map CLKernelLibrary::_kernel_program_map = { "pooling_layer_MxN_quantized_nhwc", "pooling_layer_quantized.cl" }, { "pooling_layer_MxN_quantized_nchw", "pooling_layer_quantized.cl" }, { "prior_box_layer_nchw", "prior_box_layer.cl" }, - { "prior_box_layer_nhwc", "prior_box_layer.cl" }, { "quantization_layer", "quantization_layer.cl" }, { "reduction_operation_x", "reduction_operation.cl" }, { "reduction_operation_quantized_x", "reduction_operation.cl" }, diff --git a/src/core/CL/cl_kernels/prior_box_layer.cl b/src/core/CL/cl_kernels/prior_box_layer.cl index be072ecb1d..046151bca4 100644 --- a/src/core/CL/cl_kernels/prior_box_layer.cl +++ b/src/core/CL/cl_kernels/prior_box_layer.cl @@ -104,88 +104,6 @@ inline int calculate_min_nchw(Image *out, __global float *max, __global float *a return idx; } - -/** Compute prior boxes and clip (NHWC) - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: F32 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] idx Index to write to - * @param[in] center_x Center value of the x axis - * @param[in] center_y Center value of the y axis - * @param[in] box_width Prior box width - * @param[in] box_height Prior box height - * - */ -inline void calculate_xy_min_max_nhwc(Tensor3D *out, int idx, float center_x, float center_y, float box_width, float box_height) -{ - float xmin = (center_x - box_width / 2.f) / WIDTH; - float ymin = (center_y - box_height / 2.f) / HEIGHT; - float xmax = (center_x + box_width / 2.f) / WIDTH; - float ymax = (center_y + box_height / 2.f) / HEIGHT; - -#if defined(CLIP) - xmin = clamp(xmin, 0.f, 1.f); - ymin = clamp(ymin, 0.f, 1.f); - xmax = clamp(xmax, 0.f, 1.f); - ymax = clamp(ymax, 0.f, 1.f); -#endif // defined(CLIP) - - *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 0, 0)) = xmin; - *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 1, 0)) = ymin; - *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 2, 0)) = xmax; - *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 3, 0)) = ymax; -} - -/** Compute prior boxes (NHWC) - * - * @param[in,out] out Tensor output - * @param[in] max The maximum values - * @param[in] aspect_ratios The aspect ratio values - * @param[in] max_size The maximum values values size - * @param[in] aspect_ratios_size The aspect ratio values size - * @param[in] min_size The minimum values size - * @param[in] min_idx Index of the min vector - * @param[in] idx Index to write to - * - * 
@return The updated index - */ -inline int calculate_min_nhwc(Tensor3D *out, __global float *max, __global float *aspect_ratios, int max_size, int aspect_ratios_size, float min_size, int min_idx, int idx) -{ - const float center_x = ((float)(get_global_id(1) % LAYER_WIDTH) + OFFSET) * STEP_X; - const float center_y = ((float)(get_global_id(1) / LAYER_WIDTH) + OFFSET) * STEP_Y; - - float box_width = min_size; - float box_height = min_size; - - calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height); - idx += 4; - if(max_size > 0) - { - box_width = sqrt(min_size * max[min_idx]); - box_height = box_width; - calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height); - idx += 4; - } - for(unsigned int i = 0; i < aspect_ratios_size; ++i) - { - if(fabs(aspect_ratios[i] - 1.f) < 1e-6f) - { - continue; - } - box_width = min_size * sqrt(aspect_ratios[i]); - box_height = min_size * rsqrt(aspect_ratios[i]); - - calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height); - idx += 4; - } - - return idx; -} - /** Calculate prior boxes with NCHW format. * * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32 @@ -218,39 +136,4 @@ __kernel void prior_box_layer_nchw(IMAGE_DECLARATION(output), __global float *mi vstore4((VEC_DATA_TYPE(DATA_TYPE, 4))(VARIANCE_0, VARIANCE_1, VARIANCE_2, VARIANCE_3), 0, ((__global DATA_TYPE *)offset(&out, i, 1))); } } - -/** Calculate prior boxes with NHWC format. - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: F32 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] min The minimum values - * @param[in] max The maximum_values - * @param[in] aspect_ratios The aspect ratio values - * @param[in] min_size The minimum values size - * @param[in] max_size The maximum_values values size - * @param[in] aspect_ratios_size The aspect ratio values size - */ -__kernel void prior_box_layer_nhwc(TENSOR3D_DECLARATION(output), __global float *min, __global float *max, __global float *aspect_ratios, unsigned int min_size, unsigned int max_size, - unsigned int aspect_ratios_size) -{ - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output); - - int idx = 0; - for(unsigned int i = 0; i < min_size; ++i) - { - idx = calculate_min_nhwc(&out, max, aspect_ratios, max_size, aspect_ratios_size, min[i], i, idx); - } - - for(int i = 0; i < (NUM_PRIORS * 4); i += 4) - { - *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 0, 1)) = VARIANCE_0; - *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 1, 1)) = VARIANCE_1; - *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 2, 1)) = VARIANCE_2; - *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 3, 1)) = VARIANCE_3; - } -} #endif /* defined(DATA_TYPE) && defined(WIDTH) && defined(HEIGHT) && defined(LAYER_WIDTH) && defined(LAYER_HEIGHT) && defined(OFFSET) && defined(STEP_X) && defined(STEP_Y) && defined(NUM_PRIORS) && defined(VARIANCE_0) && defined(VARIANCE_1) && defined(VARIANCE_2) && defined(VARIANCE_3) */ diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp 
b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index 63e745ed10..c76d839551 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -73,8 +73,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, if(output != nullptr && output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(input1->data_layout(), DataLayoutDimension::HEIGHT)) != 2); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input1, output); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2); } return Status{}; @@ -87,29 +86,11 @@ std::pair validate_and_configure_window(const ITensorInfo *input TensorShape output_shape = compute_prior_box_shape(*input1, info); auto_init_if_empty(*output, output_shape, 1, input1->data_type()); - Window win{}; - bool window_changed = false; - - switch(input1->data_layout()) - { - case DataLayout::NCHW: - { - const unsigned int num_elems_processed_per_iteration = 4 * num_priors; - - win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = update_window_and_padding(win, output_access); - break; - } - case DataLayout::NHWC: - { - win = calculate_max_window(*output, Steps()); - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented"); - }; - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + const unsigned int num_elems_processed_per_iteration = 4 * num_priors; + Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + bool window_changed = update_window_and_padding(win, output_access); + Status err = (window_changed) ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace @@ -188,25 +169,8 @@ void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor * } } - unsigned int idx = 0; - // Create kernel - switch(data_layout) - { - case DataLayout::NCHW: - { - idx = num_arguments_per_2D_tensor(); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options())); - break; - } - case DataLayout::NHWC: - { - idx = num_arguments_per_3D_tensor(); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("prior_box_layer_nhwc", build_opts.options())); - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented"); - } + unsigned int idx = num_arguments_per_2D_tensor(); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options())); _kernel.setArg(idx++, *_min); _kernel.setArg(idx++, *_max); @@ -245,31 +209,11 @@ void CLPriorBoxLayerKernel::run(const Window &window, cl::CommandQueue &queue) queue.enqueueWriteBuffer(*_max, CL_TRUE, 0, _info.max_sizes().size() * sizeof(float), _info.max_sizes().data()); } - switch(_input1->info()->data_layout()) - { - case DataLayout::NCHW: - { - Window slice = window.first_slice_window_2D(); - slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2)); + Window slice = window.first_slice_window_2D(); + slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2)); - unsigned int idx = 0; - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - break; - } - case DataLayout::NHWC: - { - Window slice = window.first_slice_window_3D(); - slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 4 * _num_priors)); - slice.set(Window::DimZ, Window::Dimension(0, _output->info()->dimension(2), 2)); - - unsigned int idx = 0; - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, 
slice); - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented"); - } + unsigned int idx = 0; + add_2D_tensor_argument(idx, _output, slice); + enqueue(queue, *this, slice); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 2f6317921b..365fc83fd0 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -67,8 +67,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, if(output != nullptr && output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(input1->data_layout(), DataLayoutDimension::HEIGHT)) != 2); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input1, output); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2); } return Status{}; @@ -76,29 +75,13 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, std::pair validate_and_configure_window(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const PriorBoxLayerInfo &info) { - ARM_COMPUTE_UNUSED(input2); + ARM_COMPUTE_UNUSED(input1, input2); - Window win = {}; - bool window_changed = false; - switch(input1->data_layout()) - { - case DataLayout::NCHW: - { - const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size(); - const unsigned int num_elems_processed_per_iteration = 4 * num_priors; - win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = update_window_and_padding(win, output_access); - break; - } - case DataLayout::NHWC: - { - win = calculate_max_window(*output, Steps()); - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented"); - }; + const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + 
info.max_sizes().size(); + const unsigned int num_elems_processed_per_iteration = 4 * num_priors; + Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + bool window_changed = update_window_and_padding(win, output_access); Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); @@ -106,11 +89,10 @@ std::pair validate_and_configure_window(const ITensorInfo *input } // namespace NEPriorBoxLayerKernel::NEPriorBoxLayerKernel() - : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _info() + : _input1(nullptr), _input2(nullptr), _output(nullptr), _info() { } -template void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height) { @@ -119,49 +101,23 @@ void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, cons float xmax = (center_x + box_width / 2.f) / width; float ymax = (center_y + box_height / 2.f) / height; - switch(DL) + float32x4_t vec_elements = { xmin, ymin, xmax, ymax }; + if(_info.clip()) { - case DataLayout::NCHW: - { - float32x4_t vec_elements = { xmin, ymin, xmax, ymax }; - if(_info.clip()) - { - static const float32x4_t CONST_0 = vdupq_n_f32(0.f); - static const float32x4_t CONST_1 = vdupq_n_f32(1.f); - vec_elements = vmaxq_f32(vminq_f32(vec_elements, CONST_1), CONST_0); - } - vst1q_f32(out + offset, vec_elements); - } - break; - case DataLayout::NHWC: - { - const int output_offset = _output->info()->strides_in_bytes()[1] / _output->info()->element_size(); - if(_info.clip()) - { - xmin = std::min(std::max(xmin, 0.f), 1.f); - ymin = std::min(std::max(ymin, 0.f), 1.f); - xmax = std::min(std::max(xmax, 0.f), 1.f); - ymax = std::min(std::max(ymax, 0.f), 1.f); - } - - 
*(out + output_offset * offset) = xmin; - *(out + output_offset * (offset + 1)) = ymin; - *(out + output_offset * (offset + 2)) = xmax; - *(out + output_offset * (offset + 3)) = ymax; - } - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); + static const float32x4_t CONST_0 = vdupq_n_f32(0.f); + static const float32x4_t CONST_1 = vdupq_n_f32(1.f); + vec_elements = vmaxq_f32(vminq_f32(vec_elements, CONST_1), CONST_0); } + vst1q_f32(out + offset, vec_elements); } -template void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) { const int num_priors = _info.aspect_ratios().size() * _info.min_sizes().size() + _info.max_sizes().size(); - const int width_idx = get_data_layout_dimension_index(DL, DataLayoutDimension::WIDTH); - const int height_idx = get_data_layout_dimension_index(DL, DataLayoutDimension::HEIGHT); + const DataLayout data_layout = _input1->info()->data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int layer_width = _input1->info()->dimension(width_idx); const int layer_height = _input1->info()->dimension(height_idx); @@ -182,44 +138,17 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) step_y = static_cast(img_height) / layer_height; } - Window slice = {}; - - switch(DL) - { - case DataLayout::NCHW: - slice = window.first_slice_window_2D(); - slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2)); - break; - case DataLayout::NHWC: - slice = window.first_slice_window_3D(); - slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 4 * num_priors)); - slice.set(Window::DimZ, Window::Dimension(0, _output->info()->dimension(2), 2)); - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); - } + Window slice = window.first_slice_window_2D(); + slice.set(Window::DimY, Window::Dimension(0, 
_output->info()->dimension(1), 2)); Iterator output(_output, slice); execute_window_loop(slice, [&](const Coordinates & id) { float center_x = 0; float center_y = 0; - int idx = 0; - switch(DL) - { - case DataLayout::NCHW: - idx = id.x() / (4 * num_priors); - center_x = (static_cast(idx % layer_width) + _info.offset()) * step_x; - center_y = (static_cast(idx / layer_width) + _info.offset()) * step_y; - break; - case DataLayout::NHWC: - idx = id.y() / (4 * num_priors); - center_x = (static_cast(idx % layer_width) + _info.offset()) * step_x; - center_y = (static_cast(idx / layer_width) + _info.offset()) * step_y; - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); - } + int idx = id.x() / (4 * num_priors); + center_x = (static_cast(idx % layer_width) + _info.offset()) * step_x; + center_y = (static_cast(idx / layer_width) + _info.offset()) * step_y; float box_width; float box_height; @@ -231,7 +160,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) const float min_size = _info.min_sizes().at(i); box_width = min_size; box_height = min_size; - store_coordinates
(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); + store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); offset += 4; if(!_info.max_sizes().empty()) @@ -240,7 +169,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) box_width = std::sqrt(min_size * max_size); box_height = box_width; - store_coordinates
(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); + store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); offset += 4; } @@ -255,50 +184,27 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) box_width = min_size * sqrt(ar); box_height = min_size / sqrt(ar); - store_coordinates
(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); + store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); offset += 4; } } // set the variance - switch(DL) + out = reinterpret_cast(_output->ptr_to_element(Coordinates(id.x(), 1))); + float32x4_t var; + if(_info.variances().size() == 1) { - case DataLayout::NCHW: - { - out = reinterpret_cast(_output->ptr_to_element(Coordinates(id.x(), 1))); - float32x4_t var; - if(_info.variances().size() == 1) - { - var = vdupq_n_f32(_info.variances().at(0)); - } - else - { - const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) }; - var = vars; - } - for(int i = 0; i < num_priors; ++i) - { - vst1q_f32(out + 4 * i, var); - } - } - break; - case DataLayout::NHWC: - { - for(int i = 0; i < num_priors; ++i) - { - const int prior_offset = 4 * i; - const bool single_var = _info.variances().size() == 1; - *(reinterpret_cast(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 0, 1)))) = _info.variances().at(0); - *(reinterpret_cast(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 1, 1)))) = single_var ? _info.variances().at(0) : _info.variances().at(1); - *(reinterpret_cast(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 2, 1)))) = single_var ? _info.variances().at(0) : _info.variances().at(2); - *(reinterpret_cast(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 3, 1)))) = single_var ? 
_info.variances().at(0) : _info.variances().at(3); - } - } - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); + var = vdupq_n_f32(_info.variances().at(0)); + } + else + { + const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) }; + var = vars; + } + for(int i = 0; i < num_priors; ++i) + { + vst1q_f32(out + 4 * i, var); } - }, output); } @@ -314,22 +220,6 @@ void NEPriorBoxLayerKernel::configure(const ITensor *input1, const ITensor *inpu _info = info; _output = output; - switch(input1->info()->data_layout()) - { - case DataLayout::NCHW: - { - _func = &NEPriorBoxLayerKernel::calculate_prior_boxes; - break; - } - case DataLayout::NHWC: - { - _func = &NEPriorBoxLayerKernel::calculate_prior_boxes; - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented."); - } - // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info(), info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); @@ -350,9 +240,8 @@ void NEPriorBoxLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); // Run function - (this->*_func)(window); + calculate_prior_boxes(window); } } // namespace arm_compute \ No newline at end of file diff --git a/tests/validation/fixtures/PriorBoxLayerFixture.h b/tests/validation/fixtures/PriorBoxLayerFixture.h index dd7a49ee1f..fb15631789 100644 --- a/tests/validation/fixtures/PriorBoxLayerFixture.h +++ b/tests/validation/fixtures/PriorBoxLayerFixture.h @@ -62,13 +62,12 @@ protected: if(data_layout == DataLayout::NHWC) { permute(input_shape, PermutationVector(2U, 0U, 1U)); - permute(output_shape, PermutationVector(2U, 0U, 1U)); } // Create tensors TensorType src1 = create_tensor(input_shape, data_type, 1, QuantizationInfo(), data_layout); 
TensorType src2 = create_tensor(input_shape, data_type, 1, QuantizationInfo(), data_layout); - TensorType dst = create_tensor(output_shape, data_type, 1, QuantizationInfo(), data_layout); + TensorType dst = create_tensor(output_shape, data_type, 1, QuantizationInfo()); // Create and configure function FunctionType prior_box; -- cgit v1.2.1