diff options
Diffstat (limited to 'src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 166 |
1 files changed, 93 insertions, 73 deletions
diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 3d89933377..15e933e66e 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -36,7 +37,10 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info) +Status validate_arguments(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32); @@ -45,10 +49,10 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, // Check variances const int var_size = info.variances().size(); - if(var_size > 1) + if (var_size > 1) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size != 4, "Must provide 4 variance values"); - for(int i = 0; i < var_size; ++i) + for (int i = 0; i < var_size; ++i) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size <= 0, "Must be greater than 0"); } @@ -56,17 +60,19 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[0] < 0.f, "Step x should be greater or equal to 0"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[1] < 0.f, "Step y should be greater or equal to 0"); - if(!info.max_sizes().empty()) + if (!info.max_sizes().empty()) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), "Max and min sizes dimensions should match"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), + "Max and min sizes dimensions should match"); } - for(unsigned int i = 0; i < info.max_sizes().size(); ++i) + for (unsigned int i = 0; i < info.max_sizes().size(); ++i) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], "Max size should be greater than min size"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], + "Max size should be greater than min size"); } - if(output != nullptr && output->total_size() != 0) + if (output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); @@ -76,21 +82,26 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, } } // namespace -NEPriorBoxLayerKernel::NEPriorBoxLayerKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr), _info() +NEPriorBoxLayerKernel::NEPriorBoxLayerKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr), _info() { } -void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, - const int height) +void NEPriorBoxLayerKernel::store_coordinates(float *out, + const int offset, + const float center_x, + const float center_y, + const float box_width, + const float box_height, + const int width, + const int height) { float xmin = (center_x - box_width / 2.f) / width; float ymin = (center_y - box_height / 2.f) / height; float xmax = (center_x + box_width / 2.f) / width; float ymax = (center_y + box_height / 2.f) / height; - float32x4_t vec_elements = { xmin, ymin, xmax, ymax }; - if(_info.clip()) + float32x4_t vec_elements = {xmin, ymin, xmax, ymax}; + if (_info.clip()) { static const float32x4_t CONST_0 = vdupq_n_f32(0.f); static const float32x4_t CONST_1 = vdupq_n_f32(1.f); @@ -112,7 +123,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) int img_width = _info.img_size().x; int img_height = _info.img_size().y; - if(img_width == 0 || img_height == 0) + if (img_width == 0 || img_height == 0) { img_width = _input2->info()->dimension(width_idx); img_height = _input2->info()->dimension(height_idx); @@ -120,7 +131,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) float step_x = _info.steps()[0]; float step_y = _info.steps()[1]; - if(step_x == 0.f || step_y == 0.f) + if (step_x == 0.f || step_y == 0.f) { step_x = static_cast<float>(img_width) / layer_width; step_y = static_cast<float>(img_height) / layer_height; @@ -130,74 +141,80 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window) slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2)); Iterator output(_output, slice); - execute_window_loop(slice, [&](const Coordinates & id) - { - float center_x = 0; - float center_y = 0; - int idx = id.x() / (4 * num_priors); - center_x = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x; - center_y = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y; - - float box_width; - float box_height; - int offset = 0; - - auto out = reinterpret_cast<float *>(output.ptr()); - for(unsigned int i = 0; i < _info.min_sizes().size(); ++i) + execute_window_loop( + slice, + [&](const Coordinates &id) { - const float min_size = _info.min_sizes().at(i); - box_width = min_size; - box_height = min_size; - store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); - offset += 4; - - if(!_info.max_sizes().empty()) + float center_x = 0; + float center_y = 0; + int idx = id.x() / (4 * num_priors); + center_x = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x; + center_y = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y; + + float box_width; + float box_height; + int offset = 0; + + auto out = reinterpret_cast<float *>(output.ptr()); + for (unsigned int i = 0; i < _info.min_sizes().size(); ++i) { - const float max_size = _info.max_sizes().at(i); - box_width = std::sqrt(min_size * max_size); - box_height = box_width; - + const float min_size = _info.min_sizes().at(i); + box_width = min_size; + box_height = min_size; store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); offset += 4; - } - // rest of priors - for(auto ar : _info.aspect_ratios()) - { - if(fabs(ar - 1.) < 1e-6) + if (!_info.max_sizes().empty()) { - continue; + const float max_size = _info.max_sizes().at(i); + box_width = std::sqrt(min_size * max_size); + box_height = box_width; + + store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); + offset += 4; } - box_width = min_size * sqrt(ar); - box_height = min_size / sqrt(ar); + // rest of priors + for (auto ar : _info.aspect_ratios()) + { + if (fabs(ar - 1.) < 1e-6) + { + continue; + } - store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); - offset += 4; + box_width = min_size * sqrt(ar); + box_height = min_size / sqrt(ar); + + store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height); + offset += 4; + } } - } - // set the variance - out = reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(id.x(), 1))); - float32x4_t var; - if(_info.variances().size() == 1) - { - var = vdupq_n_f32(_info.variances().at(0)); - } - else - { - const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) }; - var = vars; - } - for(int i = 0; i < num_priors; ++i) - { - vst1q_f32(out + 4 * i, var); - } - }, - output); + // set the variance + out = reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(id.x(), 1))); + float32x4_t var; + if (_info.variances().size() == 1) + { + var = vdupq_n_f32(_info.variances().at(0)); + } + else + { + const float32x4_t vars = {_info.variances().at(0), _info.variances().at(1), _info.variances().at(2), + _info.variances().at(3)}; + var = vars; + } + for (int i = 0; i < num_priors; ++i) + { + vst1q_f32(out + 4 * i, var); + } + }, + output); } -void NEPriorBoxLayerKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info) +void NEPriorBoxLayerKernel::configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + const PriorBoxLayerInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); @@ -215,7 +232,10 @@ void NEPriorBoxLayerKernel::configure(const ITensor *input1, const ITensor *inpu INEKernel::configure(win); } -Status NEPriorBoxLayerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info) +Status NEPriorBoxLayerKernel::validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, info)); @@ -231,4 +251,4 @@ void NEPriorBoxLayerKernel::run(const Window &window, const ThreadInfo &info) // Run function calculate_prior_boxes(window); } -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute |