aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2018-11-21 15:44:55 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2018-11-21 17:32:46 +0000
commit3112e33d8d0b987e85107390a0350bd5988f5f01 (patch)
tree050c8400608caaa216305611ca2de1d8163e462b
parentedf26ea4921fd0c162abb962fc4863846e73f53f (diff)
downloadComputeLibrary-3112e33d8d0b987e85107390a0350bd5988f5f01.tar.gz
COMPMID-1451 Change PriorBox output to NCHw
Output of Priorbox should be independent of the input data layout and should always be in NCHW format Change-Id: Ie80cd4e51c78945b158c0db1af1923bdf8d7ea7b
-rw-r--r--arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h9
-rw-r--r--src/core/CL/CLKernelLibrary.cpp1
-rw-r--r--src/core/CL/cl_kernels/prior_box_layer.cl117
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.cpp82
-rw-r--r--src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp189
-rw-r--r--tests/validation/fixtures/PriorBoxLayerFixture.h3
6 files changed, 54 insertions, 347 deletions
diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h
index ac61b61142..338e9c55b6 100644
--- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h
@@ -82,20 +82,13 @@ private:
* @param[in] width Input width.
* @param[in] height Input height.
*/
- template <DataLayout DL>
void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height);
/** Function to calculate prior boxes.
*
* @param[in] window Input region on which to execute the kernel.
*/
- template <DataLayout DL>
void calculate_prior_boxes(const Window &window);
- /** Common signature for all the specialised PriorBox functions
- *
- * @param[in] window Input region on which to execute the kernel..
- */
- using PriorBoxFunction = void (NEPriorBoxLayerKernel::*)(const Window &window);
- PriorBoxFunction _func;
+
const ITensor *_input1;
const ITensor *_input2;
ITensor *_output;
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index ff4803e79c..33e66705e3 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -355,7 +355,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "pooling_layer_MxN_quantized_nhwc", "pooling_layer_quantized.cl" },
{ "pooling_layer_MxN_quantized_nchw", "pooling_layer_quantized.cl" },
{ "prior_box_layer_nchw", "prior_box_layer.cl" },
- { "prior_box_layer_nhwc", "prior_box_layer.cl" },
{ "quantization_layer", "quantization_layer.cl" },
{ "reduction_operation_x", "reduction_operation.cl" },
{ "reduction_operation_quantized_x", "reduction_operation.cl" },
diff --git a/src/core/CL/cl_kernels/prior_box_layer.cl b/src/core/CL/cl_kernels/prior_box_layer.cl
index be072ecb1d..046151bca4 100644
--- a/src/core/CL/cl_kernels/prior_box_layer.cl
+++ b/src/core/CL/cl_kernels/prior_box_layer.cl
@@ -104,88 +104,6 @@ inline int calculate_min_nchw(Image *out, __global float *max, __global float *a
return idx;
}
-
-/** Compute prior boxes and clip (NHWC)
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] idx Index to write to
- * @param[in] center_x Center value of the x axis
- * @param[in] center_y Center value of the y axis
- * @param[in] box_width Prior box width
- * @param[in] box_height Prior box height
- *
- */
-inline void calculate_xy_min_max_nhwc(Tensor3D *out, int idx, float center_x, float center_y, float box_width, float box_height)
-{
- float xmin = (center_x - box_width / 2.f) / WIDTH;
- float ymin = (center_y - box_height / 2.f) / HEIGHT;
- float xmax = (center_x + box_width / 2.f) / WIDTH;
- float ymax = (center_y + box_height / 2.f) / HEIGHT;
-
-#if defined(CLIP)
- xmin = clamp(xmin, 0.f, 1.f);
- ymin = clamp(ymin, 0.f, 1.f);
- xmax = clamp(xmax, 0.f, 1.f);
- ymax = clamp(ymax, 0.f, 1.f);
-#endif // defined(CLIP)
-
- *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 0, 0)) = xmin;
- *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 1, 0)) = ymin;
- *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 2, 0)) = xmax;
- *((__global DATA_TYPE *)tensor3D_offset(out, 0, idx + 3, 0)) = ymax;
-}
-
-/** Compute prior boxes (NHWC)
- *
- * @param[in,out] out Tensor output
- * @param[in] max The maximum values
- * @param[in] aspect_ratios The aspect ratio values
- * @param[in] max_size The maximum values values size
- * @param[in] aspect_ratios_size The aspect ratio values size
- * @param[in] min_size The minimum values size
- * @param[in] min_idx Index of the min vector
- * @param[in] idx Index to write to
- *
- * @return The updated index
- */
-inline int calculate_min_nhwc(Tensor3D *out, __global float *max, __global float *aspect_ratios, int max_size, int aspect_ratios_size, float min_size, int min_idx, int idx)
-{
- const float center_x = ((float)(get_global_id(1) % LAYER_WIDTH) + OFFSET) * STEP_X;
- const float center_y = ((float)(get_global_id(1) / LAYER_WIDTH) + OFFSET) * STEP_Y;
-
- float box_width = min_size;
- float box_height = min_size;
-
- calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height);
- idx += 4;
- if(max_size > 0)
- {
- box_width = sqrt(min_size * max[min_idx]);
- box_height = box_width;
- calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height);
- idx += 4;
- }
- for(unsigned int i = 0; i < aspect_ratios_size; ++i)
- {
- if(fabs(aspect_ratios[i] - 1.f) < 1e-6f)
- {
- continue;
- }
- box_width = min_size * sqrt(aspect_ratios[i]);
- box_height = min_size * rsqrt(aspect_ratios[i]);
-
- calculate_xy_min_max_nhwc(out, idx, center_x, center_y, box_width, box_height);
- idx += 4;
- }
-
- return idx;
-}
-
/** Calculate prior boxes with NCHW format.
*
* @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32
@@ -218,39 +136,4 @@ __kernel void prior_box_layer_nchw(IMAGE_DECLARATION(output), __global float *mi
vstore4((VEC_DATA_TYPE(DATA_TYPE, 4))(VARIANCE_0, VARIANCE_1, VARIANCE_2, VARIANCE_3), 0, ((__global DATA_TYPE *)offset(&out, i, 1)));
}
}
-
-/** Calculate prior boxes with NHWC format.
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] min The minimum values
- * @param[in] max The maximum_values
- * @param[in] aspect_ratios The aspect ratio values
- * @param[in] min_size The minimum values size
- * @param[in] max_size The maximum_values values size
- * @param[in] aspect_ratios_size The aspect ratio values size
- */
-__kernel void prior_box_layer_nhwc(TENSOR3D_DECLARATION(output), __global float *min, __global float *max, __global float *aspect_ratios, unsigned int min_size, unsigned int max_size,
- unsigned int aspect_ratios_size)
-{
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- int idx = 0;
- for(unsigned int i = 0; i < min_size; ++i)
- {
- idx = calculate_min_nhwc(&out, max, aspect_ratios, max_size, aspect_ratios_size, min[i], i, idx);
- }
-
- for(int i = 0; i < (NUM_PRIORS * 4); i += 4)
- {
- *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 0, 1)) = VARIANCE_0;
- *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 1, 1)) = VARIANCE_1;
- *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 2, 1)) = VARIANCE_2;
- *((__global DATA_TYPE *)tensor3D_offset(&out, 0, i + 3, 1)) = VARIANCE_3;
- }
-}
#endif /* defined(DATA_TYPE) && defined(WIDTH) && defined(HEIGHT) && defined(LAYER_WIDTH) && defined(LAYER_HEIGHT) && defined(OFFSET) && defined(STEP_X) && defined(STEP_Y) && defined(NUM_PRIORS) && defined(VARIANCE_0) && defined(VARIANCE_1) && defined(VARIANCE_2) && defined(VARIANCE_3) */
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
index 63e745ed10..c76d839551 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
@@ -73,8 +73,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
if(output != nullptr && output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(input1->data_layout(), DataLayoutDimension::HEIGHT)) != 2);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input1, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2);
}
return Status{};
@@ -87,29 +86,11 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
TensorShape output_shape = compute_prior_box_shape(*input1, info);
auto_init_if_empty(*output, output_shape, 1, input1->data_type());
- Window win{};
- bool window_changed = false;
-
- switch(input1->data_layout())
- {
- case DataLayout::NCHW:
- {
- const unsigned int num_elems_processed_per_iteration = 4 * num_priors;
-
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, output_access);
- break;
- }
- case DataLayout::NHWC:
- {
- win = calculate_max_window(*output, Steps());
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- };
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ const unsigned int num_elems_processed_per_iteration = 4 * num_priors;
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+ bool window_changed = update_window_and_padding(win, output_access);
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -188,25 +169,8 @@ void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor *
}
}
- unsigned int idx = 0;
- // Create kernel
- switch(data_layout)
- {
- case DataLayout::NCHW:
- {
- idx = num_arguments_per_2D_tensor();
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options()));
- break;
- }
- case DataLayout::NHWC:
- {
- idx = num_arguments_per_3D_tensor();
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("prior_box_layer_nhwc", build_opts.options()));
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- }
+ unsigned int idx = num_arguments_per_2D_tensor();
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options()));
_kernel.setArg(idx++, *_min);
_kernel.setArg(idx++, *_max);
@@ -245,31 +209,11 @@ void CLPriorBoxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
queue.enqueueWriteBuffer(*_max, CL_TRUE, 0, _info.max_sizes().size() * sizeof(float), _info.max_sizes().data());
}
- switch(_input1->info()->data_layout())
- {
- case DataLayout::NCHW:
- {
- Window slice = window.first_slice_window_2D();
- slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2));
+ Window slice = window.first_slice_window_2D();
+ slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2));
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- break;
- }
- case DataLayout::NHWC:
- {
- Window slice = window.first_slice_window_3D();
- slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 4 * _num_priors));
- slice.set(Window::DimZ, Window::Dimension(0, _output->info()->dimension(2), 2));
-
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- }
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _output, slice);
+ enqueue(queue, *this, slice);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
index 2f6317921b..365fc83fd0 100644
--- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp
@@ -67,8 +67,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
if(output != nullptr && output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(input1->data_layout(), DataLayoutDimension::HEIGHT)) != 2);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input1, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2);
}
return Status{};
@@ -76,29 +75,13 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const PriorBoxLayerInfo &info)
{
- ARM_COMPUTE_UNUSED(input2);
+ ARM_COMPUTE_UNUSED(input1, input2);
- Window win = {};
- bool window_changed = false;
- switch(input1->data_layout())
- {
- case DataLayout::NCHW:
- {
- const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();
- const unsigned int num_elems_processed_per_iteration = 4 * num_priors;
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, output_access);
- break;
- }
- case DataLayout::NHWC:
- {
- win = calculate_max_window(*output, Steps());
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- };
+ const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();
+ const unsigned int num_elems_processed_per_iteration = 4 * num_priors;
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+ bool window_changed = update_window_and_padding(win, output_access);
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
@@ -106,11 +89,10 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
} // namespace
NEPriorBoxLayerKernel::NEPriorBoxLayerKernel()
- : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _info()
+ : _input1(nullptr), _input2(nullptr), _output(nullptr), _info()
{
}
-template <DataLayout DL>
void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width,
const int height)
{
@@ -119,49 +101,23 @@ void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, cons
float xmax = (center_x + box_width / 2.f) / width;
float ymax = (center_y + box_height / 2.f) / height;
- switch(DL)
+ float32x4_t vec_elements = { xmin, ymin, xmax, ymax };
+ if(_info.clip())
{
- case DataLayout::NCHW:
- {
- float32x4_t vec_elements = { xmin, ymin, xmax, ymax };
- if(_info.clip())
- {
- static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
- static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
- vec_elements = vmaxq_f32(vminq_f32(vec_elements, CONST_1), CONST_0);
- }
- vst1q_f32(out + offset, vec_elements);
- }
- break;
- case DataLayout::NHWC:
- {
- const int output_offset = _output->info()->strides_in_bytes()[1] / _output->info()->element_size();
- if(_info.clip())
- {
- xmin = std::min(std::max(xmin, 0.f), 1.f);
- ymin = std::min(std::max(ymin, 0.f), 1.f);
- xmax = std::min(std::max(xmax, 0.f), 1.f);
- ymax = std::min(std::max(ymax, 0.f), 1.f);
- }
-
- *(out + output_offset * offset) = xmin;
- *(out + output_offset * (offset + 1)) = ymin;
- *(out + output_offset * (offset + 2)) = xmax;
- *(out + output_offset * (offset + 3)) = ymax;
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Not implemented");
+ static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
+ static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
+ vec_elements = vmaxq_f32(vminq_f32(vec_elements, CONST_1), CONST_0);
}
+ vst1q_f32(out + offset, vec_elements);
}
-template <DataLayout DL>
void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
{
const int num_priors = _info.aspect_ratios().size() * _info.min_sizes().size() + _info.max_sizes().size();
- const int width_idx = get_data_layout_dimension_index(DL, DataLayoutDimension::WIDTH);
- const int height_idx = get_data_layout_dimension_index(DL, DataLayoutDimension::HEIGHT);
+ const DataLayout data_layout = _input1->info()->data_layout();
+ const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int layer_width = _input1->info()->dimension(width_idx);
const int layer_height = _input1->info()->dimension(height_idx);
@@ -182,44 +138,17 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
step_y = static_cast<float>(img_height) / layer_height;
}
- Window slice = {};
-
- switch(DL)
- {
- case DataLayout::NCHW:
- slice = window.first_slice_window_2D();
- slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2));
- break;
- case DataLayout::NHWC:
- slice = window.first_slice_window_3D();
- slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 4 * num_priors));
- slice.set(Window::DimZ, Window::Dimension(0, _output->info()->dimension(2), 2));
- break;
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- }
+ Window slice = window.first_slice_window_2D();
+ slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2));
Iterator output(_output, slice);
execute_window_loop(slice, [&](const Coordinates & id)
{
float center_x = 0;
float center_y = 0;
- int idx = 0;
- switch(DL)
- {
- case DataLayout::NCHW:
- idx = id.x() / (4 * num_priors);
- center_x = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x;
- center_y = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y;
- break;
- case DataLayout::NHWC:
- idx = id.y() / (4 * num_priors);
- center_x = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x;
- center_y = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y;
- break;
- default:
- ARM_COMPUTE_ERROR("Not implemented");
- }
+ int idx = id.x() / (4 * num_priors);
+ center_x = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x;
+ center_y = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y;
float box_width;
float box_height;
@@ -231,7 +160,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
const float min_size = _info.min_sizes().at(i);
box_width = min_size;
box_height = min_size;
- store_coordinates<DL>(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
+ store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
offset += 4;
if(!_info.max_sizes().empty())
@@ -240,7 +169,7 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
box_width = std::sqrt(min_size * max_size);
box_height = box_width;
- store_coordinates<DL>(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
+ store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
offset += 4;
}
@@ -255,50 +184,27 @@ void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
box_width = min_size * sqrt(ar);
box_height = min_size / sqrt(ar);
- store_coordinates<DL>(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
+ store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
offset += 4;
}
}
// set the variance
- switch(DL)
+ out = reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(id.x(), 1)));
+ float32x4_t var;
+ if(_info.variances().size() == 1)
{
- case DataLayout::NCHW:
- {
- out = reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(id.x(), 1)));
- float32x4_t var;
- if(_info.variances().size() == 1)
- {
- var = vdupq_n_f32(_info.variances().at(0));
- }
- else
- {
- const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) };
- var = vars;
- }
- for(int i = 0; i < num_priors; ++i)
- {
- vst1q_f32(out + 4 * i, var);
- }
- }
- break;
- case DataLayout::NHWC:
- {
- for(int i = 0; i < num_priors; ++i)
- {
- const int prior_offset = 4 * i;
- const bool single_var = _info.variances().size() == 1;
- *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 0, 1)))) = _info.variances().at(0);
- *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 1, 1)))) = single_var ? _info.variances().at(0) : _info.variances().at(1);
- *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 2, 1)))) = single_var ? _info.variances().at(0) : _info.variances().at(2);
- *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(0, id.y() + prior_offset + 3, 1)))) = single_var ? _info.variances().at(0) : _info.variances().at(3);
- }
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Not implemented");
+ var = vdupq_n_f32(_info.variances().at(0));
+ }
+ else
+ {
+ const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) };
+ var = vars;
+ }
+ for(int i = 0; i < num_priors; ++i)
+ {
+ vst1q_f32(out + 4 * i, var);
}
-
},
output);
}
@@ -314,22 +220,6 @@ void NEPriorBoxLayerKernel::configure(const ITensor *input1, const ITensor *inpu
_info = info;
_output = output;
- switch(input1->info()->data_layout())
- {
- case DataLayout::NCHW:
- {
- _func = &NEPriorBoxLayerKernel::calculate_prior_boxes<DataLayout::NCHW>;
- break;
- }
- case DataLayout::NHWC:
- {
- _func = &NEPriorBoxLayerKernel::calculate_prior_boxes<DataLayout::NHWC>;
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not implemented.");
- }
-
// Configure kernel window
auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info(), info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
@@ -350,9 +240,8 @@ void NEPriorBoxLayerKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
// Run function
- (this->*_func)(window);
+ calculate_prior_boxes(window);
}
} // namespace arm_compute \ No newline at end of file
diff --git a/tests/validation/fixtures/PriorBoxLayerFixture.h b/tests/validation/fixtures/PriorBoxLayerFixture.h
index dd7a49ee1f..fb15631789 100644
--- a/tests/validation/fixtures/PriorBoxLayerFixture.h
+++ b/tests/validation/fixtures/PriorBoxLayerFixture.h
@@ -62,13 +62,12 @@ protected:
if(data_layout == DataLayout::NHWC)
{
permute(input_shape, PermutationVector(2U, 0U, 1U));
- permute(output_shape, PermutationVector(2U, 0U, 1U));
}
// Create tensors
TensorType src1 = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
TensorType src2 = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
- TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo());
// Create and configure function
FunctionType prior_box;