From 7ce53c620b50c718bac62017d28072cf61457233 Mon Sep 17 00:00:00 2001 From: steniu01 Date: Fri, 29 Sep 2017 14:55:00 +0100 Subject: COMPMID-546 Add auto config to depth concatenate Change-Id: I7798a56677d541338a73e3888ed0a2cfe0375794 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89726 Tested-by: Kaizen Reviewed-by: Georgios Pinitas --- arm_compute/core/Utils.h | 32 +++++++++++++++++++++++ src/runtime/CL/functions/CLDepthConcatenate.cpp | 6 +++++ src/runtime/NEON/functions/NEDepthConcatenate.cpp | 6 +++++ 3 files changed, 44 insertions(+) diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index ab5d110f91..06d674644b 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -35,6 +35,7 @@ #include #include #include +#include namespace arm_compute { @@ -419,6 +420,37 @@ inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matri return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0))); } +/** Calculate the output shapes of the depth concatenate function. + * + * @param[in] inputs_vector The vector that stores all the pointers to input. 
+ * + * @return the output shape + */ +template +TensorShape calculate_depth_concatenate_shape(const std::vector &inputs_vector) +{ + TensorShape out_shape = inputs_vector[0]->info()->tensor_shape(); + + size_t max_x = 0; + size_t max_y = 0; + size_t depth = 0; + + for(const auto &tensor : inputs_vector) + { + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + const TensorShape shape = tensor->info()->tensor_shape(); + max_x = std::max(shape.x(), max_x); + max_y = std::max(shape.y(), max_y); + depth += shape.z(); + } + + out_shape.set(0, max_x); + out_shape.set(1, max_y); + out_shape.set(2, depth); + + return out_shape; +} + /** Calculate accurary required by the horizontal and vertical convolution computations * * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp index f42627f34c..89e44ca98e 100644 --- a/src/runtime/CL/functions/CLDepthConcatenate.cpp +++ b/src/runtime/CL/functions/CLDepthConcatenate.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -51,6 +52,11 @@ void CLDepthConcatenate::configure(std::vector inputs_vector, ICLTe _concat_kernels_vector = arm_compute::support::cpp14::make_unique(_num_inputs); _border_handlers_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position()); + for(unsigned int i = 0; i < _num_inputs; i++) { _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); diff --git
a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp index 90eee4f45f..f8ad2abe61 100644 --- a/src/runtime/NEON/functions/NEDepthConcatenate.cpp +++ b/src/runtime/NEON/functions/NEDepthConcatenate.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" @@ -48,6 +49,11 @@ void NEDepthConcatenate::configure(std::vector inputs_vector, ITensor _concat_kernels_vector = arm_compute::support::cpp14::make_unique(_num_inputs); _border_handlers_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position()); + unsigned int depth_offset = 0; for(unsigned int i = 0; i < _num_inputs; ++i) { -- cgit v1.2.1