Diffstat (limited to 'src/gpu/cl/operators/ClConcatenate.cpp')
-rw-r--r--  src/gpu/cl/operators/ClConcatenate.cpp | 73
1 file changed, 39 insertions(+), 34 deletions(-)
diff --git a/src/gpu/cl/operators/ClConcatenate.cpp b/src/gpu/cl/operators/ClConcatenate.cpp
index a27fc37cc4..31018b9768 100644
--- a/src/gpu/cl/operators/ClConcatenate.cpp
+++ b/src/gpu/cl/operators/ClConcatenate.cpp
@@ -23,9 +23,14 @@
*/
#include "src/gpu/cl/operators/ClConcatenate.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "src/gpu/cl/kernels/ClBatchConcatenateKernel.h"
#include "src/gpu/cl/kernels/ClDepthConcatenateKernel.h"
#include "src/gpu/cl/kernels/ClHeightConcatenateKernel.h"
@@ -33,42 +38,39 @@
#include "src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
#include "src/gpu/cl/kernels/ClWidthConcatenateKernel.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-
-#include "src/common/utils/Log.h"
-#include "src/core/helpers/AutoConfiguration.h"
-
namespace arm_compute
{
namespace opencl
{
-void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
+void ClConcatenate::configure(const CLCompileContext &compile_context,
+ const std::vector<ITensorInfo *> &src_vector,
+ ITensorInfo *dst,
+ size_t axis)
{
ARM_COMPUTE_ERROR_ON(dst == nullptr);
ARM_COMPUTE_LOG_PARAMS(src_vector, dst, axis);
_axis = axis;
_num_inputs = src_vector.size();
- TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
- std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(t);
- return t;
- });
+ std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(),
+ [](ITensorInfo *t)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(t);
+ return t;
+ });
// dst auto initialization if not yet initialized
auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
unsigned int offset = 0;
- switch(_axis)
+ switch (_axis)
{
case Window::DimX:
{
- switch(_num_inputs)
+ switch (_num_inputs)
{
case 2:
{
@@ -82,14 +84,15 @@ void ClConcatenate::configure(const CLCompileContext &compile_context, const std
{
// Configure WidthConcatenate4Tensors kernel
auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
- kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
+ kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2),
+ src_vector.at(3), dst);
_concat_kernels.emplace_back(std::move(kernel));
break;
}
default:
{
// Configure generic case WidthConcatenate kernels
- for(unsigned int i = 0; i < _num_inputs; ++i)
+ for (unsigned int i = 0; i < _num_inputs; ++i)
{
auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
kernel->configure(compile_context, src_vector.at(i), offset, dst);
@@ -103,7 +106,7 @@ void ClConcatenate::configure(const CLCompileContext &compile_context, const std
}
case Window::DimY:
{
- for(unsigned int i = 0; i < _num_inputs; ++i)
+ for (unsigned int i = 0; i < _num_inputs; ++i)
{
auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
kernel->configure(compile_context, src_vector.at(i), offset, dst);
@@ -114,7 +117,7 @@ void ClConcatenate::configure(const CLCompileContext &compile_context, const std
}
case Window::DimZ:
{
- for(unsigned int i = 0; i < _num_inputs; ++i)
+ for (unsigned int i = 0; i < _num_inputs; ++i)
{
auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
kernel->configure(compile_context, src_vector.at(i), offset, dst);
@@ -125,7 +128,7 @@ void ClConcatenate::configure(const CLCompileContext &compile_context, const std
}
case 3:
{
- for(unsigned int i = 0; i < _num_inputs; ++i)
+ for (unsigned int i = 0; i < _num_inputs; ++i)
{
auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
kernel->configure(compile_context, src_vector.at(i), offset, dst);
@@ -148,25 +151,27 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
unsigned int offset = 0;
- switch(axis)
+ switch (axis)
{
case Window::DimX:
{
- switch(num_inputs)
+ switch (num_inputs)
{
case 2:
// Validate WidthConcatenate2Tensors kernels if there are 2 inputs
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
break;
case 4:
// Validate WidthConcatenate4Tensors kernels if there are 4 inputs
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(
+ src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
break;
default:
// Validate generic case of WidthConcatenate kernel
- for(const auto &src : src_vector)
+ for (const auto &src : src_vector)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
@@ -178,7 +183,7 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
}
case Window::DimY:
{
- for(const auto &src : src_vector)
+ for (const auto &src : src_vector)
{
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
offset += src->dimension(axis);
@@ -187,7 +192,7 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
}
case Window::DimZ:
{
- for(const auto &src : src_vector)
+ for (const auto &src : src_vector)
{
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
offset += src->dimension(axis);
@@ -196,7 +201,7 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
}
case 3:
{
- for(const auto &src : src_vector)
+ for (const auto &src : src_vector)
{
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
offset += src->dimension(axis);
@@ -207,7 +212,7 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
ARM_COMPUTE_ERROR("Axis not supported");
}
- if(dst->total_size() != 0)
+ if (dst->total_size() != 0)
{
TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
@@ -218,17 +223,17 @@ Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vecto
void ClConcatenate::run(ITensorPack &tensors)
{
- if(tensors.empty())
+ if (tensors.empty())
{
ARM_COMPUTE_ERROR("No inputs provided");
}
- if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
+ if (static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
{
ARM_COMPUTE_ERROR("Configured with different number of inputs");
}
- if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
+ if (_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
{
ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
@@ -236,7 +241,7 @@ void ClConcatenate::run(ITensorPack &tensors)
else
{
int i = 0;
- for(auto &k : _concat_kernels)
+ for (auto &k : _concat_kernels)
{
ITensorPack pack;
pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
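
For orientation, below is a minimal usage sketch (not part of this patch) of how the configure()/validate()/run() entry points reformatted above are typically driven. The class name, method signatures, axis dispatch and the ACL_SRC_VEC/ACL_DST tensor-pack convention come from the diff itself; the header paths, the CLScheduler/CLKernelLibrary setup and the get_compile_context() accessor are assumptions about the surrounding library, and ClConcatenate is an internal (src/-tree) operator rather than public API.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/gpu/cl/operators/ClConcatenate.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // Assumed: initialises the default CL context, queue and kernel library.
    CLScheduler::get().default_init();

    // Two F32 inputs of shape (8, 4) concatenated along X (width), i.e. the
    // Window::DimX / two-input fast path dispatched in configure() above.
    TensorInfo a(TensorShape(8U, 4U), 1, DataType::F32);
    TensorInfo b(TensorShape(8U, 4U), 1, DataType::F32);
    TensorInfo dst{}; // left empty: configure() auto-initialises it to (16, 4)

    std::vector<ITensorInfo *> srcs{&a, &b};

    opencl::ClConcatenate concat{};
    // configure() auto-initialises dst and calls validate() internally,
    // throwing on mismatched shapes or data types.
    concat.configure(CLKernelLibrary::get().get_compile_context(), srcs, &dst, Window::DimX);

    // run() would then take an ITensorPack carrying ACL_SRC_VEC + i inputs and
    // an ACL_DST output, enqueuing the width-concatenate kernel configured above.
    return 0;
}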