aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2018-11-01 13:44:05 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2018-11-13 14:49:34 +0000
commit27400b90a9cb3fe028c5b724b58ce0e82d89b5e8 (patch)
tree4b7dd9d4b29653ada018172dae826fe3e6ef5e08 /src/runtime/CL
parentbb081cac4f386eb6db6e9927fce27c7027dd7be5 (diff)
downloadComputeLibrary-27400b90a9cb3fe028c5b724b58ce0e82d89b5e8.tar.gz
COMPMID-1707: Create 3 special CLWidthConcatenate kernels to concatenate 2/4 and 8 tensors (Part 1)
Creating special cases for concatenating 2 and 4 tensors. Change-Id: I6a739a494ae45011acb65369e353f9ef96970b90
Diffstat (limited to 'src/runtime/CL')
-rw-r--r--src/runtime/CL/functions/CLWidthConcatenateLayer.cpp75
1 file changed, 60 insertions, 15 deletions
diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
index 5233ff4f52..46a2d80d10 100644
--- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
@@ -36,26 +36,46 @@ using namespace arm_compute;
CLWidthConcatenateLayer::CLWidthConcatenateLayer() // NOLINT
: _concat_kernels_vector(),
+ _concat_x2_kernel(),
+ _concat_x4_kernel(),
_num_inputs(0)
{
}
Status CLWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) // NOLINT
{
+ const unsigned int num_inputs = inputs_vector.size();
+
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
// Output auto inizialitation if not yet initialized
TensorInfo tmp_output_info = *output->clone();
TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector);
auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
- unsigned int width_offset = 0;
- for(const auto &input : inputs_vector)
+ switch(num_inputs)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info));
- width_offset += input->dimension(0);
+ case 2:
+ // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], &tmp_output_info));
+ break;
+ case 4:
+ // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], &tmp_output_info));
+ break;
+ default:
+ unsigned int width_offset = 0;
+ // Validate generic case of WidthConcatenate kernel
+ for(const auto &input : inputs_vector)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info));
+ width_offset += input->dimension(0);
+ }
+ break;
}
return Status{};
@@ -74,16 +94,30 @@ void CLWidthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector,
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info()));
-
- unsigned int width_offset = 0;
- _concat_kernels_vector = arm_compute::support::cpp14::make_unique<CLWidthConcatenateLayerKernel[]>(_num_inputs);
+ ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info()));
- for(unsigned int i = 0; i < _num_inputs; i++)
+ switch(_num_inputs)
{
- _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output);
- width_offset += inputs_vector.at(i)->info()->dimension(0);
+ case 2:
+ // Configure WidthConcatenate2Tensors kernel
+ _concat_x2_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), output);
+ break;
+ case 4:
+ // Configure WidthConcatenate4Tensors kernel
+ _concat_x4_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
+ break;
+ default:
+ // Configure generic case WidthConcatenate kernels
+ _concat_kernels_vector = arm_compute::support::cpp14::make_unique<CLWidthConcatenateLayerKernel[]>(_num_inputs);
+
+ unsigned int width_offset = 0;
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output);
+ width_offset += inputs_vector.at(i)->info()->dimension(0);
+ }
+ break;
}
}
@@ -91,8 +125,19 @@ void CLWidthConcatenateLayer::run()
{
cl::CommandQueue q = CLScheduler::get().queue();
- for(unsigned i = 0; i < _num_inputs; i++)
+ switch(_num_inputs)
{
- CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
+ case 2:
+ CLScheduler::get().enqueue(_concat_x2_kernel, true);
+ break;
+ case 4:
+ CLScheduler::get().enqueue(_concat_x4_kernel, true);
+ break;
+ default:
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
+ }
+ break;
}
}