diff options
Diffstat (limited to 'src/runtime/CL/functions')
4 files changed, 89 insertions, 22 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 64bda93ff0..bcb5424aab 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -43,13 +43,13 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma } void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation) + const Size2D &dilation, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info, CLScheduler::get().target(), dilation)) + weights_info, act_info, CLScheduler::get().target(), dilation)) { case ConvolutionMethod::DIRECT: { @@ -72,25 +72,25 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); //Configure if the parameters match the direct convolution or the gemm-based const GPUTarget gpu_target = CLScheduler::get().target(); - switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation)) + switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, act_info, gpu_target, dilation)) { case ConvolutionMethod::DIRECT: { // Validate direct convolution layer - CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info); + CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info); break; } case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); + CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info); break; } default: @@ -102,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation) + const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation) { ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_UNUSED(weights); @@ -112,6 +112,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); ARM_COMPUTE_UNUSED(dilation); + ARM_COMPUTE_UNUSED(act_info); return ConvolutionMethod::GEMM; } diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index c48865a0cc..c451bd4b4c 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -33,11 +33,11 @@ using namespace arm_compute; CLDirectConvolutionLayer::CLDirectConvolutionLayer() - : _direct_conv_kernel(), _input_border_handler() + : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false) { } -void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { // Set GPU target _direct_conv_kernel.set_target(CLScheduler::get().target()); @@ -55,11 +55,25 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig // Tune kernels CLScheduler::get().tune_kernel_static(_direct_conv_kernel); + + _is_activationlayer_enabled = act_info.enabled(); + + //Configure Activation Layer + if(_is_activationlayer_enabled) + { + _activationlayer_function.configure(output, nullptr, act_info); + } } -Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info) +Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info) { - return CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target()); + ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target())); + if(act_info.enabled()) + { + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info)); + } + return Status{}; } void CLDirectConvolutionLayer::run() @@ -69,4 +83,10 @@ void CLDirectConvolutionLayer::run() // Run direct convolution CLScheduler::get().enqueue(_direct_conv_kernel); + + //Run Activation Layer + if(_is_activationlayer_enabled) + { + _activationlayer_function.run(); + } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index f43e100565..084c4df718 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -90,8 +90,8 @@ void CLConvolutionLayerReshapeWeights::run() } CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _original_weights(nullptr), - _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true) + : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(), + _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true), _is_activationlayer_enabled(false) { } @@ -152,7 +152,7 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens } void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation) + const Size2D &dilation, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -162,7 +162,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * output->info(), conv_info, weights_info, - dilation)); + dilation, + act_info)); _is_first_run = true; _original_weights = weights; @@ -260,11 +261,19 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * // Allocate intermediate tensor _weights_reshaped.allocator()->allocate(); + //Configure Activation Layer + _is_activationlayer_enabled = act_info.enabled(); + + if(_is_activationlayer_enabled) + { + _activationlayer_function.configure(output, nullptr, act_info); + } + ARM_COMPUTE_UNUSED(weights_info); } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); @@ -274,6 +283,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); + if(act_info.enabled()) + { + ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a()); + } + const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); const bool append_bias = (biases != nullptr) && (!is_quantized); const unsigned bias_element = (append_bias) ? 1 : 0; @@ -343,6 +357,12 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); } + //Validate Activation Layer + if(act_info.enabled()) + { + CLActivationLayer::validate(output, nullptr, act_info); + } + return Status{}; } @@ -383,5 +403,11 @@ void CLGEMMConvolutionLayer::run() // Reshape output matrix CLScheduler::get().enqueue(_col2im_kernel, false); + //Run Activation Layer if enabled + if(_is_activationlayer_enabled) + { + _activationlayer_function.run(); + } + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index a861e0072e..7af36bf06b 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -32,11 +32,12 @@ using namespace arm_compute; CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _is_first_run(true) + : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(), + _is_first_run(true), _is_activationlayer_enabled(false) { } -void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { // TODO(COMPMID-1013): This part will be removed // Get indeces for the width and height @@ -73,13 +74,21 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we _output_transform.configure(&_batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width], output_convolved_shape[idx_height]), Size2D(num_tiles_x, num_tiles_y)); + // Configure activation layer + _is_activationlayer_enabled = act_info.enabled(); + if(_is_activationlayer_enabled) + { + _activationlayer_function.configure(output, nullptr, act_info); + } + // Allocate temporary tensors _input0.allocator()->allocate(); _input1.allocator()->allocate(); _batched_mm_output.allocator()->allocate(); } -Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info) +Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info) { // TODO(COMPMID-1013): This part will be removed // Get indeces for the width and height @@ -107,17 +116,23 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape); ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, Size2D(2U, 2U))); - // Configure batched matrix multiply + // Validate batched matrix multiply TensorShape batched_mm_output_shape = input0.tensor_shape(); batched_mm_output_shape[0] = input1.tensor_shape()[0]; const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/))); - // Configure output transform + // Validate output transform ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradOutputTransformKernel::validate(&batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width], output_convolved_shape[idx_height]), Size2D(num_tiles_x, num_tiles_y))); + // Validate Activation Layer + if(act_info.enabled()) + { + CLActivationLayer::validate(output, nullptr, act_info); + } + return Status{}; } @@ -142,5 +157,10 @@ void CLWinogradConvolutionLayer::run() // Run output transform CLScheduler::get().enqueue(_output_transform); + if(_is_activationlayer_enabled) + { + _activationlayer_function.run(); + } + _memory_group.release(); } |