aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-10-22 16:17:20 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:55:45 +0000
commit60e98253f1e3df1723e7b8f4c996b544aa7c7205 (patch)
tree45ca11d6fb0a16974fc8681bc7161a6ad2b1af2e /src/runtime
parentc04a0e8f93c620d05444251e1ae55dcf8c660a1b (diff)
downloadComputeLibrary-60e98253f1e3df1723e7b8f4c996b544aa7c7205.tar.gz
COMPMID-1451: Fuse activation in DepthwiseConvolution.
Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp27
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp19
-rw-r--r--src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp57
3 files changed, 89 insertions, 14 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 76451af9b1..497cdae85c 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -90,12 +90,13 @@ void CLDepthwiseConvolutionLayer3x3::run()
}
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
- : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
- _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_prepared(false), _is_quantized(false), _original_weights(nullptr)
+ : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _activationlayer_function(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(),
+ _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_prepared(false), _is_quantized(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}
-void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -188,10 +189,18 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
// Allocate intermediate tensors
_input_reshaped.allocator()->allocate();
_v2mm_output.allocator()->allocate();
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier)
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
@@ -238,6 +247,12 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output));
}
+ // Validate Activation Layer
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -253,6 +268,10 @@ void CLDepthwiseConvolutionLayer::run()
{
CLScheduler::get().enqueue(_output_stage_kernel);
}
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
void CLDepthwiseConvolutionLayer::prepare()
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
index 7121654a75..d9aa50d9e1 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
@@ -31,11 +31,12 @@
using namespace arm_compute;
GCDepthwiseConvolutionLayer3x3::GCDepthwiseConvolutionLayer3x3()
- : _kernel(nullptr), _border_handler(), _shift_handler()
+ : _kernel(nullptr), _border_handler(), _shift_handler(), _activationlayer_function(), _is_activationlayer_enabled(false)
{
}
-void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
auto k = arm_compute::support::cpp14::make_unique<GCDepthwiseConvolutionLayer3x3Kernel>();
k->configure(input, weights, biases, output, conv_info, depth_multiplier);
@@ -45,6 +46,14 @@ void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor
_border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
_shift_handler.configure(input);
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
void GCDepthwiseConvolutionLayer3x3::run()
@@ -54,4 +63,10 @@ void GCDepthwiseConvolutionLayer3x3::run()
GCScheduler::get().dispatch(_border_handler, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(*_kernel);
+
+ // Run Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 9dcbc99332..a2f0094f9d 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -36,12 +36,14 @@ using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
- : _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(),
- _has_bias(false), _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true), _permute(false)
+ : _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
+ _permuted_weights(), _permuted_output(), _has_bias(false), _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true), _permute(false),
+ _is_activationlayer_enabled(false)
{
}
-void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -159,10 +161,18 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
_permuted_output.allocator()->allocate();
}
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier)
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
@@ -184,6 +194,12 @@ Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
}
+ //Validate Activation Layer
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -235,16 +251,22 @@ void NEDepthwiseConvolutionLayer3x3::run()
{
_permute_output.run();
}
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
: _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _permute_input(),
- _permute_weights(), _permute_output(), _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
- _is_quantized(false), _is_nhwc(false), _original_weights(nullptr)
+ _permute_weights(), _permute_output(), _activationlayer_function(), _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _permuted_input(), _permuted_weights(),
+ _permuted_output(), _is_prepared(false), _is_quantized(false), _is_nhwc(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}
-void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_UNUSED(channel_idx);
@@ -366,10 +388,18 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
// Allocate intermediate tensors
_input_reshaped.allocator()->allocate();
_v2mm_output.allocator()->allocate();
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier)
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
@@ -454,6 +484,12 @@ Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use));
}
+ // Validate Activation Layer
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -479,6 +515,11 @@ void NEDepthwiseConvolutionLayer::run()
{
_permute_output.run();
}
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
void NEDepthwiseConvolutionLayer::prepare()