aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEConvolutionLayer.cpp
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2018-05-03 15:57:48 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:52:35 +0000
commita3221e6772dc371cf5de7e525bf5c22b58ad6d08 (patch)
tree14d224e07d92dbbd97966de0b6b0aa8e6a288022 /src/runtime/NEON/functions/NEConvolutionLayer.cpp
parent20b4313365ea2ed31f59fd757f68f791f076e6bc (diff)
downloadComputeLibrary-a3221e6772dc371cf5de7e525bf5c22b58ad6d08.tar.gz
COMPMID-1106 Add fast math support in NEWinogradConvolutionLayer
Change-Id: I5fcbbb3b6f22204f0aaebbc319dfdf03593577e8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130067 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEConvolutionLayer.cpp')
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp32
1 files changed, 12 insertions, 20 deletions
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 69fb948d3f..1c294f8f5d 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -41,18 +41,19 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
}
void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation, const ActivationLayerInfo &act_info)
+ const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
+ ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
+ enable_fast_math));
switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
{
auto f = arm_compute::support::cpp14::make_unique<NEWinogradConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info, act_info);
+ f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
_function = std::move(f);
break;
}
@@ -77,13 +78,13 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const
}
Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
//Validate Winograd
- NEWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info);
+ NEWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math);
break;
case ConvolutionMethod::GEMM:
//Validate Gemm-based Convolution
@@ -102,27 +103,18 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights,
const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights);
- ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(weights_info);
- const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
-
- if((input->data_type() == DataType::F32) && (input->data_layout() == DataLayout::NCHW) && (input->dimension(idx_c) > 16) && (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3)
- && (weights->num_dimensions() <= 4)
- && (conv_info.stride().first == 1) && (conv_info.stride().second == 1) && (dilation == Size2D(1U, 1U)) && (!act_info.enabled()))
+ if(dilation != Size2D(1U, 1U) || Scheduler::get().cpu_info().get_cpu_model() == CPUModel::A53
+ || input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) <= 16)
{
- //FIXME Until COMPMID-1041 is implemented Winograd is slower than GEMM on A53.
- if(Scheduler::get().cpu_info().get_cpu_model() != CPUModel::A53)
- {
- return ConvolutionMethod::WINOGRAD;
- }
+ return ConvolutionMethod::GEMM;
}
- return ConvolutionMethod::GEMM;
+
+ return bool(NEWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
}
void NEConvolutionLayer::run()