about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/NEON/functions
diff options
context:
space:
mode:
author: Michalis Spyrou <michalis.spyrou@arm.com> 2017-12-20 15:50:55 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: b91e34c9837756c9ee45917e13fb6a6cb901f795 (patch)
tree: 6f5dd4c2ec527f2a188ac940a081206810ec4d44 /arm_compute/runtime/NEON/functions
parent: aa1209a1bfc9fa24a24c1b47d309e27ba2cd90a7 (diff)
download: ComputeLibrary-b91e34c9837756c9ee45917e13fb6a6cb901f795.tar.gz
COMPMID-746 Allow NEDirectConvolution to work without biases for QS.
Renamed BiasAccumulateKernel to OutputStage. If no bias is provided when the input is quantized, the kernel simply downscales the input. Throw an error if no bias is provided and the input is floating point.

Change-Id: I645a4ee9c6014b0547778fdd92c9ec72ef2f0aab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114158
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 10
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h | 17
2 files changed, 14 insertions, 13 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 659594fe11..6208c20227 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -28,7 +28,7 @@
#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/Types.h"
@@ -67,10 +67,10 @@ public:
void run() override;
private:
- NEDepthwiseConvolutionLayer3x3Kernel _kernel;
- NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel;
- NEFillBorderKernel _border_handler;
- bool _has_bias;
+ NEDepthwiseConvolutionLayer3x3Kernel _kernel;
+ NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEFillBorderKernel _border_handler;
+ bool _has_bias;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 09a54968bb..e1aa839802 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -24,8 +24,8 @@
#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
@@ -42,7 +42,7 @@ namespace arm_compute
* This function calls the following NEON kernels:
*
* -# @ref NEFillBorderKernel for the input
- * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ * -# @ref NEDirectConvolutionLayerOutputStageKernel
* -# @ref NEDirectConvolutionLayerKernel
*/
class NEDirectConvolutionLayer : public IFunction
@@ -93,12 +93,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel;
- NEDirectConvolutionLayerKernel _conv_kernel;
- NEFillBorderKernel _input_border_handler;
- Tensor _accumulator;
- bool _has_bias;
+ MemoryGroup _memory_group;
+ NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEDirectConvolutionLayerKernel _conv_kernel;
+ NEFillBorderKernel _input_border_handler;
+ Tensor _accumulator;
+ bool _has_bias;
+ bool _is_fixed_point;
};
}
#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */