Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/NEON/NEKernels.h                                            |  2
-rw-r--r--  arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h               |  4
-rw-r--r--  arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h (renamed from arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h) | 50
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h             | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h                | 17
5 files changed, 42 insertions, 41 deletions
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 6c31fa4fb1..8a4cf7abeb 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -53,8 +53,8 @@
#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 4529120f02..cd482ddbdf 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -58,7 +58,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
@@ -70,7 +70,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[in] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index 05ade1c5dd..46d52fc182 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -21,64 +21,64 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
-#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
#include "arm_compute/core/NEON/INEKernel.h"
namespace arm_compute
{
class ITensor;
-/** NEON kernel to accumulate the biases to each element of the input tensor
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
*
* @note We assume bias to be shared
*/
-class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
{
public:
/** Default constructor */
- NEDirectConvolutionLayerBiasAccumulateKernel();
+ NEDirectConvolutionLayerOutputStageKernel();
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Default destructor */
- ~NEDirectConvolutionLayerBiasAccumulateKernel() = default;
+ ~NEDirectConvolutionLayerOutputStageKernel() = default;
/** Set the accumulate buffer and the biases of the kernel.
*
* @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
*/
- void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
*
* @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
private:
- using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+ using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
private:
- BiasAccumulateKernel *_func;
- ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
+ OutputStageKernel *_func;
+ ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
};
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */
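
For context, a minimal usage sketch of the renamed kernel, based only on the configure()/validate() signatures declared above; the tensor setup, data types and error handling are illustrative assumptions, not part of this patch:

#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void output_stage_example(Tensor &accum, const Tensor &bias, Tensor &dst)
{
    // Out-of-place: validate first, then accumulate the (now optional) bias and
    // write the down-converted result into dst, e.g. a QS32 accumulator to a QS8 output.
    const Status status = NEDirectConvolutionLayerOutputStageKernel::validate(accum.info(), bias.info(), dst.info());
    static_cast<void>(status); // real code would act on the returned Status

    NEDirectConvolutionLayerOutputStageKernel output_stage;
    output_stage.configure(&accum, &bias, &dst);

    // In-place variant: omitting the output (and optionally the bias) keeps the
    // old BiasAccumulateKernel behaviour, e.g. output_stage.configure(&accum, &bias);
}
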
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 659594fe11..6208c20227 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -28,7 +28,7 @@
#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/Types.h"
@@ -67,10 +67,10 @@ public:
void run() override;
private:
- NEDepthwiseConvolutionLayer3x3Kernel _kernel;
- NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel;
- NEFillBorderKernel _border_handler;
- bool _has_bias;
+ NEDepthwiseConvolutionLayer3x3Kernel _kernel;
+ NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEFillBorderKernel _border_handler;
+ bool _has_bias;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 09a54968bb..e1aa839802 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -24,8 +24,8 @@
#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
@@ -42,7 +42,7 @@ namespace arm_compute
* This function calls the following NEON kernels:
*
* -# @ref NEFillBorderKernel for the input
- * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ * -# @ref NEDirectConvolutionLayerOutputStageKernel
* -# @ref NEDirectConvolutionLayerKernel
*/
class NEDirectConvolutionLayer : public IFunction
@@ -93,12 +93,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel;
- NEDirectConvolutionLayerKernel _conv_kernel;
- NEFillBorderKernel _input_border_handler;
- Tensor _accumulator;
- bool _has_bias;
+ MemoryGroup _memory_group;
+ NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEDirectConvolutionLayerKernel _conv_kernel;
+ NEFillBorderKernel _input_border_handler;
+ Tensor _accumulator;
+ bool _has_bias;
+ bool _is_fixed_point;
};
}
#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */
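
At the function level, the renamed kernel is now scheduled as a generic output stage rather than a bias-only step; callers of NEDirectConvolutionLayer are unaffected by this patch. A hypothetical caller is sketched below, assuming the pre-existing configure(input, weights, bias, output, conv_info) signature, which is not shown in this diff:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void direct_convolution_example(Tensor &src, const Tensor &weights, const Tensor &bias, Tensor &dst)
{
    // Assumed signature: configure(input, weights, bias, output, conv_info).
    // Per the documentation above, the function dispatches NEFillBorderKernel,
    // NEDirectConvolutionLayerKernel and NEDirectConvolutionLayerOutputStageKernel.
    NEDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0));
    conv.run();
}
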