diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/NEON/NEKernels.h | 2 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 4 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h (renamed from arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h) | 50 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 10 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h | 17 |
5 files changed, 42 insertions, 41 deletions
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h index 6c31fa4fb1..8a4cf7abeb 100644 --- a/arm_compute/core/NEON/NEKernels.h +++ b/arm_compute/core/NEON/NEKernels.h @@ -53,8 +53,8 @@ #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/NEON/kernels/NEDilateKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" #include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h index 4529120f02..cd482ddbdf 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -58,7 +58,7 @@ public: * The 3rd dimension must be the same as the input's volume 3rd dimension. * Data type supported:Same as @p input. * @param[out] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32 * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); @@ -70,7 +70,7 @@ public: * The 3rd dimension must be the same as the input's volume 3rd dimension. * Data type supported:Same as @p input. * @param[in] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32 * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h index 05ade1c5dd..46d52fc182 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h @@ -21,64 +21,64 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ -#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ #include "arm_compute/core/NEON/INEKernel.h" namespace arm_compute { class ITensor; -/** NEON kernel to accumulate the biases to each element of the input tensor +/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input. * * @note We assume bias to be shared */ -class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel +class NEDirectConvolutionLayerOutputStageKernel : public INEKernel { public: /** Default constructor */ - NEDirectConvolutionLayerBiasAccumulateKernel(); + NEDirectConvolutionLayerOutputStageKernel(); /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; /** Default destructor */ - ~NEDirectConvolutionLayerBiasAccumulateKernel() = default; + ~NEDirectConvolutionLayerOutputStageKernel() = default; /** Set the accumulate buffer and the biases of the kernel. * * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: QS8/QS16/F16/F32 - * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * Data type supported: QS16/QS32/F16/F32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Data type supported: Same as @p input + * Data type supported: QS8/QS16/F16/F32 */ - void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel + void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel * * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: QS8/QS16/F16/F32 - * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * Data type supported: QS16/QS32/F16/F32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Data type supported: Same as @p input + * Data type supported: QS8/QS16/F16/F32 * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr); + static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; private: - using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output); + using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output); private: - BiasAccumulateKernel *_func; - ITensor *_input; - const ITensor *_bias; - ITensor *_output; + OutputStageKernel *_func; + ITensor *_input; + const ITensor *_bias; + ITensor *_output; }; } // namespace arm_compute -#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 659594fe11..6208c20227 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -28,7 +28,7 @@ #include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" #include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/Types.h" @@ -67,10 +67,10 @@ public: void run() override; private: - NEDepthwiseConvolutionLayer3x3Kernel _kernel; - NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel; - NEFillBorderKernel _border_handler; - bool _has_bias; + NEDepthwiseConvolutionLayer3x3Kernel _kernel; + NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; + NEFillBorderKernel _border_handler; + bool _has_bias; }; /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels: diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index 09a54968bb..e1aa839802 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -24,8 +24,8 @@ #ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ #define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -42,7 +42,7 @@ namespace arm_compute * This function calls the following NEON kernels: * * -# @ref NEFillBorderKernel for the input - * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel + * -# @ref NEDirectConvolutionLayerOutputStageKernel * -# @ref NEDirectConvolutionLayerKernel */ class NEDirectConvolutionLayer : public IFunction @@ -93,12 +93,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel; - NEDirectConvolutionLayerKernel _conv_kernel; - NEFillBorderKernel _input_border_handler; - Tensor _accumulator; - bool _has_bias; + MemoryGroup _memory_group; + NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; + NEDirectConvolutionLayerKernel _conv_kernel; + NEFillBorderKernel _input_border_handler; + Tensor _accumulator; + bool _has_bias; + bool _is_fixed_point; }; } #endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */ |