COMPMID-746 Allow NEDirectConvolution to work without biases for QS.

Renamed NEDirectConvolutionLayerBiasAccumulateKernel to NEDirectConvolutionLayerOutputStageKernel. If no bias is provided when the input is quantized, the kernel simply downscales the input. An error is thrown if no bias is provided and the input is floating point.

Change-Id: I645a4ee9c6014b0547778fdd92c9ec72ef2f0aab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114158
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
author: Michalis Spyrou <michalis.spyrou@arm.com> 2017-12-20 15:50:55 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: b91e34c9837756c9ee45917e13fb6a6cb901f795 (patch)
tree: 6f5dd4c2ec527f2a188ac940a081206810ec4d44 /arm_compute/core/NEON/kernels
parent: aa1209a1bfc9fa24a24c1b47d309e27ba2cd90a7 (diff)
download: ComputeLibrary-b91e34c9837756c9ee45917e13fb6a6cb901f795.tar.gz
2 files changed, 27 insertions, 27 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 4529120f02..cd482ddbdf 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -58,7 +58,7 @@ public:
      *                       The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                       Data type supported:Same as @p input.
      * @param[out] output    Output tensor.
-     *                       The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+     *                       The 3rd dimension must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
@@ -70,7 +70,7 @@ public:
      *                      The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                      Data type supported:Same as @p input.
      * @param[in] output    Output tensor.
-     *                      The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+     *                      The 3rd dimension must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
      * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
      *
      * @return a status
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index 05ade1c5dd..46d52fc182 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -21,64 +21,64 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
-#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
 
 #include "arm_compute/core/NEON/INEKernel.h"
 
 namespace arm_compute
 {
 class ITensor;
-/** NEON kernel to accumulate the biases to each element of the input tensor
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
  *
  * @note We assume bias to be shared
  */
-class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
 {
 public:
     /** Default constructor */
-    NEDirectConvolutionLayerBiasAccumulateKernel();
+    NEDirectConvolutionLayerOutputStageKernel();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+    NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+    NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
     /** Allow instances of this class to be moved */
-    NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+    NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
     /** Allow instances of this class to be moved */
-    NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+    NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
     /** Default destructor */
-    ~NEDirectConvolutionLayerBiasAccumulateKernel() = default;
+    ~NEDirectConvolutionLayerOutputStageKernel() = default;
     /** Set the accumulate buffer and the biases of the kernel.
      *
      * @param[in, out] input  Input to add the bias to. If @p output is not specified then accumulation is done in-place.
-     *                        Data type supported: QS8/QS16/F16/F32
-     * @param[in]      bias   The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+     *                        Data type supported: QS16/QS32/F16/F32
+     * @param[in]      bias   (Optional) The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input
      * @param[out]     output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
-     *                         Data type supported: Same as @p input
+     *                        Data type supported: QS8/QS16/F16/F32
      */
-    void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel
+    void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
      *
      * @param[in] input  Input to add the bias to. If @p output is not specified then accumulation is done in-place.
-     *                   Data type supported: QS8/QS16/F16/F32
-     * @param[in] bias   The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+     *                   Data type supported: QS16/QS32/F16/F32
+     * @param[in] bias   (Optional) The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input
      * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
-     *                         Data type supported: Same as @p input
+     *                   Data type supported: QS8/QS16/F16/F32
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+    using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
 
 private:
-    BiasAccumulateKernel *_func;
-    ITensor              *_input;
-    const ITensor        *_bias;
-    ITensor              *_output;
+    OutputStageKernel *_func;
+    ITensor           *_input;
+    const ITensor     *_bias;
+    ITensor           *_output;
 };
 } // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */
author	Michalis Spyrou <michalis.spyrou@arm.com>	2017-12-20 15:50:55 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:42:33 +0000
commit	b91e34c9837756c9ee45917e13fb6a6cb901f795 (patch)
tree	6f5dd4c2ec527f2a188ac940a081206810ec4d44 /arm_compute/core/NEON/kernels
parent	aa1209a1bfc9fa24a24c1b47d309e27ba2cd90a7 (diff)
download	ComputeLibrary-b91e34c9837756c9ee45917e13fb6a6cb901f795.tar.gz