about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/kernels
diff options
context:
space:
mode:
author: Michalis Spyrou <michalis.spyrou@arm.com> 2017-12-20 15:50:55 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: b91e34c9837756c9ee45917e13fb6a6cb901f795 (patch)
tree: 6f5dd4c2ec527f2a188ac940a081206810ec4d44 /arm_compute/core/NEON/kernels
parent: aa1209a1bfc9fa24a24c1b47d309e27ba2cd90a7 (diff)
download: ComputeLibrary-b91e34c9837756c9ee45917e13fb6a6cb901f795.tar.gz
COMPMID-746 Allow NEDirectConvolution to work without biases for QS.
Renamed BiasAccumulateKernel to OutputStage. If no bias is provided when the input is quantized, the kernel simply downscales the input. Throw error if no bias is provided and input is floating point.

Change-Id: I645a4ee9c6014b0547778fdd92c9ec72ef2f0aab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114158
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r-- arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 4
-rw-r--r-- arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h (renamed from arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h) | 50
2 files changed, 27 insertions, 27 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 4529120f02..cd482ddbdf 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -58,7 +58,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
@@ -70,7 +70,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[in] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index 05ade1c5dd..46d52fc182 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -21,64 +21,64 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
-#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
#include "arm_compute/core/NEON/INEKernel.h"
namespace arm_compute
{
class ITensor;
-/** NEON kernel to accumulate the biases to each element of the input tensor
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
*
* @note We assume bias to be shared
*/
-class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
{
public:
/** Default constructor */
- NEDirectConvolutionLayerBiasAccumulateKernel();
+ NEDirectConvolutionLayerOutputStageKernel();
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Default destructor */
- ~NEDirectConvolutionLayerBiasAccumulateKernel() = default;
+ ~NEDirectConvolutionLayerOutputStageKernel() = default;
/** Set the accumulate buffer and the biases of the kernel.
*
* @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
*/
- void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
*
* @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
private:
- using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+ using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
private:
- BiasAccumulateKernel *_func;
- ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
+ OutputStageKernel *_func;
+ ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
};
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */