aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2017-12-20 15:50:55 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:33 +0000
commitb91e34c9837756c9ee45917e13fb6a6cb901f795 (patch)
tree6f5dd4c2ec527f2a188ac940a081206810ec4d44 /arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
parentaa1209a1bfc9fa24a24c1b47d309e27ba2cd90a7 (diff)
downloadComputeLibrary-b91e34c9837756c9ee45917e13fb6a6cb901f795.tar.gz
COMPMID-746 Allow NEDirectConvolution to work without biases for QS.
Renamed BiasAccumulateKernel to OutputStage. If no bias is provided when the input is quantized, the kernel simply downscales the input. Throw error if no bias is provided and input is floating point. Change-Id: I645a4ee9c6014b0547778fdd92c9ec72ef2f0aab Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114158 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h')
-rw-r--r--arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h84
1 files changed, 84 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
new file mode 100644
index 0000000000..46d52fc182
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
+ *
+ * @note We assume bias to be shared
+ */
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEDirectConvolutionLayerOutputStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
+ /** Default destructor */
+ ~NEDirectConvolutionLayerOutputStageKernel() = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+ * Data type supported: QS8/QS16/F16/F32
+ */
+ void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
+ *
+ * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+ * Data type supported: QS8/QS16/F16/F32
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+
+private:
+ OutputStageKernel *_func;
+ ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */