aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r--arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h (renamed from arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h)50
2 files changed, 27 insertions, 27 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 4529120f02..cd482ddbdf 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -58,7 +58,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
@@ -70,7 +70,7 @@ public:
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[in] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index 05ade1c5dd..46d52fc182 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -21,64 +21,64 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
-#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
#include "arm_compute/core/NEON/INEKernel.h"
namespace arm_compute
{
class ITensor;
-/** NEON kernel to accumulate the biases to each element of the input tensor
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
*
* @note We assume bias to be shared
*/
-class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
{
public:
/** Default constructor */
- NEDirectConvolutionLayerBiasAccumulateKernel();
+ NEDirectConvolutionLayerOutputStageKernel();
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Allow instances of this class to be moved */
- NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+ NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
/** Default destructor */
- ~NEDirectConvolutionLayerBiasAccumulateKernel() = default;
+ ~NEDirectConvolutionLayerOutputStageKernel() = default;
/** Set the accumulate buffer and the biases of the kernel.
*
* @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
*/
- void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
*
* @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS8/QS16/F16/F32
- * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * Data type supported: QS16/QS32/F16/F32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: Same as @p input
+ * Data type supported: QS8/QS16/F16/F32
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output = nullptr);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
private:
- using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+ using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
private:
- BiasAccumulateKernel *_func;
- ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
+ OutputStageKernel *_func;
+ ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
};
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */