about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/NEON
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2021-06-16 11:14:41 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com> 2021-06-25 13:52:38 +0000
commit d7316eb877cc4ff8573219374335e917b19a0203 (patch)
tree 9918f85a12424ccd53ae91f4d7b7701b6e0747a9 /arm_compute/runtime/NEON
parent cd060c47c1bad06f2aad8f0f8f94a72c4f75b919 (diff)
download ComputeLibrary-d7316eb877cc4ff8573219374335e917b19a0203.tar.gz
Port NEGEMMConv2d to memory injecting interface
Resolves: COMPMID-4506, COMPMID-4570
Change-Id: I6d37a06da141f1fcfcaa8525322a319cb0234791
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5824
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMM.h 15
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConv2d.h 5
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h 63
-rw-r--r-- arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h 15
4 files changed, 12 insertions, 86 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 6fa30bd545..6c5be0eb5e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -32,6 +32,7 @@
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/helpers/MemoryHelpers.h"
#include <memory>
@@ -105,14 +106,7 @@ public:
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor info. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run
+ * Similar to @ref NEGEMM::configure()
*
* @return a status
*/
@@ -146,7 +140,10 @@ private:
bool _reshape_b_only_on_first_run;
bool _is_prepared;
- ITensorPack _asm_glue_tensors{};
+ ITensorPack _asm_glue_run_pack;
+ ITensorPack _asm_glue_prep_pack;
+ WorkspaceData<Tensor> _asm_glue_workspace;
+ experimental::MemoryRequirements _aux_mem_req;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index f39ce4dfa3..53ceb6d978 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -29,15 +29,12 @@
#include "arm_compute/runtime/IMemoryManager.h"
#include <memory>
+
namespace arm_compute
{
// Forward declarations
class ITensor;
class ITensorInfo;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index dc9783f9eb..ff888760e1 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,32 +24,15 @@
#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "NEActivationLayer.h"
-#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
-class NEConvertQuantizedSignednessKernel;
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMLowpMatrixMultiplyKernel;
-class NEGEMMLowpOffsetContributionKernel;
-class NEGEMMLowpOffsetContributionOutputStageKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-class NEGEMMLowpMatrixBReductionKernel;
-class NEGEMMTranspose1xWKernel;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
-
/** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
*
* -# @ref NEGEMMInterleave4x4Kernel
@@ -119,14 +102,7 @@ public:
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
*
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should be executed only for the first run
+ * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure()
*
* @return a status
*/
@@ -137,41 +113,8 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
- std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
- std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
- std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
- std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
- NEActivationLayer _activation_func;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm;
- std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _run_activation;
- bool _flip_signedness;
-
- ITensorPack _asm_glue_tensors{};
+ struct Impl;
+ std::unique_ptr<struct Impl> _impl;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index f9ebf608cb..b02c4ed5b7 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -96,20 +96,9 @@ public:
void run() override;
void prepare() override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * Similar to @ref NEWinogradConvolutionLayer::configure()
*
* @return a status
*/