about | summary | refs | log | tree | commit | diff
path: root/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMM.h 27
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConv2d.h 21
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h 7
-rw-r--r-- arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h 2
4 files changed, 33 insertions, 24 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index d4a9f68beb..9df2e08956 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -41,12 +41,15 @@ class NEGEMMInterleave4x4Kernel;
class NEGEMMMatrixAdditionKernel;
class NEGEMMMatrixMultiplyKernel;
class NEGEMMTranspose1xWKernel;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch;
+}
/** Basic function to execute GEMM. This function calls the following kernels:
*
* If optimized assembly is available:
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref NEActivationLayer (if alpha != 1.0)
* Else:
* -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
@@ -119,16 +122,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
- std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue;
- std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
- NEActivationLayer _alpha_scale_func;
- NEArithmeticAddition _add_bias;
- NEActivationLayer _activation_func;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
+ std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
+ std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
+ std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
+ NEActivationLayer _alpha_scale_func;
+ NEArithmeticAddition _add_bias;
+ NEActivationLayer _activation_func;
Tensor _tmp_a;
Tensor _tmp_b;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index b2ffd038de..6c71f0e188 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -36,13 +36,16 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch;
+}
/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
*
* Supports only NHWC data layout
*
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch
*
* Weights are transformed from OHWI to HWIO format using the following kernels:
@@ -111,13 +114,13 @@ public:
void prepare() override;
private:
- std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func;
- NEActivationLayer _activation_func;
- NEPermute _weights_permute_func;
- const ITensor *_original_weights;
- Tensor _permuted_weights;
- bool _is_prepared;
- bool _run_activation;
+ std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _gemm_asm_func;
+ NEActivationLayer _activation_func;
+ NEPermute _weights_permute_func;
+ const ITensor *_original_weights;
+ Tensor _permuted_weights;
+ bool _is_prepared;
+ bool _run_activation;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 780723e752..a292712bd7 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -44,7 +44,10 @@ class NEGEMMLowpOffsetContributionOutputStageKernel;
class NEGEMMLowpMatrixAReductionKernel;
class NEGEMMLowpMatrixBReductionKernel;
class NEGEMMTranspose1xWKernel;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch;
+}
/** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
*
@@ -135,7 +138,7 @@ public:
private:
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
- std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue;
+ std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 77f9093ed4..f9ebf608cb 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -47,7 +47,7 @@ class ICPPKernel;
* -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
* -# @ref NEWinogradLayerTransformInputKernel
* -# @ref NEWinogradLayerTransformOutputKernel
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
* -# @ref CPPPermute (three times: weights, input and output)
*
* @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true