author    Georgios Pinitas <georgios.pinitas@arm.com>  2020-11-02 01:37:17 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com>  2020-11-12 15:59:25 +0000
commit    c0b6f76561580414f08633a804fc548ccad65659 (patch)
tree      4d46b7f479de04f799e29095392948aeb370c029 /arm_compute
parent    824061d9910ebb42cbe46b677c0b843db212c9a2 (diff)
download  ComputeLibrary-c0b6f76561580414f08633a804fc548ccad65659.tar.gz
COMPMID-3776: Indirect GEMM
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/Types.h                                                   |   9
-rw-r--r--  arm_compute/runtime/FunctionDescriptors.h                                  |  24
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h                                     |   2
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h                   |   9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h               |  58
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConv2d.h                         | 108
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h |  74
7 files changed, 182 insertions, 102 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 306bdc6706..2e639c4be4 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -137,10 +137,11 @@ enum class DataLayoutDimension
/** Available ConvolutionMethod*/
enum class ConvolutionMethod
{
- GEMM, /**< Convolution using GEMM */
- DIRECT, /**< Direct convolution */
- WINOGRAD, /**< Convolution using Winograd */
- FFT /**< Convolution using FFT */
+ GEMM, /**< Convolution using GEMM */
+ GEMM_CONV2D, /**< Direct 2D GEMM convolution */
+ DIRECT, /**< Direct convolution */
+ WINOGRAD, /**< Convolution using Winograd */
+ FFT /**< Convolution using FFT */
};
/** Available DepthwiseConvolutionFunction*/
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 16d6c345e2..1f4216eb21 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -23,6 +23,9 @@
*/
#ifndef ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H
#define ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H
+
+#include "arm_compute/core/Types.h"
+
#include <utility>
namespace arm_compute
@@ -48,5 +51,26 @@ struct FFT2DInfo
unsigned int axis1{ 1 }; /**< Axis to run second pass on. If same, multiple transforms are performed on single axis*/
FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */
};
+
+/** Descriptor used by the Convolution function */
+struct Conv2dInfo
+{
+ Conv2dInfo() = default;
+
+ Conv2dInfo(const PadStrideInfo &conv_info,
+ const Size2D &dilation,
+ const ActivationLayerInfo &act_info,
+ bool enable_fast_math,
+ unsigned int num_groups)
+ : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups)
+ {
+ }
+
+ PadStrideInfo conv_info{};
+ Size2D dilation{ 1U, 1U };
+ ActivationLayerInfo act_info{};
+ bool enable_fast_math{ false };
+ unsigned int num_groups{ 1 };
+};
} // namespace arm_compute
#endif /* ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H */
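For reference, a minimal sketch of how the new Conv2dInfo descriptor can be populated; the stride-1/1-pixel-padding values below are illustrative only and not taken from this commit:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/FunctionDescriptors.h"

using namespace arm_compute;

// Describe a stride-1 convolution with 1-pixel padding, 1x1 dilation,
// a fused ReLU, fast-math disabled and no grouping.
Conv2dInfo make_conv2d_info()
{
    const PadStrideInfo       conv_info(1U, 1U, 1U, 1U); // stride_x, stride_y, pad_x, pad_y
    const Size2D              dilation(1U, 1U);
    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

    // (conv_info, dilation, act_info, enable_fast_math, num_groups)
    return Conv2dInfo(conv_info, dilation, act_info, false, 1U);
}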
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index a97fa3b81a..e7d59e1608 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -78,9 +78,9 @@
#include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h"
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 54dae57752..a061dc7b04 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -26,16 +26,15 @@
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
+
#include <memory>
namespace arm_compute
{
+// Forward declarations
class ITensor;
/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions:
@@ -158,5 +157,5 @@ private:
std::shared_ptr<IMemoryManager> _memory_manager;
std::unique_ptr<IFunction> _function; /**< Function to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
\ No newline at end of file
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index ac77acf69d..8f9498d0f5 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
@@ -32,6 +32,28 @@
namespace arm_compute
{
+/* Convolution method supported by the assembly gemm interface */
+enum class AsmConvMethod
+{
+ Im2Col,
+ Indirect,
+ Conv
+};
+
+struct AsmGemmInfo
+{
+ AsmConvMethod method{ AsmConvMethod::Im2Col };
+ PadStrideInfo ps_info{};
+ ActivationLayerInfo activation_info{};
+ GEMMLowpOutputStageInfo output_stage{};
+ bool negated_offsets{ true };
+ bool reinterpret_input_as_3d{ false };
+ bool depth_output_gemm3d{ false };
+ int64_t padding_top{ 0 };
+ int64_t padding_left{ 0 };
+ float padding_value{ 0.f };
+};
+
/** Assembly kernel glue */
class NEGEMMAssemblyDispatch : public IFunction
{
@@ -55,33 +77,28 @@ public:
virtual ~IFallback() = default;
};
-private:
- /** Interface for the arm_gemm fallback */
- std::unique_ptr<IFallback> _arm_gemm;
- MemoryGroup _memory_group; /**< Function memory group */
- IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
public:
/** If supported create a Compute Library function else fallback to the arm_gemm function.
*
- * @param[in] a Input tensor (Matrix A)
- * @param[in] b Input tensor (Matrix B)
- * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations
- * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] gemm_info GEMM meta-data
+ * @param[in] a Input tensor (Matrix A)
+ * @param[in] b Input tensor (Matrix B)
+ * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations
+ * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+ * @param[in] info GEMM meta-data
*/
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info);
+ void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info);
/** Indicates whether or not this function can be used to process the given parameters.
*
- * @param[in] a Input tensor info (Matrix A)
- * @param[in] b Input tensor info (Matrix B)
- * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations
- * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] gemm_info GEMM meta-data
+ * @param[in] a Input tensor info (Matrix A)
+ * @param[in] b Input tensor info (Matrix B)
+ * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations
+ * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+ * @param[in] info GEMM meta-data
*
* @return a status.
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info);
+ static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info);
/** Checks if activation is supported by the gemm assembly dispatcher
*
* @param[in] activation Activation to check
@@ -94,10 +111,15 @@ public:
* @return True if the function is configured and ready to run
*/
bool is_configured() const;
+
// Inherited methods overridden:
- /** Runs a preparation step, usually for pre-transposing matrix b */
void prepare() override;
void run() override;
+
+private:
+ std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */
+ MemoryGroup _memory_group; /**< Function memory group */
+ IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */
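An illustrative sketch of how the new AsmGemmInfo descriptor could be filled for the indirect-convolution path; in practice the calling functions derive these values internally:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"

using namespace arm_compute;

// Request the indirect-GEMM convolution method: input patches are addressed
// through pointer tables instead of an explicit im2col buffer.
AsmGemmInfo make_indirect_gemm_info(const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
    AsmGemmInfo info{};
    info.method          = AsmConvMethod::Indirect; // Im2Col and Conv remain available
    info.ps_info         = conv_info;               // strides/padding of the convolution
    info.activation_info = act_info;                // activation to fuse, where supported
    info.padding_top     = conv_info.pad_top();
    info.padding_left    = conv_info.pad_left();
    info.padding_value   = 0.f;                     // value read for padded elements
    return info;
}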
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
new file mode 100644
index 0000000000..7cae39397f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMCONV2D_H
+#define ARM_COMPUTE_NEGEMMCONV2D_H
+
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+ *
+ * Supports only NHWC data layout
+ *
+ * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch
+ *
+ * Weights are transformed from OHWI to HWIO format using the following kernels:
+ * -# @ref NEPermute
+ */
+class NEGEMMConv2d : public IFunction
+{
+public:
+ /** Constructor */
+ NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConv2d(const NEGEMMConv2d &) = delete;
+ /** Default move constructor */
+ NEGEMMConv2d(NEGEMMConv2d &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete;
+ /** Default move assignment operator */
+ NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] info Convolution layer descriptor
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d
+ *
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ NEGEMMAssemblyDispatch _gemm_asm_func;
+ NEActivationLayer _activation_func;
+ NEPermute _weights_permute_func;
+ const ITensor *_original_weights;
+ Tensor _permuted_weights;
+ bool _is_prepared;
+ bool _run_activation;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
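A minimal usage sketch of the new NEGEMMConv2d function, assuming src/weights/biases/dst have already had their TensorInfo initialised for NHWC; shapes and error handling are illustrative, not part of this commit:

#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_gemm_conv2d(Tensor &src, Tensor &weights, Tensor &biases, Tensor &dst, const Conv2dInfo &conv_info)
{
    // validate() works purely on ITensorInfo and reports whether this
    // configuration can be handled by the GEMM-based path.
    ARM_COMPUTE_ERROR_THROW_ON(NEGEMMConv2d::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info));

    NEGEMMConv2d conv{};
    conv.configure(&src, &weights, &biases, &dst, conv_info);

    // Allocate backing memory after configuration, then run.
    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    conv.prepare(); // one-off OHWI -> HWIO weight permutation
    conv.run();
}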
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
deleted file mode 100644
index 961b1901e7..0000000000
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
-
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMTranspose1xWKernel;
-class NEGEMMLowpMatrixMultiplyKernel;
-
-/** Basic function to execute matrix multiply assembly kernels. */
-class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Destructor */
- ~NEGEMMLowpAssemblyMatrixMultiplyCore();
-
- /** Initialise the kernel's inputs, output
- *
- * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). Data type supported: same as @p a
- * @param[out] output Output tensor. Data type supported: Data type supported: U32, S32
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
- std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
- Tensor _tmp_a;
- Tensor _tmp_b;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */