aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-07-03 16:22:02 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:10 +0000
commit5f707736413aeac77818c42838296966f8dc6761 (patch)
treeb829ed3243ea5f3085f288836132416c78bc2e72 /arm_compute/core/NEON/kernels
parent7485d5a62685cb745ab50e970adb722cb71557ac (diff)
downloadComputeLibrary-5f707736413aeac77818c42838296966f8dc6761.tar.gz
COMPMID-1369: Revert accidental formatting of RSH's repo
Pulled latest fixes from David's repo: commit f43ebe932c84083332b0b1a0348241b69dda63a7 Author: David Mansell <David.Mansell@arm.com> Date: Tue Jul 3 18:09:01 2018 +0100 Whitespace tidying, fixed comment in gemv_batched imported from ACL. Change-Id: Ie37a623f44e90d88072236cb853ac55ac82d5f51 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138530 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: David Mansell <david.mansell@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
-rw-r--r--arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp16
-rw-r--r--arm_compute/core/NEON/kernels/assembly/gemm_common.hpp5
2 files changed, 19 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
index 0a541c6db9..8d1433dd24 100644
--- a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
@@ -33,10 +33,26 @@ namespace arm_gemm {
template<typename Top, typename Tret>
using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret> >;
+/** Request an object to process a GEMM.
+ *
+ * @param[in] ci Describes CPU properties.
+ * @param[in] M Rows in output matrix C (and input matrix A).
+ * @param[in] N Columns in output matrix C (and input matrix B).
+ * @param[in] K Columns of input matrix A (= rows of input matrix B).
+ * @param[in] nbatches Number of "batched" GEMMs (unique A and C, shared B).
+ * @param[in] nmulti Number of "multi" GEMMs (unique A, B and C).
+ * @param[in] trA Does the A tensor have its rows and columns transposed?
+ * @param[in] trB Does the B tensor have its rows and columns transposed?
+ * @param[in] alpha Scalar multiplier to apply to AB matrix product.
+ * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
+ * @param[in] maxthreads Maximum (and default) number of threads that will call the execute method.
+ * @param[in] pretransposed_hint Can the B tensor be pretransposed (i.e. shared across invocations)?
+ */
template<typename Top, typename Tret>
UniqueGemmCommon<Top, Tret> gemm(const CPUInfo &ci,
const unsigned int M, const unsigned int N, const unsigned int K,
const unsigned int nbatches, const unsigned int nmulti,
const bool trA, const bool trB, const Tret alpha, const Tret beta,
const int maxthreads, const bool pretransposed_hint);
+
} // namespace arm_gemm
diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
index 3919c339bf..b43d6eaca6 100644
--- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
@@ -53,10 +53,11 @@ public:
/* Pass in the pointers to the arrays to be operated on and their
* strides. This has a default implementation that just captures them
* all in protected members. If B is pretransposed (see below) then the
- * settings for B here are ignored. */
+ * settings for B here are ignored.
+ */
virtual void set_arrays(const To *A, const int lda, const int A_batch_stride, const int A_multi_stride,
const To *B, const int ldb, /* batches share B */ const int B_multi_stride,
- Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride) {
+ Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride) {
_Aptr = A;
_lda = lda;
_A_batch_stride = A_batch_stride;