aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-12-14 17:53:39 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:33 +0000
commit08c5a06e2b49df0d7912deedd6d26d2c603cfe58 (patch)
tree0742d970b5dff2354f917063ad11980af97dee93 /arm_compute
parent941cd706bea1847ae89e4ee13f144fc51050ad1f (diff)
downloadComputeLibrary-08c5a06e2b49df0d7912deedd6d26d2c603cfe58.tar.gz
COMPMID-750: Fix assembly kernel interfaces
Assembly kernel interfaces were wrongly translating the layout of the input matrices. The boolean flags transform_0 and transform_1 did not match the actual interface of the GEMM assembly code, which expects flags stating whether each input matrix is already transposed. The flags are renamed to is_transposed_0 and is_transposed_1, and their default value changes from true to false. Change-Id: Ia4df65a533834647fa63e78e8c897924793949df Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113410 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h28
-rw-r--r--arm_compute/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h5
-rw-r--r--arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h5
-rw-r--r--arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.h2
7 files changed, 24 insertions, 22 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h
index 9e0fe8059b..1090dd5b0a 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h
@@ -36,7 +36,7 @@ class NEGEMMAssemblyBaseKernel : public INEKernel
public:
/** Constructor */
NEGEMMAssemblyBaseKernel()
- : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _transform_0(true), _transform_1(true)
+ : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false)
{
}
@@ -55,22 +55,22 @@ public:
*
* The computed function is C = a * AxB + b * C.
*
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32
- * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0
- * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0.
- * @param[out] workspace Space for intermediate results.
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the accumulation.
- * @param[in] transform_0 If true the kernel will transform @p input0 prior to the multiplication.
- * @param[in] transform_1 If true the kernel will transform @p input1 prior to the multiplication.
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32
+ * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0
+ * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0.
+ * @param[out] workspace Space for intermediate results.
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the accumulation.
+ * @param[in] is_transposed_0 (Optional) True if @p input0 is transposed, false otherwise. Defaults to false.
+ * @param[in] is_transposed_1 (Optional) True if @p input1 is transposed, false otherwise. Defaults to false.
*/
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool transform_0 = true, bool transform_1 = true)
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false)
{
- internal_configure(input0, input1, output, workspace, alpha, beta, transform_0, transform_1);
+ internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1);
}
protected:
- virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) = 0;
+ virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0;
const ITensor *_input0;
const ITensor *_input1;
@@ -78,8 +78,8 @@ protected:
ITensor *_workspace;
float _alpha;
float _beta;
- bool _transform_0;
- bool _transform_1;
+ bool _is_transposed_0;
+ bool _is_transposed_1;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMASSEMBLYBASE_H__*/
diff --git a/arm_compute/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.h b/arm_compute/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.h
index 597acca439..7564f6a0e1 100644
--- a/arm_compute/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.h
@@ -38,7 +38,7 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMAARCH32KERNEL_H__*/
diff --git a/arm_compute/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.h
index 77431d2bc8..5c29a825c2 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.h
@@ -38,7 +38,7 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMAARCH64KERNEL_H__*/
diff --git a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h
index 33cd2d42d0..8e9783720e 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.h
@@ -43,10 +43,11 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
private:
- using NEGEMMLowpAArch64A53 = void(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+ using NEGEMMLowpAArch64A53 = void(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1,
+ const Window &window,
const ThreadInfo &info);
NEGEMMLowpAArch64A53 *_func;
};
diff --git a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h
index a93df033de..3829d5e1d6 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h
@@ -44,10 +44,11 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
private:
- using NEGEMMLowpAArch64 = void(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1, const Window &window,
+ using NEGEMMLowpAArch64 = void(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0,
+ bool is_transposed_1, const Window &window,
const ThreadInfo &info);
NEGEMMLowpAArch64 *_func;
};
diff --git a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
index b03e5fa1a2..b94499392e 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
@@ -51,7 +51,7 @@ public:
static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_AARCH64_V8_2 */
diff --git a/arm_compute/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.h
index 9480a6a5d0..5671d99b85 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.h
@@ -38,7 +38,7 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
protected:
- void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool transform_0, bool transform_1) override;
+ void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) override;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEHGEMMAARCH64FP16KERNEL_H__*/