aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/CL')
-rw-r--r--arm_compute/core/CL/CLKernels.h1
-rw-r--r--arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h4
-rw-r--r--arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h8
-rw-r--r--arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h51
-rw-r--r--arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h8
-rw-r--r--arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h2
-rw-r--r--arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h8
-rw-r--r--arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h8
-rw-r--r--arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h2
-rw-r--r--arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h8
-rw-r--r--arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h8
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h101
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h8
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h8
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h2
19 files changed, 66 insertions, 169 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 583cf270e2..cd26399390 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -73,7 +73,6 @@
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
index fced41b261..a6341e5094 100644
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
+++ b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
+#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h"
#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h"
#include <memory>
@@ -49,12 +50,11 @@ public:
switch(get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
+ return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu);
case GPUTarget::BIFROST:
return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu);
- break;
case GPUTarget::VALHALL:
return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu);
- break;
default:
ARM_COMPUTE_ERROR("Not supported GPU target");
}
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
index 29b8e08a80..5b2abe6f0f 100644
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
+++ b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMNativeKernelConfigurationBifrost(const CLGEMMNativeKernelConfigurationBifrost &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMNativeKernelConfigurationBifrost &operator=(const CLGEMMNativeKernelConfigurationBifrost &) = delete;
- /** Default Move Constructor. */
- CLGEMMNativeKernelConfigurationBifrost(CLGEMMNativeKernelConfigurationBifrost &&) = default;
- /** Default move assignment operator */
- CLGEMMNativeKernelConfigurationBifrost &operator=(CLGEMMNativeKernelConfigurationBifrost &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
new file mode 100644
index 0000000000..0e95a15613
--- /dev/null
+++ b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
+#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H
+
+#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Midgard based OpenCL GEMMNative configuration */
+class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu);
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */
diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
index f6a61a24b8..e739997b3a 100644
--- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
+++ b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMNativeKernelConfigurationValhall(const CLGEMMNativeKernelConfigurationValhall &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMNativeKernelConfigurationValhall &operator=(const CLGEMMNativeKernelConfigurationValhall &) = delete;
- /** Default Move Constructor. */
- CLGEMMNativeKernelConfigurationValhall(CLGEMMNativeKernelConfigurationValhall &&) = default;
- /** Default move assignment operator */
- CLGEMMNativeKernelConfigurationValhall &operator=(CLGEMMNativeKernelConfigurationValhall &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
index e960d64964..10dc9aefdb 100644
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
+++ b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
@@ -51,10 +51,8 @@ public:
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST:
return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu);
- break;
case GPUTarget::VALHALL:
return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu);
- break;
default:
ARM_COMPUTE_ERROR("Not supported GPU target");
}
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
index c6ece758b9..55742e3e56 100644
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
+++ b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedKernelConfigurationBifrost(const CLGEMMReshapedKernelConfigurationBifrost &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedKernelConfigurationBifrost &operator=(const CLGEMMReshapedKernelConfigurationBifrost &) = delete;
- /** Default Move Constructor. */
- CLGEMMReshapedKernelConfigurationBifrost(CLGEMMReshapedKernelConfigurationBifrost &&) = default;
- /** Default move assignment operator */
- CLGEMMReshapedKernelConfigurationBifrost &operator=(CLGEMMReshapedKernelConfigurationBifrost &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
index 0dd2a2c38f..e65974144d 100644
--- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
+++ b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedKernelConfigurationValhall(const CLGEMMReshapedKernelConfigurationValhall &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedKernelConfigurationValhall &operator=(const CLGEMMReshapedKernelConfigurationValhall &) = delete;
- /** Default Move Constructor. */
- CLGEMMReshapedKernelConfigurationValhall(CLGEMMReshapedKernelConfigurationValhall &&) = default;
- /** Default move assignment operator */
- CLGEMMReshapedKernelConfigurationValhall &operator=(CLGEMMReshapedKernelConfigurationValhall &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
index 683e39f3c1..7909726164 100644
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
@@ -51,10 +51,8 @@ public:
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST:
return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu);
- break;
case GPUTarget::VALHALL:
return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu);
- break;
default:
ARM_COMPUTE_ERROR("Not supported GPU target");
}
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
index ff351b6a06..044bdc7b18 100644
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(const CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &operator=(const CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &) = delete;
- /** Default Move Constructor. */
- CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &&) = default;
- /** Default move assignment operator */
- CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &operator=(CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
index 7541802776..6dba6fdb00 100644
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h
@@ -39,14 +39,6 @@ public:
* @param[in] gpu GPU target
*/
CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedOnlyRHSKernelConfigurationValhall(const CLGEMMReshapedOnlyRHSKernelConfigurationValhall &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapedOnlyRHSKernelConfigurationValhall &operator=(const CLGEMMReshapedOnlyRHSKernelConfigurationValhall &) = delete;
- /** Default Move Constructor. */
- CLGEMMReshapedOnlyRHSKernelConfigurationValhall(CLGEMMReshapedOnlyRHSKernelConfigurationValhall &&) = default;
- /** Default move assignment operator */
- CLGEMMReshapedOnlyRHSKernelConfigurationValhall &operator=(CLGEMMReshapedOnlyRHSKernelConfigurationValhall &&) = default;
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
deleted file mode 100644
index e926f5ed36..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * This kernel performs the following computation:
- *
- * -# Convert a values from int8 to int32
- * -# Convert b values from int8 to int32
- * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
- *
- */
-class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernel(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernel &operator=(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernel(CLGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernel &operator=(CLGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
- /** Initialise the kernel's input and output.
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H*/
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
index d7266b2805..f9ec558d85 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
@@ -30,9 +30,9 @@ namespace arm_compute
{
class ICLTensor;
-/** OpenCL kernel used to add the offset contribution after @ref CLGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
+/** OpenCL kernel used to add the offset contribution after the matrix multiplication. The computation is performed in-place
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel),
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication),
* and adds to it the offset contribution of matrix A and matrix B in-place.
*
* The final result is:
@@ -58,7 +58,7 @@ public:
CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in, out] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
* @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
* @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
@@ -73,7 +73,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
* @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
* @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
index 02ed20e5af..032539b699 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
@@ -30,9 +30,9 @@ namespace arm_compute
{
class ICLTensor;
-/** OpenCL kernel used to add the offset contribution after @ref CLGEMMLowpMatrixMultiplyKernel and perform the output stage.
+/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage.
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), adds to it the offset contribution
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution
* of matrix A and matrix B and performs the output stage defined by the output_stage argument
*
* @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo.
@@ -52,7 +52,7 @@ public:
CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
* @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
* @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
@@ -74,7 +74,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
* @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
* @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
index 0b5b22cafc..dd85d8a97c 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
@@ -33,7 +33,7 @@ class ICLTensor;
/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
*
* -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
index 0d7d1c3390..f36076dfa2 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
@@ -32,7 +32,7 @@ class ICLTensor;
/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
*
* -# Add offset terms to final result
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 2845d9259e..36cd7bf693 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -32,7 +32,7 @@ class ICLTensor;
/** CL kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QSYMM16 value.
* The following computations will be performed by the kernel:
*
* -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index a768b6fba0..fd95e00d5d 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -32,7 +32,7 @@ class ICLTensor;
/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
*
* -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index e319c32c78..1714a02f76 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -32,7 +32,7 @@ class ICLTensor;
/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
*
- * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8 value.
* The following computations will be performed by the kernel:
*
* -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier