aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-03-19 11:44:13 +0000
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-04-08 14:12:59 +0000
commit: 926afe1c8ad6ba6a7bada62a4027fcb79d727104 (patch)
tree: 8dcc908a6145de6b02bcea24e3ccd830ba3f5939 /arm_compute
parent: 8c571692a8236be8605a753e231d240094428be5 (diff)
download: ComputeLibrary-926afe1c8ad6ba6a7bada62a4027fcb79d727104.tar.gz
COMPMID-2097: Implement a heuristic to dispatch CLGEMMReshapedOnlyRHS kernel from CLGEMM
Change-Id: I4170a80647b02501aa669e2c0347ddc39888ee76
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/928
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/CL/ICLGEMMKernelConfiguration.h (renamed from arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h) | 34
-rw-r--r-- arm_compute/core/CL/gemm/CLGEMMHelpers.h | 53
-rw-r--r-- arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h (renamed from arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h) | 31
-rw-r--r-- arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h (renamed from arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h) | 26
-rw-r--r-- arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h | 59
-rw-r--r-- arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h | 62
-rw-r--r-- arm_compute/core/utils/misc/ShapeCalculator.h | 15
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMM.h | 59
8 files changed, 285 insertions, 54 deletions
diff --git a/arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
index 500d9cd492..2e6d49566c 100644
--- a/arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h
+++ b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h
@@ -21,20 +21,37 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__
-#define __ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__
+#ifndef __ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__
+#define __ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__
+#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
-/** Basic interface for the GEMM selection */
-class ICLGEMMReshapedConfiguration
+/** Basic interface for the GEMM kernel configuration */
+class ICLGEMMKernelConfiguration
{
public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ ICLGEMMKernelConfiguration(GPUTarget arch)
+ : _target(arch)
+ {
+ }
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete;
+ /** Default Move Constructor. */
+ ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default;
+ /** Default move assignment operator */
+ ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default;
/** Virtual destructor */
- virtual ~ICLGEMMReshapedConfiguration() = default;
- /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used with @ref CLGEMMMatrixMultiplyReshapedKernel
+ virtual ~ICLGEMMKernelConfiguration() = default;
+ /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used to configure the GEMM kernel
*
* @param[in] m Number of rows LHS matrix
* @param[in] n Number of columns RHS matrix
@@ -43,6 +60,9 @@ public:
* @param[in] data_type Data type
*/
virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
+
+protected:
+ GPUTarget _target;
};
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__ */
+#endif /*__ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__ */
diff --git a/arm_compute/core/CL/gemm/CLGEMMHelpers.h b/arm_compute/core/CL/gemm/CLGEMMHelpers.h
new file mode 100644
index 0000000000..d263712397
--- /dev/null
+++ b/arm_compute/core/CL/gemm/CLGEMMHelpers.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMHELPERS_H__
+#define __ARM_COMPUTE_CLGEMMHELPERS_H__
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
+ *
+ * @param[in] m Number of rows (M) in the LHS matrix not reshaped
+ * @param[in] n Number of columns (N) in the RHS matrix not reshaped
+ * @param[in] m0 Number of rows processed by each thread/work-item
+ * @param[in] n0 Number of columns processed by each thread/work-item
+ * @param[in] k0 Number of inner accumulation performed by each thread/work-item
+ * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row
+ * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row
+ * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row
+ * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row
+ * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before being stored
+ * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before being stored
+ *
+ * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
+ */
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
+ bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose);
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGEMMHELPERS_H__ */
diff --git a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
index 3458911a97..105a58a6f8 100644
--- a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h
+++ b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h
@@ -21,12 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__
-#define __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__
+#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H__
+#define __ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H__
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h"
-#include "arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h"
+#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
+#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
#include <memory>
@@ -34,23 +33,27 @@ namespace arm_compute
{
namespace cl_gemm
{
-/** Tuner factory class */
-class CLGEMMReshapedConfigurationFactory final
+/** CLGEMMReshaped factory class */
+class CLGEMMReshapedKernelConfigurationFactory final
{
public:
- static std::unique_ptr<ICLGEMMReshapedConfiguration> create()
+ /** Static method to create the CLGEMMReshaped kernel configuration class according to the GPU architecture
+ *
+ * @param[in] arch GPU target
+ *
+ * @return CLGEMMReshaped kernel configuration class
+ */
+ static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget arch)
{
- GPUTarget arch = get_arch_from_target(CLScheduler::get().target());
-
- switch(arch)
+ switch(get_arch_from_target(arch))
{
case GPUTarget::BIFROST:
- return support::cpp14::make_unique<CLGEMMReshapedConfigurationBifrost>();
+ return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(arch);
default:
return nullptr;
}
}
};
-} // namespace tuners
+} // namespace cl_gemm
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__ */
+#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H__ */
diff --git a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
index c452e159cf..a0aae190e8 100644
--- a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h
+++ b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h
@@ -21,19 +21,33 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__
-#define __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__
+#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H__
+#define __ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H__
-#include "arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h"
+#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
namespace arm_compute
{
namespace cl_gemm
{
-/** Bifrost based OpenCL GEMM reshaped configuration */
-class CLGEMMReshapedConfigurationBifrost final : public ICLGEMMReshapedConfiguration
+/** Bifrost based OpenCL GEMMReshaped configuration */
+class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
{
public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ CLGEMMReshapedKernelConfigurationBifrost(GPUTarget arch);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapedKernelConfigurationBifrost(const CLGEMMReshapedKernelConfigurationBifrost &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapedKernelConfigurationBifrost &operator=(const CLGEMMReshapedKernelConfigurationBifrost &) = delete;
+ /** Default Move Constructor. */
+ CLGEMMReshapedKernelConfigurationBifrost(CLGEMMReshapedKernelConfigurationBifrost &&) = default;
+ /** Default move assignment operator */
+ CLGEMMReshapedKernelConfigurationBifrost &operator=(CLGEMMReshapedKernelConfigurationBifrost &&) = default;
+
// Inherited overridden method
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
@@ -45,4 +59,4 @@ private:
};
} // namespace cl_gemm
} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__ */
+#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H__ */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
new file mode 100644
index 0000000000..b9bf150c4f
--- /dev/null
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H__
+#define __ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H__
+
+#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
+#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** CLGEMMReshapedOnlyRHS factory class */
+class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final
+{
+public:
+ /** Static method to create the CLGEMMReshapedOnlyRHS kernel configuration class according to the GPU architecture
+ *
+ * @param[in] arch GPU target
+ *
+ * @return CLGEMMReshapedOnlyRHS kernel configuration class
+ */
+ static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget arch)
+ {
+ switch(get_arch_from_target(arch))
+ {
+ case GPUTarget::BIFROST:
+ return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(arch);
+ default:
+ return nullptr;
+ }
+ }
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H__ */
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
new file mode 100644
index 0000000000..3bed118f21
--- /dev/null
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H__
+#define __ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H__
+
+#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
+
+namespace arm_compute
+{
+namespace cl_gemm
+{
+/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */
+class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget arch);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(const CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &operator=(const CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &) = delete;
+ /** Default Move Constructor. */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &&) = default;
+ /** Default move assignment operator */
+ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &operator=(CLGEMMReshapedOnlyRHSKernelConfigurationBifrost &&) = default;
+
+ // Inherited overridden method
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+
+private:
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+};
+} // namespace cl_gemm
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H__ */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 0d07266403..384bd460a0 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -887,23 +887,20 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
{
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
+ const bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0;
const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1;
// If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
// dimension of the output tensor
- const int dim0 = gemm_info.n();
- const int dim1 = gemm_info.m() / depth_output_gemm3d;
- const int dim2 = input0.tensor_shape()[2];
- const int dim3 = input0.tensor_shape()[3];
+ const int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
TensorShape output_shape{ input0.tensor_shape() };
- output_shape.set(0, dim0);
- output_shape.set(1, dim1);
- output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2);
- output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3);
- output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1);
+ output_shape.set(0, gemm_info.n());
+ output_shape.set(1, gemm_info.m() / depth_output_gemm3d);
+ output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
+ output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
return output_shape;
}
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 0bad446551..8c462fa4cb 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
@@ -40,10 +41,11 @@ class ICLTensor;
/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
*
- * -# @ref CLGEMMReshapeLHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model)
- * -# @ref CLGEMMReshapeRHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model)
- * -# @ref CLGEMMMatrixMultiplyKernel (if GPU target is NOT G76 or if the reshaped GEMM is NOT selected)
- * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target IS Mali-G76)
+ * -# @ref CLGEMMReshapeLHSMatrixKernel (only if RESHAPED_V1 is selected by the heuristic model)
+ * -# @ref CLGEMMReshapeRHSMatrixKernel (only if either RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_type() method)
+ * -# @ref CLGEMMMatrixMultiplyKernel (only if either NATIVE or RESHAPED_V1 is selected by the select_gemm_type() method)
+ * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_type() method)
+ * -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_type() method)
* -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
*
*/
@@ -102,20 +104,41 @@ public:
void prepare() override;
private:
- CLMemoryGroup _memory_group;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMMatrixAdditionKernel _ma_kernel;
- CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
- CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- bool _is_interleaved_transposed;
- bool _run_addition;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _is_new_gemm_reshaped; // Remove when COMPMID-1892 is completed
+ enum class GEMMType
+ {
+ NATIVE,
+ RESHAPED_V1,
+ RESHAPED_V2,
+ RESHAPED_ONLY_RHS
+ };
+
+ // TODO (COMPMID-2095)
+ static GEMMType select_gemm_type(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run, GPUTarget gpu_target);
+
+ void configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ void configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+
+ static Status validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ static Status validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+ static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+
+ CLMemoryGroup _memory_group;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLGEMMMatrixAdditionKernel _ma_kernel;
+ CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
+ CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
+ CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
+ CLTensor _tmp_a;
+ CLTensor _tmp_b;
+ const ICLTensor *_original_b;
+ bool _run_addition;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+ GEMMType _gemm_type;
};
} // namespace arm_compute