about summary refs log tree commit diff
diff options
context:
space:
mode:
authorNikhil Raj <nikhil.raj@arm.com>2023-06-26 11:52:40 +0100
committerColm Donelan <colm.donelan@arm.com>2023-06-26 22:02:38 +0000
commitd29d09de2bcee68d0820e6ee7549033b05c6c469 (patch)
tree4044d4810fde6f1609354838ad0a920a9388a7e1
parented928a91a80db4f013995a558433012b1dfc03ea (diff)
downloadarmnn-d29d09de2bcee68d0820e6ee7549033b05c6c469.tar.gz
Update ACL pin to c952596e70f2fe0073029f053e329a4e930ced8c
* activationInfo passed in directly to configure() rather than part of matMulInfo

Signed-off-by: Nikhil Raj <nikhil.raj@arm.com>
Change-Id: I546def1c1e1cabaf50629f7d78ae0ba459766ed4
-rwxr-xr-x  scripts/get_compute_library.sh                           2
-rw-r--r--  src/backends/cl/workloads/ClBatchMatMulWorkload.cpp      8
-rw-r--r--  src/backends/neon/workloads/NeonBatchMatMulWorkload.cpp  7
3 files changed, 8 insertions, 9 deletions
diff --git a/scripts/get_compute_library.sh b/scripts/get_compute_library.sh
index 89ff764c57..731ab7e1ce 100755
--- a/scripts/get_compute_library.sh
+++ b/scripts/get_compute_library.sh
@@ -10,7 +10,7 @@ CMD=$( basename "$0" )
#DEFAULT_CLFRAMEWORKREVISION="branches/arm_compute_23_05" # Release 23.05
#
# For pinning to a revision use this:
-DEFAULT_CLFRAMEWORKREVISION="043613fbb199e2c4fdd12c2c9a1785db9b0c45fa" #9743: Break up Utils.h a bit to reduce unused code being included everywhere
+DEFAULT_CLFRAMEWORKREVISION="c952596e70f2fe0073029f053e329a4e930ced8c" #9819: Implement FP32/FP16 MatMul NT/T kernel using the MMUL extension
usage() {
echo -e "get_compute_library.sh: Clones the Arm Compute Library (ACL) repo from the ML Platform server and checks out
diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
index cfed1b83a1..313c3453a5 100644
--- a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
@@ -50,9 +50,8 @@ arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputInfoX,
arm_compute::MatMulInfo matMulInfo;
matMulInfo.adj_lhs(descriptor.m_TransposeX);
matMulInfo.adj_rhs(descriptor.m_TransposeY);
- matMulInfo.fused_activation(activationInfo);
- return arm_compute::CLMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo);
+ return arm_compute::CLMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, activationInfo);
}
ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor,
@@ -92,9 +91,10 @@ ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& d
arm_compute::MatMulInfo matMulInfo;
matMulInfo.adj_lhs(descriptor.m_Parameters.m_TransposeX);
matMulInfo.adj_rhs(descriptor.m_Parameters.m_TransposeY);
- matMulInfo.fused_activation(activationInfo);
- m_MatMulLayer.configure(clCompileContext, &inputX, &inputY, &output, matMulInfo);
+ arm_compute::GpuMatMulSettings settings;
+
+ m_MatMulLayer.configure(clCompileContext, &inputX, &inputY, &output, matMulInfo, settings, activationInfo);
// Report Profiling Details
WorkloadInfo detailsInfo;
diff --git a/src/backends/neon/workloads/NeonBatchMatMulWorkload.cpp b/src/backends/neon/workloads/NeonBatchMatMulWorkload.cpp
index 628e314046..7f2b6eaf99 100644
--- a/src/backends/neon/workloads/NeonBatchMatMulWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchMatMulWorkload.cpp
@@ -46,12 +46,12 @@ arm_compute::Status NeonBatchMatMulValidate(const TensorInfo& inputInfoX,
arm_compute::MatMulInfo matMulInfo;
matMulInfo.adj_lhs(descriptor.m_TransposeX);
matMulInfo.adj_rhs(descriptor.m_TransposeY);
- matMulInfo.fused_activation(activationInfo);
arm_compute::CpuMatMulSettings settings;
settings.fast_math(isFastMathEnabled);
- return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings);
+ return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
+ activationInfo);
}
NeonBatchMatMulWorkload::NeonBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor,
@@ -84,12 +84,11 @@ NeonBatchMatMulWorkload::NeonBatchMatMulWorkload(const BatchMatMulQueueDescripto
arm_compute::MatMulInfo matMulInfo;
matMulInfo.adj_lhs(descriptor.m_Parameters.m_TransposeX);
matMulInfo.adj_rhs(descriptor.m_Parameters.m_TransposeY);
- matMulInfo.fused_activation(activationInfo);
arm_compute::CpuMatMulSettings settings;
settings.fast_math(isFastMathEnabled);
- m_MatMulLayer.configure(&inputX, &inputY, &output, matMulInfo, settings);
+ m_MatMulLayer.configure(&inputX, &inputY, &output, matMulInfo, settings, activationInfo);
// Report Profiling Details
WorkloadInfo detailsInfo;