aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON
diff options
context:
space:
mode:
author: Jonathan Deakin <jonathan.deakin@arm.com> 2023-01-12 11:41:14 +0000
committer: Jonathan Deakin <jonathan.deakin@arm.com> 2023-02-01 08:05:35 +0000
commit: 464ed2087c2ce2d2e741cc1e1dc4bd49d06e7d26 (patch)
tree: da07a18be246742773a729e264080d9a9b314d59 /src/runtime/NEON
parent: 7594f989963724e127c3e28210d60fed590b0524 (diff)
download: ComputeLibrary-464ed2087c2ce2d2e741cc1e1dc4bd49d06e7d26.tar.gz
Remove fixed format strides hack
- Remove hack in CpuGemmAssemblyDispatch.cpp which tried to guess strides for fixed format kernels. Instead, expect that strides will have been correctly set on weights externally
- Update fixed format test fixtures to set the strides
- If the fixed format uses fast math mode, then weights should be of type BFLOAT16. Change the validation logic to accept this.

Resolves: [ONCPUML-1131]
Co-authored-by: Milos Puzovic <Milos.Puzovic@arm.com>
Change-Id: I0f18d8b86b0f639be25fd122fa06a591e90645f2
Signed-off-by: Jonathan Deakin <jonathan.deakin@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8985
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 9
1 files changed, 5 insertions, 4 deletions
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 4f858fb54b..919e5ed84f 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -69,7 +69,8 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
weights->info(),
biases != nullptr ? biases->info() : nullptr,
output->info(),
- fc_info));
+ fc_info,
+ weights_info));
ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, fc_info);
_impl->op = std::make_unique<cpu::CpuFullyConnected>();
@@ -96,9 +97,9 @@ Status NEFullyConnectedLayer::has_opt_impl(arm_compute::WeightFormat &expected_w
}
Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info)
+ FullyConnectedLayerInfo fc_info, const WeightsInfo &weights_info)
{
- return cpu::CpuFullyConnected::validate(input, weights, biases, output, fc_info);
+ return cpu::CpuFullyConnected::validate(input, weights, biases, output, fc_info, weights_info);
}
void NEFullyConnectedLayer::run()