aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp
diff options
context:
space:
mode:
author Matthew Bentham <Matthew.Bentham@arm.com> 2023-04-27 12:13:50 +0000
committer Matthew Bentham <matthew.bentham@arm.com> 2023-04-28 14:56:58 +0000
commit 34336f9378eca4d39913ac0d9ba411a494631ad5 (patch)
tree 551e3904ad77c4a15f431516109d55a5cdd63e2a /src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp
parent 6c53f9fbea7d0b8786e1d29b850ab7bed85e167a (diff)
download armnn-34336f9378eca4d39913ac0d9ba411a494631ad5.tar.gz
Make Convert workloads use arm_compute::NECast in CpuAcc backend
NECast can use conversion instructions where they are available, so this should in general be faster.

Signed-off-by: Matthew Bentham <Matthew.Bentham@arm.com>
Change-Id: I3f259e17b280a4f4c36f363965ffbc8ee8c4c29f
Diffstat (limited to 'src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp')
-rw-r--r-- src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp 7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp
index 666f48794b..c6fed76e6d 100644
--- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp
+++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp
@@ -5,13 +5,17 @@
#pragma once
+#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <armnn/backends/Workload.hpp>
#include <armnn/backends/WorkloadData.hpp>
+#include <memory>
#include <neon/workloads/NeonWorkloadUtils.hpp>
namespace armnn
{
+arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output);
+
class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
{
public:
@@ -23,9 +27,10 @@ public:
// Replace output tensor handle with the given TensorHandle
void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override;
private:
+ virtual void Reconfigure();
using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>;
std::vector<TensorHandlePair> m_TensorHandlePairs;
- virtual void Reconfigure();
+ mutable std::unique_ptr<arm_compute::NECast> m_Cast;
};
} //namespace armnn