aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon/workloads
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon/workloads')
-rw-r--r--src/backends/neon/workloads/NeonConvolution2dWorkload.cpp25
-rw-r--r--src/backends/neon/workloads/NeonConvolution2dWorkload.hpp10
2 files changed, 30 insertions, 5 deletions
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 83f761158a..d35b9685be 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -59,8 +59,10 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
}
NeonConvolution2dWorkload::NeonConvolution2dWorkload(
- const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
{
using arm_compute::NEDirectConvolutionLayer;
@@ -97,7 +99,19 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
&output,
padStrideInfo,
arm_compute::WeightsInfo(),
- aclDilationInfo);
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
+
+ m_ConvolutionMethod =
+ convolutionLayer->get_convolution_method(input.info(),
+ m_KernelTensor->info(),
+ output.info(),
+ padStrideInfo,
+ arm_compute::WeightsInfo(),
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());
@@ -120,6 +134,11 @@ void NeonConvolution2dWorkload::Execute() const
m_ConvolutionLayer->run();
}
+arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const
+{
+ return m_ConvolutionMethod;
+}
+
void NeonConvolution2dWorkload::FreeUnusedTensors()
{
FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 54e08a2042..860d78ba7e 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -28,17 +28,23 @@ class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescript
public:
using BaseWorkload<Convolution2dQueueDescriptor>::m_Data;
- NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathENabled = false);
void Execute() const override;
+ arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
private:
std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+ arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
void FreeUnusedTensors();
};