diff options
Diffstat (limited to 'src/backends/cl/workloads')
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dWorkload.cpp | 24
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dWorkload.hpp | 10
2 files changed, 30 insertions, 4 deletions
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 42c9903dc4..7b52f2784f 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -59,7 +59,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
 }
 
 ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
-    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+                                                 const WorkloadInfo& info,
+                                                 std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                                                 const bool isFastMathEnabled)
     : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
     , m_ConvolutionLayer(memoryManager)
 {
@@ -95,7 +97,20 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
                                  &output,
                                  padStrideInfo,
                                  arm_compute::WeightsInfo(),
-                                 aclDilationInfo);
+                                 aclDilationInfo,
+                                 arm_compute::ActivationLayerInfo(),
+                                 isFastMathEnabled);
+
+    m_ConvolutionMethod =
+        m_ConvolutionLayer.get_convolution_method(input.info(),
+                                                  m_KernelTensor->info(),
+                                                  output.info(),
+                                                  padStrideInfo,
+                                                  arm_compute::WeightsInfo(),
+                                                  arm_compute::ActivationLayerInfo(),
+                                                  arm_compute::CLScheduler::get().target(),
+                                                  aclDilationInfo,
+                                                  isFastMathEnabled);
 
     InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
 
@@ -116,6 +131,11 @@ void ClConvolution2dWorkload::Execute() const
     RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
 }
 
+arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const
+{
+    return m_ConvolutionMethod;
+}
+
 void ClConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 8b0afada36..f769422a0a 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -28,16 +28,22 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
 class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
 {
 public:
-    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                            const WorkloadInfo& info,
+                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                            const bool isFastMathEnabled = false);
     void Execute() const override;
 
+    arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
 
     std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
     std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
 
+    arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
     void FreeUnusedTensors();
 };
 