diff options
Diffstat (limited to 'src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp')
-rw-r--r-- | src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp index 017fcaf454..a44a80c997 100644 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp @@ -25,9 +25,13 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); + // Create Proxy tensor and set the initial tensor handle to it + m_InputProxy = std::make_unique<ICLTensorProxy>(&input); + m_OutputProxy = std::make_unique<ICLTensorProxy>(&output); + { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp32ToFp16Workload_configure"); - m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); + m_Layer.configure(clCompileContext, m_InputProxy.get(), m_OutputProxy.get(), g_AclConvertPolicy, 0); } } @@ -57,5 +61,45 @@ arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, return aclStatus; } +void ClConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClConvertFp32ToFp16Workload::Reconfigure() +{ + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_InputProxy->set(&input); + m_OutputProxy->set(&output); +} } //namespace armnn |