From a3f4fbaf9ce6e30b3d1337bdfbb47b7301f97d1d Mon Sep 17 00:00:00 2001
From: Cathal Corbett
Date: Mon, 21 Mar 2022 09:27:08 +0000
Subject: IVGCVSW-6732 Tests surrounded in '#if defined(ARMNNREF_ENABLED)' in
 android-nn-driver do not execute.

* Change to src/backends/cl/workloads/ClLstmFloatWorkload.cpp fixes the
  LstmTests_GpuAcc tests.
* Changes to src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp and
  ClConvertFp32ToFp16Workload.hpp fix the MeanTests_GpuAcc and
  Convolution2DTests_1.1 tests.
* Added unit tests to src/backends/cl/test/ClImportTensorHandleTests.cpp to
  test import on Convert layers.

!android-nn-driver:7264

Signed-off-by: Cathal Corbett
Change-Id: I0c46dc4b9c54eca8771ab12ed0302b6224606957
---
 include/armnn/INetwork.hpp                          |  20 ++
 src/armnn/LoadedNetwork.cpp                         |   2 +-
 src/armnn/Network.cpp                               |  32 +-
 src/armnn/Network.hpp                               |   4 +
 src/backends/cl/test/ClImportTensorHandleTests.cpp  | 399 +++++++++++++++++++
 .../cl/workloads/ClConvertFp16ToFp32Workload.cpp    |   6 +-
 .../cl/workloads/ClConvertFp32ToFp16Workload.cpp    |   6 +-
 src/backends/cl/workloads/ClLstmFloatWorkload.cpp   |   2 +-
 8 files changed, 459 insertions(+), 12 deletions(-)

diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index d962fe715b..eaec973899 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -810,6 +810,11 @@ protected:
                                          const IDeviceSpec& deviceSpec,
                                          const OptimizerOptions& options,
                                          Optional<std::vector<std::string>&> messages);
+    friend IOptimizedNetworkPtr Optimize(const Graph& inGraph,
+                                         const std::vector<BackendId>& backendPreferences,
+                                         const IDeviceSpec& deviceSpec,
+                                         const OptimizerOptions& options,
+                                         Optional<std::vector<std::string>&> messages);
 
     IOptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions);
 
@@ -830,4 +835,19 @@ IOptimizedNetworkPtr Optimize(const INetwork& network,
                               const IDeviceSpec& deviceSpec,
                               const OptimizerOptions& options = OptimizerOptions(),
                               Optional<std::vector<std::string>&> messages = EmptyOptional());
+
+/// Create an optimized version of the network
+/// @param inGraph Graph to be optimized.
+/// @param backendPreferences The choice of the backend ordered by user preferences.
+/// @param deviceSpec DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
+/// @param messages If there are failures or warnings a string describing same will be added to the vector
+/// @param options OptimizerOptions object with optimizer configuration options
+/// @return An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from
+/// armnn::Exception if process fails.
+
+IOptimizedNetworkPtr Optimize(const Graph& inGraph,
+                              const std::vector<BackendId>& backendPreferences,
+                              const IDeviceSpec& deviceSpec,
+                              const OptimizerOptions& options,
+                              Optional<std::vector<std::string>&> messages = EmptyOptional());
 } //namespace armnn

diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 6d08fc321e..0e577354f0 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -1456,7 +1456,7 @@ std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors&
     {
         throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
     }
-    std::vector<ImportedInputId> importedOutputs;
+    std::vector<ImportedOutputId> importedOutputs;
     Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
 
     unsigned int outputIndex = 0;
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 339da0d1b8..a3655509fb 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1658,7 +1658,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
     return result;
 }
 
-IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
+IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                               const std::vector<BackendId>& backendPreferences,
                               const IDeviceSpec& deviceSpec,
                               const OptimizerOptions& options,
@@ -1667,7 +1667,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
     ARMNN_LOG(debug) << options.ToString();
 
     // Enable profiling
-    auto profiler = inNetwork.pNetworkImpl->GetGraph().GetProfiler();
+    auto profiler = inGraph.GetProfiler();
     ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
     profiler->EnableProfiling(options.m_ProfilingEnabled);
 
@@ -1683,9 +1683,9 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
     }
 
     // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
-    inNetwork.pNetworkImpl->GetGraph().VerifyConstantLayerSetTensorInfo();
+    inGraph.VerifyConstantLayerSetTensorInfo();
 
-    std::unique_ptr<Graph> graph = std::make_unique<Graph>(inNetwork.pNetworkImpl->GetGraph());
+    std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
 
     auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), options.m_ModelOptions),
                                        &IOptimizedNetwork::Destroy);
@@ -1827,6 +1827,20 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
     }
     return optNet;
 }
+
+IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
+                              const std::vector<BackendId>& backendPreferences,
+                              const IDeviceSpec& deviceSpec,
+                              const OptimizerOptions& options,
+                              Optional<std::vector<std::string>&> messages)
+{
+    return Optimize(inNetwork.pNetworkImpl->GetGraph(),
+                    backendPreferences,
+                    deviceSpec,
+                    options,
+                    messages);
+}
+
 bool NetworkImpl::GetShapeInferenceMethod()
 {
     if (m_NetworkOptions.size() > 0 && m_NetworkOptions[0].GetBackendId().Get() == "ShapeInferenceMethod")
@@ -2000,6 +2014,16 @@ IConnectableLayer* NetworkImpl::AddConvolution2dLayerImpl(const Convolution2dDes
     return layer;
 }
 
+IConnectableLayer* NetworkImpl::AddConvertFp16ToFp32Layer(const char* name)
+{
+    return m_Graph->AddLayer<ConvertFp16ToFp32Layer>(name);
+}
+
+IConnectableLayer* NetworkImpl::AddConvertFp32ToFp16Layer(const char* name)
+{
+    return m_Graph->AddLayer<ConvertFp32ToFp16Layer>(name);
+}
+
 IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 1d88f267d1..fffad86b80 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -238,6 +238,10 @@ public:
                                   const LstmInputParams& params,
                                   const char* name = nullptr);
 
+    IConnectableLayer* AddConvertFp16ToFp32Layer(const char* name = nullptr);
+
+    IConnectableLayer* AddConvertFp32ToFp16Layer(const char* name = nullptr);
+
     ARMNN_NO_DEPRECATE_WARN_BEGIN
     void Accept(ILayerVisitor& visitor) const;
     ARMNN_NO_DEPRECATE_WARN_END
diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index 5fac9d13b1..e10e81ac26 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -14,6 +14,7 @@
 #include
 #include
+#include "Network.hpp"
 
 using namespace armnn;
 
@@ -427,6 +428,404 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd")
     CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
 }
 
+TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConvertFp16toFp32EndToEnd")
+{
+    using namespace half_float::literal;
+
+    // Create runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    NetworkImpl network;
+
+    armnn::TensorInfo inputInfo({1, 3, 2, 3}, armnn::DataType::Float16);
+    armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
+
+    std::vector<float> expectedOutput =
+    {
+        -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
+        1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f
+    };
+
+    unsigned int numElements = inputInfo.GetNumElements();
+    size_t totalBytesInput = numElements * sizeof(Half);
+    size_t totalBytesOutput = numElements * sizeof(float);
+
+    IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
+    ARMNN_ASSERT(inputLayer);
+
+    armnn::IConnectableLayer* const convLayer = network.AddConvertFp16ToFp32Layer("convert");
+    ARMNN_ASSERT(convLayer);
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    IConnectableLayer* output = network.AddOutputLayer(0, "output");
+    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Optimize the network
+    OptimizerOptions optOptions;
+    optOptions.m_ImportEnabled = false;
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
+    CHECK(optNet);
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t spaceInput = totalBytesInput + alignment + alignment;
+    size_t spaceOutput = totalBytesOutput + alignment + alignment;
+    auto inputData = std::make_unique<uint8_t[]>(spaceInput);
+    void* alignedInputPtr = inputData.get();
+    CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
+
+    // Input with negative values
+    auto* inputPtr = reinterpret_cast<Half*>(alignedInputPtr);
+    inputPtr[0] = -37.5_h;
+    inputPtr[1] = -15.2_h;
+    inputPtr[2] = -8.76_h;
+    inputPtr[3] = -2.0_h;
+    inputPtr[4] = -1.5_h;
+    inputPtr[5] = -1.3_h;
+    inputPtr[6] = -0.5_h;
+    inputPtr[7] = -0.4_h;
+    inputPtr[8] = 0.0_h;
+    inputPtr[9] = 1.0_h;
+    inputPtr[10] = 0.4_h;
+    inputPtr[11] = 0.5_h;
+    inputPtr[12] = 1.3_h;
+    inputPtr[13] = 1.5_h;
+    inputPtr[14] = 2.0_h;
+    inputPtr[15] = 8.76_h;
+    inputPtr[16] = 15.2_h;
+    inputPtr[17] = 37.5_h;
+
+    auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
+    void* alignedOutputPtr = outputData.get();
+    CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
+    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
+    std::fill_n(outputPtr, numElements, -10.0f);
+
+    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
+    inputTensorInfo.SetConstant(true);
+    InputTensors inputTensors
+    {
+        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
+    };
+    OutputTensors outputTensors
+    {
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    INFO("Run ImportInputs");
+    std::vector<ImportedInputId> importedInputIds =
+        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
+    std::vector<ImportedOutputId> importedOutputIds =
+        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);
+    std::string dump = ss.str();
+
+    // Contains ConvertFp16ToFp32Workload
+    std::size_t found = dump.find("ConvertFp16ToFp32Workload");
+    CHECK(found != std::string::npos);
+
+    // Contains SyncMemGeneric
+    found = dump.find("SyncMemGeneric");
+    CHECK(found != std::string::npos);
+
+    // Does not contain CopyMemGeneric
+    found = dump.find("CopyMemGeneric");
+    CHECK(found == std::string::npos);
+
+    runtime->UnloadNetwork(netId);
+
+    // Check output is as expected
+    // Validate the result by comparing it against the expected output
+    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
+    CHECK(outputResult);
+
+    // Check the output is correct
+    for (size_t i = 0; i < numElements; ++i)
+    {
+        DOCTEST_CHECK_MESSAGE(outputResult[i] == doctest::Approx(expectedOutput[i]).epsilon(0.0004),
+                              "outputValue[" << i << "]: " << outputResult[i] << " != " << expectedOutput[i]);
+    }
+}
+
+
+TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConvertFp32toFp16EndToEnd")
+{
+    using namespace half_float::literal;
+
+    // Create runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    NetworkImpl network;
+
+    armnn::TensorInfo inputInfo({1, 3, 2, 3}, armnn::DataType::Float32);
+    armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
+
+    std::vector<Half> expectedOutput =
+    {
+        -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h,
+        1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h
+    };
+
+    unsigned int numElements = inputInfo.GetNumElements();
+    size_t totalBytesInput = numElements * sizeof(float);
+    size_t totalBytesOutput = numElements * sizeof(Half);
+
+    IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
+    ARMNN_ASSERT(inputLayer);
+
+    armnn::IConnectableLayer* const convLayer = network.AddConvertFp32ToFp16Layer("convert");
+    ARMNN_ASSERT(convLayer);
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    IConnectableLayer* output = network.AddOutputLayer(0, "output");
+    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Optimize the network
+    OptimizerOptions optOptions;
+    optOptions.m_ImportEnabled = false;
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
+    CHECK(optNet);
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t spaceInput = totalBytesInput + alignment + alignment;
+    size_t spaceOutput = totalBytesOutput + alignment + alignment;
+    auto inputData = std::make_unique<uint8_t[]>(spaceInput);
+    void* alignedInputPtr = inputData.get();
+    CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
+
+    // Input with negative values
+    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+    inputPtr[0] = -37.5f;
+    inputPtr[1] = -15.2f;
+    inputPtr[2] = -8.76f;
+    inputPtr[3] = -2.0f;
+    inputPtr[4] = -1.5f;
+    inputPtr[5] = -1.3f;
+    inputPtr[6] = -0.5f;
+    inputPtr[7] = -0.4f;
+    inputPtr[8] = 0.0f;
+    inputPtr[9] = 1.0f;
+    inputPtr[10] = 0.4f;
+    inputPtr[11] = 0.5f;
+    inputPtr[12] = 1.3f;
+    inputPtr[13] = 1.5f;
+    inputPtr[14] = 2.0f;
+    inputPtr[15] = 8.76f;
+    inputPtr[16] = 15.2f;
+    inputPtr[17] = 37.5f;
+
+    auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
+    void* alignedOutputPtr = outputData.get();
+    CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
+    auto* outputPtr = reinterpret_cast<Half*>(alignedOutputPtr);
+    std::fill_n(outputPtr, numElements, -10.0f);
+
+    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
+    inputTensorInfo.SetConstant(true);
+    InputTensors inputTensors
+    {
+        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
+    };
+    OutputTensors outputTensors
+    {
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    INFO("Run ImportInputs");
+    std::vector<ImportedInputId> importedInputIds =
+        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
+    std::vector<ImportedOutputId> importedOutputIds =
+        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);
+    std::string dump = ss.str();
+
+    // Contains ConvertFp32ToFp16Workload
+    std::size_t found = dump.find("ConvertFp32ToFp16Workload");
+    CHECK(found != std::string::npos);
+
+    // Contains SyncMemGeneric
+    found = dump.find("SyncMemGeneric");
+    CHECK(found != std::string::npos);
+
+    // Does not contain CopyMemGeneric
+    found = dump.find("CopyMemGeneric");
+    CHECK(found == std::string::npos);
+
+    runtime->UnloadNetwork(netId);
+
+    // Check output is as expected
+    // Validate the result by comparing it against the expected output
+    auto* outputResult = reinterpret_cast<Half*>(alignedOutputPtr);
+    CHECK(outputResult);
+
+    // Check the output is correct
+    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
+}
+
+TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportSimpleConvertFp32toFp16EndToEnd")
+{
+    using namespace half_float::literal;
+
+    // Create runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    NetworkImpl network;
+
+    armnn::TensorInfo inputInfo({1}, armnn::DataType::Float32);
+    armnn::TensorInfo outputTensorInfo({1}, armnn::DataType::Float16);
+
+    std::vector<Half> expectedOutput = { 1.0_h };
+
+    unsigned int numElements = inputInfo.GetNumElements();
+    size_t totalBytesInput = numElements * sizeof(float);
+    size_t totalBytesOutput = numElements * sizeof(Half);
+
+    IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
+    ARMNN_ASSERT(inputLayer);
+
+    armnn::IConnectableLayer* const convLayer = network.AddConvertFp32ToFp16Layer("convert");
+    ARMNN_ASSERT(convLayer);
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    IConnectableLayer* output = network.AddOutputLayer(0, "output");
+    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Optimize the network
+    OptimizerOptions optOptions;
+    optOptions.m_ImportEnabled = false;
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
+    CHECK(optNet);
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t spaceInput = totalBytesInput + alignment + alignment;
+    size_t spaceOutput = totalBytesOutput + alignment + alignment;
+    auto inputData = std::make_unique<uint8_t[]>(spaceInput);
+    void* alignedInputPtr = inputData.get();
+    CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
+
+    // Input data
+    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+    inputPtr[0] = 1.0f;
+
+    auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
+    void* alignedOutputPtr = outputData.get();
+    CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
+    auto* outputPtr = reinterpret_cast<Half*>(alignedOutputPtr);
+    std::fill_n(outputPtr, numElements, -10.0f);
+
+    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
+    inputTensorInfo.SetConstant(true);
+    InputTensors inputTensors
+    {
+        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
+    };
+    OutputTensors outputTensors
+    {
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    INFO("Run ImportInputs");
+    std::vector<ImportedInputId> importedInputIds =
+        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
+    std::vector<ImportedOutputId> importedOutputIds =
+        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);
+    std::string dump = ss.str();
+
+    // Contains ConvertFp32ToFp16Workload
+    std::size_t found = dump.find("ConvertFp32ToFp16Workload");
+    CHECK(found != std::string::npos);
+
+    // Contains SyncMemGeneric
+    found = dump.find("SyncMemGeneric");
+    CHECK(found != std::string::npos);
+
+    // Does not contain CopyMemGeneric
+    found = dump.find("CopyMemGeneric");
+    CHECK(found == std::string::npos);
+
+    runtime->UnloadNetwork(netId);
+
+    // Check output is as expected
+    // Validate the result by comparing it against the expected output
+    auto* outputResult = reinterpret_cast<Half*>(alignedOutputPtr);
+    CHECK(outputResult);
+
+    // Check the output is correct
+    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
+}
+
 TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesEndToEndTest")
 {
     /*
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index 8ccf157aca..4ac1274130 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -80,8 +80,8 @@ void ClConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensor
 // Replace output tensor handle with the given TensorHandle
 void ClConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
 {
-    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
-    this->m_Data.m_Inputs[slot] = tensorHandle;
+    ITensorHandle* backupHandle = this->m_Data.m_Outputs[slot];
+    this->m_Data.m_Outputs[slot] = tensorHandle;
     try
     {
         Reconfigure();
@@ -89,7 +89,7 @@ void ClConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tenso
     catch(armnn::UnimplementedException& e)
     {
         // Cannot reconfigure, revert the slot back and throw the exception.
-        this->m_Data.m_Inputs[slot] = backupHandle;
+        this->m_Data.m_Outputs[slot] = backupHandle;
         throw e;
     }
 }
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index a44a80c997..307314d784 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -80,8 +80,8 @@ void ClConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensor
 // Replace output tensor handle with the given TensorHandle
 void ClConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
 {
-    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
-    this->m_Data.m_Inputs[slot] = tensorHandle;
+    ITensorHandle* backupHandle = this->m_Data.m_Outputs[slot];
+    this->m_Data.m_Outputs[slot] = tensorHandle;
     try
     {
         Reconfigure();
@@ -89,7 +89,7 @@ void ClConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tenso
     catch(armnn::UnimplementedException& e)
     {
         // Cannot reconfigure, revert the slot back and throw the exception.
-        this->m_Data.m_Inputs[slot] = backupHandle;
+        this->m_Data.m_Outputs[slot] = backupHandle;
         throw e;
     }
 }
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index e190f33bbc..d20c6fc7b5 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -310,7 +310,7 @@ arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const T
     if (paramsInfo.m_ProjectionBias != nullptr)
     {
-        aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
+        aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
     }
 
     lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                            paramsInfo.m_ProjectionBias != nullptr ?
--
cgit v1.2.1
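
Editor's note (not part of the patch): the three tests added above all follow the same pattern, which is also what the new API additions enable: build a small NetworkImpl containing a single Convert layer, optimise its Graph through the new Optimize(const Graph&, ...) overload, then load and force-import the caller-allocated buffers. A minimal sketch of that pattern outside the doctest fixture might look like the following; the include paths, the trivial 1-element tensor shape, and the omission of buffer alignment and import are illustrative assumptions only.

```cpp
// Sketch only: exercises NetworkImpl::AddConvertFp32ToFp16Layer() and the
// Optimize(const Graph&, ...) overload introduced by this patch.
#include <armnn/ArmNN.hpp>
#include "Network.hpp"   // NetworkImpl, as included by the test file above

using namespace armnn;

int main()
{
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

    // Build input -> ConvertFp32ToFp16 -> output directly on a NetworkImpl.
    NetworkImpl network;
    TensorInfo inputInfo({1}, DataType::Float32);
    TensorInfo outputInfo({1}, DataType::Float16);

    IConnectableLayer* input   = network.AddInputLayer(0, "input");
    IConnectableLayer* convert = network.AddConvertFp32ToFp16Layer("convert");
    IConnectableLayer* output  = network.AddOutputLayer(0, "output");

    input->GetOutputSlot(0).Connect(convert->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
    convert->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    convert->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // New overload added by this patch: optimise straight from a Graph,
    // without wrapping the NetworkImpl in an INetwork first.
    std::vector<BackendId> backends = { Compute::GpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends,
                                           runtime->GetDeviceSpec(), OptimizerOptions());

    NetworkId netId;
    std::string errorMessage;
    INetworkProperties properties(false, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(netId, std::move(optNet), errorMessage, properties);
    return 0;
}
```

The Graph-based overload exists so that test code holding a NetworkImpl can be optimised directly, which is exactly what the ClForceImportConvert* tests above rely on via Optimize(network.GetGraph(), ...); the pre-existing INetwork overload now simply forwards to it.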