diff options
Diffstat (limited to 'src/backends')
109 files changed, 1150 insertions, 426 deletions
diff --git a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp index 66ccdbf1d9..16b10c88ac 100644 --- a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp +++ b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp @@ -24,6 +24,100 @@ namespace experimental template<DataType ArmnnIType, DataType ArmnnOType, typename TInput = ResolveType <ArmnnIType>, typename TOutput = ResolveType <ArmnnOType>> +void AsyncThreadedEndToEndTestImpl(INetworkPtr network, + const std::vector<std::map<int, std::vector<TInput>>>& inputTensorData, + const std::vector<std::map<int, std::vector<TOutput>>>& expectedOutputData, + std::vector<BackendId> backends, + const size_t numberOfInferences, + float tolerance = 0.000001f) +{ + // Create Runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Optimize the Network + IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec()); + + + // Creates AsyncNetwork + NetworkId networkId = 0; + std::string errorMessage; + const INetworkProperties networkProperties(false, false, true); + runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties); + + std::vector<InputTensors> inputTensorsVec; + std::vector<OutputTensors> outputTensorsVec; + std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec; + std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles; + + for (unsigned int i = 0; i < numberOfInferences; ++i) + { + InputTensors inputTensors; + OutputTensors outputTensors; + outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>()); + + inputTensors.reserve(inputTensorData.size()); + for (auto&& it : inputTensorData[i]) + { + inputTensors.push_back({it.first, + ConstTensor(runtime->GetInputTensorInfo(networkId, it.first), it.second.data())}); + } + + 
outputTensors.reserve(expectedOutputData.size()); + for (auto&& it : expectedOutputData[i]) + { + std::vector<TOutput> out(it.second.size()); + outputStorageVec[i].emplace(it.first, out); + outputTensors.push_back({it.first, + Tensor(runtime->GetOutputTensorInfo(networkId, it.first), + outputStorageVec[i].at(it.first).data())}); + } + + inputTensorsVec.push_back(inputTensors); + outputTensorsVec.push_back(outputTensors); + + workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId)); + } + + std::vector<std::thread> threads; + for (unsigned int i = 0; i < numberOfInferences; ++i) + { + // Access the vectors before we do anything multi-threaded + InputTensors& inputTensors = inputTensorsVec[i]; + OutputTensors& outputTensors = outputTensorsVec[i]; + IWorkingMemHandle& workingMemHandle = *workingMemHandles[i].get(); + + threads.emplace_back([&]() + { + // Run the async network + runtime->Execute(workingMemHandle, inputTensors, outputTensors); + }); + } + + for (unsigned int i = 0; i < numberOfInferences; ++i) + { + threads[i].join(); + } + + // Checks the results. 
+ for (unsigned int i = 0; i < numberOfInferences; ++i) + { + for (auto &&it : expectedOutputData[i]) + { + std::vector<TOutput> out = outputStorageVec[i].at(it.first); + for (unsigned int j = 0; j < out.size(); ++j) + { + BOOST_CHECK(Compare<ArmnnOType>(it.second[j], out[j], tolerance) == true); + } + } + } + +} + + + +template<DataType ArmnnIType, DataType ArmnnOType, + typename TInput = ResolveType <ArmnnIType>, typename TOutput = ResolveType <ArmnnOType>> void AsyncEndToEndTestImpl(INetworkPtr network, const std::map<int, std::vector<TInput>>& inputTensorData, const std::map<int, std::vector<TOutput>>& expectedOutputData, @@ -169,7 +263,71 @@ void StridedSlicedEndToEndTest(const std::vector<BackendId>& backends) std::map<int, std::vector<T>> inputTensorData = {{0, inputData}}; std::map<int, std::vector<T>> expectedOutputData = {{0, outputExpected}}; - AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends); + AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends, 1); +} + +template<armnn::DataType ArmnnType> +void StridedSlicedMultiThreadedEndToEndTest(const std::vector<BackendId>& backends) +{ + using namespace armnn; + using T = ResolveType<ArmnnType>; + + const TensorShape& inputShape = {3, 2, 3, 1}; + const TensorShape& outputShape = {1, 2, 3, 1}; + const std::vector<int>& beginData = {1, 0, 0, 0}; + const std::vector<int>& endData = {2, 2, 3, 1}; + const std::vector<int>& stridesData = {1, 1, 1, 1}; + int beginMask = 0; + int endMask = 0; + int shrinkAxisMask = 0; + int ellipsisMask = 0; + int newAxisMask = 0; + + // Builds up the structure of the network + INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape, + outputShape, + beginData, + endData, + stridesData, + beginMask, + endMask, + shrinkAxisMask, + ellipsisMask, + newAxisMask); + + BOOST_TEST_CHECKPOINT("create a network"); + + // Creates structures for input & output. 
+ std::vector<T> inputData1{ + 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f + }; + + std::vector<T> outputExpected1{ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f }; + + // Creates structures for input & output. + std::vector<T> inputData2{ + 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + + 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f, + + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f + }; + + std::vector<T> outputExpected2{ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f }; + + std::vector<std::map<int, std::vector<T>>> inputTensors; + std::vector<std::map<int, std::vector<T>>> outputTensors; + + inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData1}}); + inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData2}}); + outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected1}}); + outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected2}}); + + AsyncThreadedEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensors, outputTensors, backends, 2); } } // experimental namespace diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp index 521854b12b..0839c1c7af 100644 --- a/src/backends/reference/test/RefEndToEndTests.cpp +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -1341,6 +1341,11 @@ BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedEndToEndTest) { armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(defaultBackends); } + +BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedMultiThreadedEndToEndTest) +{ + armnn::experimental::StridedSlicedMultiThreadedEndToEndTest<armnn::DataType::Float32>(defaultBackends); +} #endif BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/reference/workloads/Concatenate.cpp b/src/backends/reference/workloads/Concatenate.cpp index a85e34ee61..a0e0abfaa0 100644 --- a/src/backends/reference/workloads/Concatenate.cpp +++ b/src/backends/reference/workloads/Concatenate.cpp @@ -11,11 
+11,13 @@ namespace armnn { -void Concatenate(const ConcatQueueDescriptor &data) +void Concatenate(const ConcatQueueDescriptor &data, + std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) { - const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]); - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map()); Encoder<float>& encoder = *encoderPtr; for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index) @@ -37,7 +39,7 @@ void Concatenate(const ConcatQueueDescriptor &data) ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; //Split view extents are defined by the size of (the corresponding) input tensor. - const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]); ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions()); // Check all dimensions to see if this element is inside the given input view. 
@@ -57,7 +59,7 @@ void Concatenate(const ConcatQueueDescriptor &data) if (insideView) { std::unique_ptr<Decoder<float>> decoderPtr = - MakeDecoder<float>(inputInfo, data.m_Inputs[viewIdx]->Map()); + MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map()); Decoder<float>& decoder = *decoderPtr; unsigned int inIndex = 0; unsigned int dimensionStride = 1; diff --git a/src/backends/reference/workloads/Concatenate.hpp b/src/backends/reference/workloads/Concatenate.hpp index 75e5f8c4f4..e0264b0aac 100644 --- a/src/backends/reference/workloads/Concatenate.hpp +++ b/src/backends/reference/workloads/Concatenate.hpp @@ -10,5 +10,7 @@ namespace armnn { -void Concatenate(const ConcatQueueDescriptor &data); +void Concatenate(const ConcatQueueDescriptor &data, + std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs); } //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationWorkload.cpp b/src/backends/reference/workloads/RefActivationWorkload.cpp index a26a6399bc..77958673e9 100644 --- a/src/backends/reference/workloads/RefActivationWorkload.cpp +++ b/src/backends/reference/workloads/RefActivationWorkload.cpp @@ -17,17 +17,28 @@ namespace armnn void RefActivationWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefActivationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefActivationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - Activation(*MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()), - 
*MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()), + Activation(*MakeDecoder<float>(inputInfo, inputs[0]->Map()), + *MakeEncoder<float>(outputInfo, outputs[0]->Map()), inputInfo, m_Data.m_Parameters.m_Function, m_Data.m_Parameters.m_A, m_Data.m_Parameters.m_B); } + } //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationWorkload.hpp b/src/backends/reference/workloads/RefActivationWorkload.hpp index 5b2377e363..429fb60aaa 100644 --- a/src/backends/reference/workloads/RefActivationWorkload.hpp +++ b/src/backends/reference/workloads/RefActivationWorkload.hpp @@ -15,7 +15,11 @@ class RefActivationWorkload : public BaseWorkload<ActivationQueueDescriptor> { public: using BaseWorkload<ActivationQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; + +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp index bf8649f54d..77167a866b 100644 --- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp +++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp @@ -18,16 +18,27 @@ RefArgMinMaxWorkload::RefArgMinMaxWorkload( const WorkloadInfo& info) : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) {} + void RefArgMinMaxWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefArgMinMaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefArgMinMaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefArgMinMaxWorkload_Execute"); - const TensorInfo &inputTensorInfo = 
GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map()); Decoder<float> &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); if (outputTensorInfo.GetDataType() == armnn::DataType::Signed32) { int32_t *output = GetOutputTensorData<int32_t>(0, m_Data); diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp index 97b70772d1..df9ebcab0b 100644 --- a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp +++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp @@ -16,6 +16,10 @@ public: explicit RefArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; + +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn
\ No newline at end of file diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp index 21fcdab5a3..e1068896ba 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp @@ -24,6 +24,17 @@ RefBatchNormalizationWorkload::RefBatchNormalizationWorkload(const BatchNormaliz void RefBatchNormalizationWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefBatchNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefBatchNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationWorkload_Execute"); std::unique_ptr<Decoder<float>> meanDecoder = MakeDecoder<float>(m_Mean->GetTensorInfo(), @@ -34,10 +45,10 @@ void RefBatchNormalizationWorkload::Execute() const m_Gamma->Map(true)); std::unique_ptr<Decoder<float>> betaDecoder = MakeDecoder<float>(m_Beta->GetTensorInfo(), m_Beta->Map(true)); - std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), + outputs[0]->Map()); BatchNormImpl(m_Data, *meanDecoder, *varianceDecoder, *betaDecoder, *gammaDecoder, *inputDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp 
b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp index 53d01f65da..a8a72ef65c 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp @@ -16,9 +16,11 @@ class RefBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueu public: explicit RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_Mean; std::unique_ptr<ScopedCpuTensorHandle> m_Variance; std::unique_ptr<ScopedCpuTensorHandle> m_Beta; diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp index c21ef7640a..441d2ba2cf 100644 --- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp +++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp @@ -13,13 +13,23 @@ namespace armnn void RefBatchToSpaceNdWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefBatchToSpaceNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefBatchToSpaceNdWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchToSpaceNdWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> 
inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); BatchToSpaceNd(m_Data.m_Parameters.m_DataLayout, inputInfo, outputInfo, m_Data.m_Parameters.m_BlockShape, m_Data.m_Parameters.m_Crops, *inputDecoder, *outputEncoder); diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp index 60577bab2e..07c800da83 100644 --- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp +++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp @@ -16,7 +16,11 @@ class RefBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescrip public: using BaseWorkload<BatchToSpaceNdQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; + +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn
\ No newline at end of file diff --git a/src/backends/reference/workloads/RefComparisonWorkload.cpp b/src/backends/reference/workloads/RefComparisonWorkload.cpp index 52ad9a2879..03df7a4c4a 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.cpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.cpp @@ -26,9 +26,15 @@ RefComparisonWorkload::RefComparisonWorkload(const ComparisonQueueDescriptor& de void RefComparisonWorkload::PostAllocationConfigure() { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefComparisonWorkload::PostAllocationConfigure(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); m_Input0 = MakeDecoder<InType>(inputInfo0); m_Input1 = MakeDecoder<InType>(inputInfo1); @@ -38,19 +44,31 @@ void RefComparisonWorkload::PostAllocationConfigure() void RefComparisonWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefComparisonWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefComparisonWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefComparisonWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const 
TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + m_Input0->Reset(inputs[0]->Map()); + m_Input1->Reset(inputs[1]->Map()); + m_Output->Reset(outputs[0]->Map()); using EqualFunction = ElementwiseBinaryFunction<std::equal_to<InType>>; using GreaterFunction = ElementwiseBinaryFunction<std::greater<InType>>; diff --git a/src/backends/reference/workloads/RefComparisonWorkload.hpp b/src/backends/reference/workloads/RefComparisonWorkload.hpp index a19e4a0540..de0144ca15 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.hpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.hpp @@ -21,8 +21,11 @@ public: RefComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info); void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void PostAllocationConfigure(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs); + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; using InType = float; using OutType = bool; diff --git a/src/backends/reference/workloads/RefConcatWorkload.cpp b/src/backends/reference/workloads/RefConcatWorkload.cpp index e606649ed0..c04c05354e 100644 --- a/src/backends/reference/workloads/RefConcatWorkload.cpp +++ b/src/backends/reference/workloads/RefConcatWorkload.cpp @@ -14,8 +14,18 @@ namespace armnn void RefConcatWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void 
RefConcatWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConcatWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConcatWorkload_Execute"); - Concatenate(m_Data); + Concatenate(m_Data, inputs, outputs); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefConcatWorkload.hpp b/src/backends/reference/workloads/RefConcatWorkload.hpp index 0be28bb7c8..f4e1aa85f2 100644 --- a/src/backends/reference/workloads/RefConcatWorkload.hpp +++ b/src/backends/reference/workloads/RefConcatWorkload.hpp @@ -15,7 +15,10 @@ class RefConcatWorkload : public BaseWorkload<ConcatQueueDescriptor> { public: using BaseWorkload<ConcatQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantWorkload.cpp b/src/backends/reference/workloads/RefConstantWorkload.cpp index d3e65e6615..6290237d69 100644 --- a/src/backends/reference/workloads/RefConstantWorkload.cpp +++ b/src/backends/reference/workloads/RefConstantWorkload.cpp @@ -20,21 +20,20 @@ RefConstantWorkload::RefConstantWorkload( const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload<ConstantQueueDescriptor>(descriptor, info) {} -void RefConstantWorkload::PostAllocationConfigure() +void RefConstantWorkload::Execute() const { - const ConstantQueueDescriptor& data = this->m_Data; - - ARMNN_ASSERT(data.m_LayerOutput != nullptr); - - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - ARMNN_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); + 
Execute(m_Data.m_Outputs); +} - memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(), - outputInfo.GetNumBytes()); +void RefConstantWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Outputs); } -void RefConstantWorkload::Execute() const +void RefConstantWorkload::Execute(std::vector<ITensorHandle*> outputs) const { + memcpy(outputs[0]->Map(), m_Data.m_LayerOutput->GetConstTensor<void>(), GetTensorInfo(outputs[0]).GetNumBytes()); + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantWorkload_Execute"); } diff --git a/src/backends/reference/workloads/RefConstantWorkload.hpp b/src/backends/reference/workloads/RefConstantWorkload.hpp index ada488a7b2..9af5903329 100644 --- a/src/backends/reference/workloads/RefConstantWorkload.hpp +++ b/src/backends/reference/workloads/RefConstantWorkload.hpp @@ -19,8 +19,10 @@ class RefConstantWorkload : public BaseWorkload<ConstantQueueDescriptor> public: RefConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp index c4b5416836..70e377d19b 100644 --- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp +++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp @@ -15,12 +15,23 @@ namespace armnn void RefConvertBf16ToFp32Workload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertBf16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, 
workingMemDescriptor.m_Outputs); +} + +void RefConvertBf16ToFp32Workload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertBf16ToFp32Workload_Execute"); - const BFloat16* const input = GetInputTensorDataBFloat16(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); + const BFloat16* const input = reinterpret_cast<const BFloat16*>(inputs[0]->Map()); + float* const output = reinterpret_cast<float*>(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp index 87cdc3e1e3..90613621b4 100644 --- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp +++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp @@ -15,7 +15,10 @@ class RefConvertBf16ToFp32Workload : public BFloat16ToFloat32Workload<ConvertBf1 { public: using BFloat16ToFloat32Workload<ConvertBf16ToFp32QueueDescriptor>::BFloat16ToFloat32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp index ef813eb69b..347132d1f6 100644 --- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp @@ -15,12 +15,23 @@ namespace armnn void RefConvertFp16ToFp32Workload::Execute() 
const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp16ToFp32Workload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); - const Half* const input = GetInputTensorDataHalf(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); + const Half* const input = reinterpret_cast<const Half*>(inputs[0]->Map()); + float* const output = reinterpret_cast<float*>(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp index 7c58e9f089..99ab9e9934 100644 --- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16 { public: using Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>::Float16ToFloat32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp index 181b236e83..7fe302a5ad 100644 --- 
a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp @@ -15,12 +15,23 @@ namespace armnn void RefConvertFp32ToBf16Workload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp32ToBf16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp32ToBf16Workload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToBf16Workload_Execute"); - const float* const input = GetInputTensorDataFloat(0, m_Data); - BFloat16* const output = GetOutputTensorDataBFloat16(0, m_Data); + const float* const input = reinterpret_cast<const float*>(inputs[0]->Map()); + BFloat16* const output = reinterpret_cast<BFloat16*>(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp index 409603bb6c..694032c8e6 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp32ToBf16Workload : public Float32ToBFloat16Workload<ConvertFp3 { public: using Float32ToBFloat16Workload<ConvertFp32ToBf16QueueDescriptor>::Float32ToBFloat16Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } 
//namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp index c68960fad2..be13458d89 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp @@ -16,13 +16,24 @@ namespace armnn void RefConvertFp32ToFp16Workload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp32ToFp16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp32ToFp16Workload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); - const float* const input = GetInputTensorDataFloat(0, m_Data); - Half* const output = GetOutputTensorDataHalf(0, m_Data); + const float* const input = reinterpret_cast<const float*>(inputs[0]->Map()); + Half* const output = reinterpret_cast<Half*>(outputs[0]->Map()); // convert Fp32 input to Fp16 output - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp index e1fd8755cb..f1daa54436 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32 { public: using Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>::Float32ToFloat16Workload; - virtual void Execute() const override; + void 
Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp index dad9936f1b..6d0ab413d8 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp @@ -30,24 +30,26 @@ RefConvolution2dWorkload::RefConvolution2dWorkload( } } -void RefConvolution2dWorkload::PostAllocationConfigure() +void RefConvolution2dWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder<float>(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder<float>(outputInfo); +void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefConvolution2dWorkload::Execute() const { +void RefConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dWorkload_Execute"); - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map()); + + const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape(); + const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape(); - 
Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, m_FilterShape, + Convolve(inputShape, *inputDecoder, outputShape, *outputEncoder, m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(), m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft, m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY, diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp index b6bdf23ffa..57df3ce6ae 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp @@ -19,21 +19,18 @@ public: explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_Weight; std::unique_ptr<ScopedCpuTensorHandle> m_Bias; - std::unique_ptr<Decoder<float>> m_InputDecoder; - std::unique_ptr<Encoder<float>> m_OutputEncoder; std::unique_ptr<Decoder<float>> m_FilterDecoder; std::unique_ptr<Decoder<float>> m_BiasDecoder; - TensorShape m_InputShape; - TensorShape m_OutputShape; TensorShape m_FilterShape; }; diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp index f9950c8231..b0e19c5851 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.cpp +++ b/src/backends/reference/workloads/RefDebugWorkload.cpp @@ -17,18 +17,30 @@ namespace armnn template<armnn::DataType DataType> void RefDebugWorkload<DataType>::Execute() const { + Execute(m_Data.m_Inputs); +} + +template<armnn::DataType DataType> +void 
RefDebugWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs); +} + +template<armnn::DataType DataType> +void RefDebugWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs) const +{ using T = ResolveType<DataType>; ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); const T* inputData = GetInputTensorData<T>(0, m_Data); T* outputData = GetOutputTensorData<T>(0, m_Data); if (m_Callback) { - m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, m_Data.m_Inputs[0]); + m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, inputs[0]); } else { diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp index d7e3cd9947..d0c47dd829 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.hpp +++ b/src/backends/reference/workloads/RefDebugWorkload.hpp @@ -30,10 +30,12 @@ public: using TypedWorkload<DebugQueueDescriptor, DataType>::TypedWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; void RegisterDebugCallback(const DebugCallbackFunction& func) override; private: + void Execute(std::vector<ITensorHandle*> inputs) const; DebugCallbackFunction m_Callback; }; diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp index 93c1120a1c..22e35f0ec5 100644 --- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp +++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp @@ -13,14 +13,24 @@ namespace armnn void RefDepthToSpaceWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDepthToSpaceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, 
workingMemDescriptor.m_Outputs); +} + +void RefDepthToSpaceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthToSpaceWorkload_Execute"); - const TensorInfo inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo inputInfo = GetTensorInfo(inputs[0]); DepthToSpace(inputInfo, m_Data.m_Parameters, - m_Data.m_Inputs[0]->Map(), - m_Data.m_Outputs[0]->Map(), + inputs[0]->Map(), + outputs[0]->Map(), GetDataTypeSize(inputInfo.GetDataType())); } diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp index a30fadc3e9..ec260a92f7 100644 --- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp +++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp @@ -14,7 +14,10 @@ class RefDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor> { public: using BaseWorkload<DepthToSpaceQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp index cfc81ce203..8fe5dec7d1 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp @@ -32,26 +32,29 @@ RefDepthwiseConvolution2dWorkload::RefDepthwiseConvolution2dWorkload( } } -void RefDepthwiseConvolution2dWorkload::PostAllocationConfigure() +void RefDepthwiseConvolution2dWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - m_InputShape = inputInfo.GetShape(); - 
m_InputDecoder = MakeDecoder<float>(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder<float>(outputInfo); +void RefDepthwiseConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefDepthwiseConvolution2dWorkload::Execute() const +void RefDepthwiseConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dWorkload_Execute"); std::unique_ptr<Decoder<float>> pBiasDecoder{}; - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr<Encoder<float>> OutputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map()); + + const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape(); + const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape(); - Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, + Convolve(inputShape, *inputDecoder, outputShape, *OutputEncoder, m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(), m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft, m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY, diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp index 6d7037f660..65a8fd76cf 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp @@ -17,22 +17,19 
@@ public: explicit RefDepthwiseConvolution2dWorkload(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr <ScopedCpuTensorHandle> m_Weight; std::unique_ptr <ScopedCpuTensorHandle> m_Bias; - std::unique_ptr <Decoder<float>> m_InputDecoder; - std::unique_ptr <Encoder<float>> m_OutputEncoder; std::unique_ptr <Decoder<float>> m_FilterDecoder; std::unique_ptr <Decoder<float>> m_BiasDecoder; - TensorShape m_InputShape; - TensorShape m_OutputShape; TensorShape m_FilterShape; }; diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.cpp b/src/backends/reference/workloads/RefDequantizeWorkload.cpp index d6e4964a49..f9d80073b0 100644 --- a/src/backends/reference/workloads/RefDequantizeWorkload.cpp +++ b/src/backends/reference/workloads/RefDequantizeWorkload.cpp @@ -14,13 +14,23 @@ namespace armnn void RefDequantizeWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDequantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefDequantizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDequantizeWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = 
MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Dequantize(*inputDecoder, *outputEncoder, inputInfo, outputInfo); } diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.hpp b/src/backends/reference/workloads/RefDequantizeWorkload.hpp index 691f713076..922d57c556 100644 --- a/src/backends/reference/workloads/RefDequantizeWorkload.hpp +++ b/src/backends/reference/workloads/RefDequantizeWorkload.hpp @@ -17,6 +17,9 @@ public: using BaseWorkload<DequantizeQueueDescriptor>::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp index b9817ba1ea..25c326ad37 100644 --- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp @@ -20,19 +20,30 @@ RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload( void RefDetectionPostProcessWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDetectionPostProcessWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefDetectionPostProcessWorkload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute"); - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& 
boxEncodingsInfo = GetTensorInfo(inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(inputs[1]); const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo(); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + const TensorInfo& detectionBoxesInfo = GetTensorInfo(outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(outputs[1]); + const TensorInfo& detectionScoresInfo = GetTensorInfo(outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(outputs[3]); - auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, m_Data.m_Inputs[0]->Map()); - auto scores = MakeDecoder<float>(scoresInfo, m_Data.m_Inputs[1]->Map()); + auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, inputs[0]->Map()); + auto scores = MakeDecoder<float>(scoresInfo, inputs[1]->Map()); auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false)); float* detectionBoxes = GetOutputTensorData<float>(0, m_Data); diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp index 799d0c6219..007dcea456 100644 --- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp @@ -16,9 +16,11 @@ class RefDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcess public: explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> 
outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_Anchors; }; diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp index 4fbb0d123f..b442f25c2a 100644 --- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp +++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp @@ -28,28 +28,29 @@ RefElementwiseUnaryWorkload::RefElementwiseUnaryWorkload(const ElementwiseUnaryQ : BaseWorkload<ElementwiseUnaryQueueDescriptor>(desc, info) {} -void RefElementwiseUnaryWorkload::PostAllocationConfigure() +void RefElementwiseUnaryWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input = MakeDecoder<InType>(inputInfo); +void RefElementwiseUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ - m_Output = MakeEncoder<OutType>(outputInfo); + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefElementwiseUnaryWorkload::Execute() const +void RefElementwiseUnaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseUnaryWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape = inputInfo.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input->Reset(m_Data.m_Inputs[0]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<InType>> input = MakeDecoder<InType>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<OutType>> output= MakeEncoder<OutType>(outputInfo, 
outputs[0]->Map()); using AbsFunction = ElementwiseUnaryFunction<abs<InType>>; using ExpFunction = ElementwiseUnaryFunction<exp<InType>>; @@ -61,27 +62,27 @@ void RefElementwiseUnaryWorkload::Execute() const { case UnaryOperation::Abs: { - AbsFunction(inShape, outShape, *m_Input, *m_Output); + AbsFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Exp: { - ExpFunction(inShape, outShape, *m_Input, *m_Output); + ExpFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Neg: { - NegFunction(inShape, outShape, *m_Input, *m_Output); + NegFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Rsqrt: { - RsqrtFunction(inShape, outShape, *m_Input, *m_Output); + RsqrtFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Sqrt: { - SqrtFunction(inShape, outShape, *m_Input, *m_Output); + SqrtFunction(inShape, outShape, *input, *output); break; } default: diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp index efb2865ebd..d05347bbe5 100644 --- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp +++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp @@ -19,15 +19,13 @@ public: using BaseWorkload<ElementwiseUnaryQueueDescriptor>::m_Data; RefElementwiseUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; using InType = float; using OutType = float; - - std::unique_ptr<Decoder<InType>> m_Input; - std::unique_ptr<Encoder<OutType>> m_Output; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.cpp 
b/src/backends/reference/workloads/RefElementwiseWorkload.cpp index 60acbd6252..dd7d325ca5 100644 --- a/src/backends/reference/workloads/RefElementwiseWorkload.cpp +++ b/src/backends/reference/workloads/RefElementwiseWorkload.cpp @@ -26,39 +26,41 @@ RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::RefElementwiseWo } template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString> -void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::PostAllocationConfigure() +void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute() const { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input0 = MakeDecoder<InType>(inputInfo0); - m_Input1 = MakeDecoder<InType>(inputInfo1); - m_Output = MakeEncoder<OutType>(outputInfo); +template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString> +void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::ExecuteAsync( + WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString> -void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute() const +void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute( + std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, StringMapping::Instance().Get(DebugString)); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& 
inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<InType>> input0 = MakeDecoder<InType>(inputInfo0, inputs[0]->Map()); + std::unique_ptr<Decoder<InType>> input1 = MakeDecoder<InType>(inputInfo1, inputs[1]->Map()); + std::unique_ptr<Encoder<OutType>> output= MakeEncoder<OutType>(outputInfo, outputs[0]->Map()); ElementwiseBinaryFunction<Functor>(inShape0, inShape1, outShape, - *m_Input0, - *m_Input1, - *m_Output); + *input0, + *input1, + *output); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.hpp b/src/backends/reference/workloads/RefElementwiseWorkload.hpp index 03683b1a06..4dc4b5ba5a 100644 --- a/src/backends/reference/workloads/RefElementwiseWorkload.hpp +++ b/src/backends/reference/workloads/RefElementwiseWorkload.hpp @@ -26,13 +26,11 @@ public: using BaseWorkload<ParentDescriptor>::m_Data; RefElementwiseWorkload(const ParentDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: - std::unique_ptr<Decoder<InType>> m_Input0; - std::unique_ptr<Decoder<InType>> m_Input1; - std::unique_ptr<Encoder<OutType>> m_Output; + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; template <typename DataType = float> diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp index cf355d35d2..b30811b8ed 100644 --- 
a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp @@ -28,12 +28,23 @@ void FakeQuantization(const float* inputData, float* outputData, uint32_t numEle void RefFakeQuantizationFloat32Workload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFakeQuantizationFloat32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefFakeQuantizationFloat32Workload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = reinterpret_cast<const float*>(inputs[0]->Map()); + float* outputData = reinterpret_cast<float*>(outputs[0]->Map()); FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), m_Data.m_Parameters.m_Min, m_Data.m_Parameters.m_Max); diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp index 269ca08d2a..8f6cabb3fe 100644 --- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp @@ -15,7 +15,10 @@ class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizati { public: using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void 
Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFillWorkload.cpp b/src/backends/reference/workloads/RefFillWorkload.cpp index 991ab45396..ea1ca87caf 100644 --- a/src/backends/reference/workloads/RefFillWorkload.cpp +++ b/src/backends/reference/workloads/RefFillWorkload.cpp @@ -16,11 +16,21 @@ namespace armnn void RefFillWorkload::Execute() const { + Execute(m_Data.m_Outputs); +} + +void RefFillWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Outputs); +} + +void RefFillWorkload::Execute(std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFillWorkload_Execute"); - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map()); Encoder<float> &encoder = *encoderPtr; Fill(encoder, outputTensorInfo.GetShape(), m_Data.m_Parameters.m_Value); diff --git a/src/backends/reference/workloads/RefFillWorkload.hpp b/src/backends/reference/workloads/RefFillWorkload.hpp index 9be773c50b..e92514d865 100644 --- a/src/backends/reference/workloads/RefFillWorkload.hpp +++ b/src/backends/reference/workloads/RefFillWorkload.hpp @@ -15,7 +15,10 @@ class RefFillWorkload : public BaseWorkload<FillQueueDescriptor> { public: using BaseWorkload<FillQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFloorWorkload.cpp 
b/src/backends/reference/workloads/RefFloorWorkload.cpp index 0c61386b9a..e7bd50ddea 100644 --- a/src/backends/reference/workloads/RefFloorWorkload.cpp +++ b/src/backends/reference/workloads/RefFloorWorkload.cpp @@ -15,17 +15,27 @@ namespace armnn void RefFloorWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFloorWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefFloorWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); - const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map()); Decoder<float> &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map()); Encoder<float> &encoder = *encoderPtr; - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); for (unsigned int i = 0; i < numElements; ++i) { diff --git a/src/backends/reference/workloads/RefFloorWorkload.hpp b/src/backends/reference/workloads/RefFloorWorkload.hpp index 563640228d..28b2695c82 100644 --- a/src/backends/reference/workloads/RefFloorWorkload.hpp +++ b/src/backends/reference/workloads/RefFloorWorkload.hpp @@ -15,7 +15,10 @@ class 
RefFloorWorkload : public BaseWorkload<FloorQueueDescriptor> { public: using BaseWorkload<FloorQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp index 49e105f206..deb56d4c6b 100644 --- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp +++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp @@ -34,28 +34,32 @@ RefFullyConnectedWorkload::RefFullyConnectedWorkload( void RefFullyConnectedWorkload::PostAllocationConfigure() { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFullyConnectedWorkload::PostAllocationConfigure(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) +{ + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder<float>(inputInfo); if (!m_Data.m_Parameters.m_ConstantWeights) { - const TensorInfo& rWeightInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]); ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); m_WeightShape = rWeightInfo.GetShape(); m_WeightDecoder = MakeDecoder<float>(rWeightInfo); if (m_Data.m_Parameters.m_BiasEnabled) { - const TensorInfo& biasInfo = GetTensorInfo(m_Data.m_Inputs[2]); + const TensorInfo& biasInfo = GetTensorInfo(inputs[2]); m_BiasDecoder = MakeDecoder<float>(biasInfo); } } - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); m_OutputShape = 
outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder<float>(outputInfo); m_NumActivations = 1; // Total number of activations in the input. for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++) @@ -66,23 +70,36 @@ void RefFullyConnectedWorkload::PostAllocationConfigure() void RefFullyConnectedWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFullyConnectedWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefFullyConnectedWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedWorkload_Execute"); - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr<Encoder<float>> OutputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map()); + if (!m_Data.m_Parameters.m_ConstantWeights) { - m_WeightDecoder->Reset(m_Data.m_Inputs[1]->Map()); + m_WeightDecoder->Reset(inputs[1]->Map()); if (m_Data.m_Parameters.m_BiasEnabled) { - m_BiasDecoder->Reset(m_Data.m_Inputs[2]->Map()); + m_BiasDecoder->Reset(inputs[2]->Map()); } } - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); FullyConnected(m_InputShape, - *m_InputDecoder, + *inputDecoder, m_OutputShape, - *m_OutputEncoder, + *OutputEncoder, m_WeightShape, *m_WeightDecoder, *m_BiasDecoder, diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp index a8f0756223..5c0f67ebaf 100644 --- a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp +++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp @@ -23,14 +23,15 @@ public: void 
PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void PostAllocationConfigure(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs); + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_Weight; std::unique_ptr<ScopedCpuTensorHandle> m_Bias; - std::unique_ptr<Decoder<float>> m_InputDecoder; - std::unique_ptr<Encoder<float>> m_OutputEncoder; std::unique_ptr<Decoder<float>> m_WeightDecoder; std::unique_ptr<Decoder<float>> m_BiasDecoder; diff --git a/src/backends/reference/workloads/RefGatherWorkload.cpp b/src/backends/reference/workloads/RefGatherWorkload.cpp index eaeed61b0a..020c067cfb 100644 --- a/src/backends/reference/workloads/RefGatherWorkload.cpp +++ b/src/backends/reference/workloads/RefGatherWorkload.cpp @@ -15,18 +15,28 @@ namespace armnn void RefGatherWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefGatherWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefGatherWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefGatherWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo0, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = 
MakeDecoder<float>(inputInfo0, inputs[0]->Map()); Decoder<float>& decoder = *decoderPtr; const int32_t* indicesData = GetInputTensorData<int32_t>(1, m_Data); - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Encoder<float>& encoder = *encoderPtr; Gather(inputInfo0, inputInfo1, outputInfo, decoder, indicesData, encoder, m_Data.m_Parameters.m_Axis); diff --git a/src/backends/reference/workloads/RefGatherWorkload.hpp b/src/backends/reference/workloads/RefGatherWorkload.hpp index 30019a8d4d..1664e1611d 100644 --- a/src/backends/reference/workloads/RefGatherWorkload.hpp +++ b/src/backends/reference/workloads/RefGatherWorkload.hpp @@ -21,6 +21,9 @@ class RefGatherWorkload : public BaseWorkload<GatherQueueDescriptor> public: using BaseWorkload<GatherQueueDescriptor>::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp index 150f0cb017..daee97ae3e 100644 --- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp @@ -20,12 +20,23 @@ RefInstanceNormalizationWorkload::RefInstanceNormalizationWorkload( void RefInstanceNormalizationWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefInstanceNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefInstanceNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, + 
std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefInstanceNormalizationWorkload_Execute"); - std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), + outputs[0]->Map()); InstanceNorm(m_Data, *inputDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp index 620779f953..e366ddb05b 100644 --- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp @@ -16,7 +16,10 @@ class RefInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizati public: explicit RefInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp index f80901edc9..ca31503620 100644 --- a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp @@ -26,13 +26,23 @@ RefL2NormalizationWorkload::RefL2NormalizationWorkload( void RefL2NormalizationWorkload::Execute() const { + 
Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefL2NormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefL2NormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); DataLayoutIndexed dataLayout(m_Data.m_Parameters.m_DataLayout); diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp index 4beedc9992..c17767b943 100644 --- a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp @@ -18,6 +18,9 @@ public: const WorkloadInfo& info); void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp index a2ace13144..ebe1b1ecfe 100644 --- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp +++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp @@ -19,13 +19,23 @@ namespace armnn 
void RefLogSoftmaxWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefLogSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefLogSoftmaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogSoftmaxWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); ARMNN_ASSERT(decoder != nullptr); ARMNN_ASSERT(encoder != nullptr); diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp index f5048d90b3..c5d5d5b0c9 100644 --- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp +++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp @@ -15,7 +15,10 @@ class RefLogSoftmaxWorkload : public BaseWorkload<LogSoftmaxQueueDescriptor> { public: using BaseWorkload<LogSoftmaxQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp 
b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp index 1b4e8f9aa0..f187e0ca31 100644 --- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp +++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp @@ -22,32 +22,31 @@ RefLogicalBinaryWorkload::RefLogicalBinaryWorkload(const LogicalBinaryQueueDescr : BaseWorkload<LogicalBinaryQueueDescriptor>(desc, info) {} -void RefLogicalBinaryWorkload::PostAllocationConfigure() +void RefLogicalBinaryWorkload::Execute() const { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input0 = MakeDecoder<InType>(inputInfo0); - m_Input1 = MakeDecoder<InType>(inputInfo1); - m_Output = MakeEncoder<OutType>(outputInfo); +void RefLogicalBinaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefLogicalBinaryWorkload::Execute() const +void RefLogicalBinaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalBinaryWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); 
+ std::unique_ptr<Decoder<InType>> input0 = MakeDecoder<InType>(inputInfo0, inputs[0]->Map()); + std::unique_ptr<Decoder<InType>> input1 = MakeDecoder<InType>(inputInfo1, inputs[1]->Map()); + std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map()); using AndFunction = LogicalBinaryFunction<std::logical_and<bool>>; using OrFunction = LogicalBinaryFunction<std::logical_or<bool>>; @@ -56,12 +55,12 @@ void RefLogicalBinaryWorkload::Execute() const { case LogicalBinaryOperation::LogicalAnd: { - AndFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output); + AndFunction(inShape0, inShape1, outShape, *input0, *input1, *output); break; } case LogicalBinaryOperation::LogicalOr: { - OrFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output); + OrFunction(inShape0, inShape1, outShape, *input0, *input1, *output); break; } default: diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp index 4d6baf5fa4..d79a3039c7 100644 --- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp +++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp @@ -19,16 +19,13 @@ public: using BaseWorkload<LogicalBinaryQueueDescriptor>::m_Data; RefLogicalBinaryWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; using InType = bool; using OutType = bool; - - std::unique_ptr<Decoder<InType>> m_Input0; - std::unique_ptr<Decoder<InType>> m_Input1; - std::unique_ptr<Encoder<OutType>> m_Output; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp 
b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp index 76eb5ac39f..bef2bdc668 100644 --- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp +++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp @@ -22,27 +22,28 @@ RefLogicalUnaryWorkload::RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDesc : BaseWorkload<ElementwiseUnaryQueueDescriptor>(desc, info) {} -void RefLogicalUnaryWorkload::PostAllocationConfigure() +void RefLogicalUnaryWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input = MakeDecoder<InType>(inputInfo); - m_Output = MakeEncoder<OutType>(outputInfo); +void RefLogicalUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefLogicalUnaryWorkload::Execute() const +void RefLogicalUnaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalUnaryWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape = inputInfo.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input->Reset(m_Data.m_Inputs[0]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<InType>> input = MakeDecoder<InType>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map()); using NotFunction = LogicalUnaryFunction<std::logical_not<bool>>; @@ -50,7 +51,7 @@ void RefLogicalUnaryWorkload::Execute() const { case 
UnaryOperation::LogicalNot: { - NotFunction(inShape, outShape, *m_Input, *m_Output); + NotFunction(inShape, outShape, *input, *output); break; } default: diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp index 0d8b35495c..117f16836d 100644 --- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp +++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp @@ -19,15 +19,13 @@ public: using BaseWorkload<ElementwiseUnaryQueueDescriptor>::m_Data; RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; using InType = bool; using OutType = bool; - - std::unique_ptr<Decoder<InType>> m_Input; - std::unique_ptr<Encoder<OutType>> m_Output; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp index 7c37301d1d..09423547da 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.cpp +++ b/src/backends/reference/workloads/RefLstmWorkload.cpp @@ -40,25 +40,35 @@ RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const Wo void RefLstmWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefLstmWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefLstmWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ // This is a porting of the LSTM::Eval() method in the Android code base // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp - const 
TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inputShape = inputInfo.GetShape(); const DataType& outputType = outputInfo.GetDataType(); - std::unique_ptr<Encoder<float>> outputStateOut = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[1]->Map()); - std::unique_ptr<Encoder<float>> cellStateOut = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); - std::unique_ptr<Encoder<float>> output = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[3]->Map()); + std::unique_ptr<Encoder<float>> outputStateOut = MakeEncoder<float>(outputInfo, outputs[1]->Map()); + std::unique_ptr<Encoder<float>> cellStateOut = MakeEncoder<float>(outputInfo, outputs[2]->Map()); + std::unique_ptr<Encoder<float>> output = MakeEncoder<float>(outputInfo, outputs[3]->Map()); - std::unique_ptr<Decoder<float>> cellStateOutDecoder = MakeDecoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); - std::unique_ptr<Decoder<float>> outputDecoder = MakeDecoder<float>(outputInfo, m_Data.m_Outputs[3]->Map()); + std::unique_ptr<Decoder<float>> cellStateOutDecoder = MakeDecoder<float>(outputInfo, outputs[2]->Map()); + std::unique_ptr<Decoder<float>> outputDecoder = MakeDecoder<float>(outputInfo, outputs[3]->Map()); - std::unique_ptr<Decoder<float>> inputData = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Decoder<float>> outputStateIn = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[1]->Map()); - std::unique_ptr<Decoder<float>> cellStateIn = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[2]->Map()); + std::unique_ptr<Decoder<float>> inputData = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Decoder<float>> outputStateIn = MakeDecoder<float>(inputInfo, inputs[1]->Map()); + std::unique_ptr<Decoder<float>> cellStateIn = MakeDecoder<float>(inputInfo, 
inputs[2]->Map()); const uint32_t nBatch = inputShape[0]; const uint32_t nInput = inputShape[1]; @@ -71,19 +81,19 @@ void RefLstmWorkload::Execute() const const bool useLayerNorm = m_Data.m_Parameters.m_LayerNormEnabled; // Index the scratch buffers pointers to the global scratch buffer. - std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr<Encoder<float>> cellScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr<Encoder<float>> forgetGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr<Encoder<float>> outputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map()); + std::unique_ptr<Encoder<float>> cellScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map()); + std::unique_ptr<Encoder<float>> forgetGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> inputGateScratchDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> cellScratchDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> forgetGateScratchDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> outputGateScratchDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputInfo, outputs[0]->Map()); if (useCifg) { diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp index 
ce5a775269..b55a1f9a9e 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.hpp +++ b/src/backends/reference/workloads/RefLstmWorkload.hpp @@ -18,9 +18,11 @@ class RefLstmWorkload : public BaseWorkload<LstmQueueDescriptor> public: explicit RefLstmWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_InputToInputWeightsTensor; std::unique_ptr<ScopedCpuTensorHandle> m_InputToForgetWeightsTensor; std::unique_ptr<ScopedCpuTensorHandle> m_InputToCellWeightsTensor; diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp index 00e59bca4c..7941ce2c36 100644 --- a/src/backends/reference/workloads/RefMeanWorkload.cpp +++ b/src/backends/reference/workloads/RefMeanWorkload.cpp @@ -20,13 +20,23 @@ RefMeanWorkload::RefMeanWorkload(const MeanQueueDescriptor& descriptor, const Wo void RefMeanWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefMeanWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefMeanWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMeanWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder<float>(outputInfo, 
m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Reduce(inputInfo, outputInfo, diff --git a/src/backends/reference/workloads/RefMeanWorkload.hpp b/src/backends/reference/workloads/RefMeanWorkload.hpp index c673f940e0..b5a9ed812f 100644 --- a/src/backends/reference/workloads/RefMeanWorkload.hpp +++ b/src/backends/reference/workloads/RefMeanWorkload.hpp @@ -18,7 +18,10 @@ class RefMeanWorkload : public BaseWorkload<MeanQueueDescriptor> { public: explicit RefMeanWorkload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.cpp b/src/backends/reference/workloads/RefNormalizationWorkload.cpp index d5d2104cba..36828acfb3 100644 --- a/src/backends/reference/workloads/RefNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefNormalizationWorkload.cpp @@ -163,12 +163,22 @@ RefNormalizationWorkload::RefNormalizationWorkload(const NormalizationQueueDescr void RefNormalizationWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); - auto inputDecoder = 
MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder<float>(inputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder<float>(inputInfo, outputs[0]->Map()); if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) { diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.hpp b/src/backends/reference/workloads/RefNormalizationWorkload.hpp index 9d68ffda58..59170b8a80 100644 --- a/src/backends/reference/workloads/RefNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefNormalizationWorkload.hpp @@ -17,7 +17,10 @@ public: explicit RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp index af22c31001..ea515cae68 100644 --- a/src/backends/reference/workloads/RefPadWorkload.cpp +++ b/src/backends/reference/workloads/RefPadWorkload.cpp @@ -14,10 +14,20 @@ namespace armnn void RefPadWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPadWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPadWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPadWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& 
inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); armnn::Pad(inputInfo, outputInfo, diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp index 0b8379a60f..afc620383f 100644 --- a/src/backends/reference/workloads/RefPadWorkload.hpp +++ b/src/backends/reference/workloads/RefPadWorkload.hpp @@ -15,7 +15,10 @@ class RefPadWorkload : public BaseWorkload<PadQueueDescriptor> { public: using BaseWorkload<PadQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp index 1fb1421ed9..f6af208e8a 100644 --- a/src/backends/reference/workloads/RefPermuteWorkload.cpp +++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp @@ -16,12 +16,25 @@ namespace armnn template <armnn::DataType DataType> void RefPermuteWorkload<DataType>::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +template <armnn::DataType DataType> +void RefPermuteWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +template <armnn::DataType DataType> +void RefPermuteWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ using T = ResolveType<DataType>; ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - const ITensorHandle* src = m_Data.m_Inputs[0]; - ITensorHandle* dst = m_Data.m_Outputs[0]; + const ITensorHandle* src = inputs[0]; + ITensorHandle* dst = outputs[0]; const PermutationVector& mappings = 
m_Data.m_Parameters.m_DimMappings; armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp index 62a145617a..94f633423a 100644 --- a/src/backends/reference/workloads/RefPermuteWorkload.hpp +++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp @@ -25,6 +25,9 @@ public: using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data; using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; using RefPermuteBFloat16Workload = RefPermuteWorkload<DataType::BFloat16>; diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.cpp b/src/backends/reference/workloads/RefPooling2dWorkload.cpp index 40b814789c..d337278fe1 100644 --- a/src/backends/reference/workloads/RefPooling2dWorkload.cpp +++ b/src/backends/reference/workloads/RefPooling2dWorkload.cpp @@ -15,13 +15,23 @@ namespace armnn { void RefPooling2dWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPooling2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPooling2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0] ->Map()); - auto outputEncoder = 
MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0] ->Map()); + auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Pooling2d(*inputDecoder, *outputEncoder, diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.hpp b/src/backends/reference/workloads/RefPooling2dWorkload.hpp index 24386b7e8d..3495d6b68d 100644 --- a/src/backends/reference/workloads/RefPooling2dWorkload.hpp +++ b/src/backends/reference/workloads/RefPooling2dWorkload.hpp @@ -18,6 +18,9 @@ class RefPooling2dWorkload : public BaseWorkload<Pooling2dQueueDescriptor> public: using BaseWorkload<Pooling2dQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefPreluWorkload.cpp b/src/backends/reference/workloads/RefPreluWorkload.cpp index cdc0a63711..b298874334 100644 --- a/src/backends/reference/workloads/RefPreluWorkload.cpp +++ b/src/backends/reference/workloads/RefPreluWorkload.cpp @@ -20,14 +20,24 @@ RefPreluWorkload::RefPreluWorkload(const PreluQueueDescriptor& descriptor, void RefPreluWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPreluWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPreluWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPreluWorkload_Execute"); - std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr<Decoder<float>> alphaDecoder = 
MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[1]), - m_Data.m_Inputs[1]->Map()); - std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr<Decoder<float>> alphaDecoder = MakeDecoder<float>(GetTensorInfo(inputs[1]), + inputs[1]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), + outputs[0]->Map()); PreluImpl(m_Data, *inputDecoder, *alphaDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefPreluWorkload.hpp b/src/backends/reference/workloads/RefPreluWorkload.hpp index 72839e67dc..4fe5704711 100644 --- a/src/backends/reference/workloads/RefPreluWorkload.hpp +++ b/src/backends/reference/workloads/RefPreluWorkload.hpp @@ -16,7 +16,10 @@ class RefPreluWorkload : public BaseWorkload<PreluQueueDescriptor> public: explicit RefPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp index bcd6a627de..7b7961c5a0 100644 --- a/src/backends/reference/workloads/RefQLstmWorkload.cpp +++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp @@ -45,19 +45,30 @@ RefQLstmWorkload::RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const void RefQLstmWorkload::Execute() const { - // This is a porting of the QLSTM::Execute() method in the Android code base + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefQLstmWorkload::ExecuteAsync(WorkingMemDescriptor 
&workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefQLstmWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ + // This is a porting of the QLSTM::Execute() + // method in the Android code base // Note: this implementation wraps the arithmetic functions of the LSTM cell in Quantize/Dequantize ops, so all // computation is done in the floating point domain. Arithmetic functions are found in LstmUtils.cpp. // Refer to: android/frameworks/ml/nn/common/operations/QLSTM.cpp const DataType& internalType = armnn::DataType::QSymmS16; - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputStateInInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& cellStateInInfo = GetTensorInfo(m_Data.m_Inputs[2]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputStateInInfo = GetTensorInfo(inputs[1]); + const TensorInfo& cellStateInInfo = GetTensorInfo(inputs[2]); - const TensorInfo& outputStateOutInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& cellStateOutInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& outputStateOutInfo = GetTensorInfo(outputs[0]); + const TensorInfo& cellStateOutInfo = GetTensorInfo(outputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[2]); const TensorShape& inputShape = inputInfo.GetShape(); const TensorShape& outputStateInShape = outputStateInInfo.GetShape(); @@ -77,27 +88,27 @@ void RefQLstmWorkload::Execute() const // Input decoders std::unique_ptr<Decoder<float>> inputDecoder = - MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + MakeDecoder<float>(inputInfo, inputs[0]->Map()); std::unique_ptr<Decoder<float>> outputStateInDecoder = - MakeDecoder<float>(outputStateInInfo, 
m_Data.m_Inputs[1]->Map()); + MakeDecoder<float>(outputStateInInfo, inputs[1]->Map()); std::unique_ptr<Decoder<float>> cellStateInDecoder = - MakeDecoder<float>(cellStateInInfo, m_Data.m_Inputs[2]->Map()); + MakeDecoder<float>(cellStateInInfo, inputs[2]->Map()); // Output decoders std::unique_ptr<Decoder<float>> outputStateOutDecoder = - MakeDecoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> cellStateOutDecoder = - MakeDecoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map()); + MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map()); std::unique_ptr<Decoder<float>> outputDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); + MakeDecoder<float>(outputInfo, outputs[2]->Map()); // Output encoders std::unique_ptr<Encoder<float>> outputStateOutEncoder = - MakeEncoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map()); + MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map()); std::unique_ptr<Encoder<float>> cellStateOutEncoder = - MakeEncoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map()); + MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map()); std::unique_ptr<Encoder<float>> outputEncoder = - MakeEncoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); + MakeEncoder<float>(outputInfo, outputs[2]->Map()); // Weights decoders std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>( diff --git a/src/backends/reference/workloads/RefQLstmWorkload.hpp b/src/backends/reference/workloads/RefQLstmWorkload.hpp index 19d3a2af0f..f4242ec8a4 100644 --- a/src/backends/reference/workloads/RefQLstmWorkload.hpp +++ b/src/backends/reference/workloads/RefQLstmWorkload.hpp @@ -18,9 +18,11 @@ class RefQLstmWorkload : public BaseWorkload<QLstmQueueDescriptor> public: explicit RefQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const 
override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_InputToInputWeightsTensor; std::unique_ptr<ScopedCpuTensorHandle> m_InputToForgetWeightsTensor; std::unique_ptr<ScopedCpuTensorHandle> m_InputToCellWeightsTensor; diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.cpp b/src/backends/reference/workloads/RefQuantizeWorkload.cpp index 2eef5f33db..35791e65fb 100644 --- a/src/backends/reference/workloads/RefQuantizeWorkload.cpp +++ b/src/backends/reference/workloads/RefQuantizeWorkload.cpp @@ -34,21 +34,22 @@ RefQuantizeWorkload::RefQuantizeWorkload(const QuantizeQueueDescriptor& descript { } -void RefQuantizeWorkload::PostAllocationConfigure() +void RefQuantizeWorkload::Execute() const { - const TensorInfo& inputInfo = armnn::GetTensorInfo(m_Data.m_Inputs[0]); - m_InputDecoder = MakeDecoder<float>(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = armnn::GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputEncoder = MakeEncoder<float>(outputInfo); +void RefQuantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefQuantizeWorkload::Execute() const +void RefQuantizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map()); - QuantizeImpl(*m_InputDecoder, *m_OutputEncoder, m_NumElements); + QuantizeImpl(*inputDecoder, *outputEncoder, m_NumElements); } } //namespace armnn
\ No newline at end of file diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.hpp b/src/backends/reference/workloads/RefQuantizeWorkload.hpp index 9ae107607b..48116e7b39 100644 --- a/src/backends/reference/workloads/RefQuantizeWorkload.hpp +++ b/src/backends/reference/workloads/RefQuantizeWorkload.hpp @@ -16,13 +16,11 @@ class RefQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor> { public: RefQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo &info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: - - std::unique_ptr<Decoder<float>> m_InputDecoder; - std::unique_ptr<Encoder<float>> m_OutputEncoder; + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; size_t m_NumElements; }; diff --git a/src/backends/reference/workloads/RefRankWorkload.hpp b/src/backends/reference/workloads/RefRankWorkload.hpp index 660db6b8db..237ae999ce 100644 --- a/src/backends/reference/workloads/RefRankWorkload.hpp +++ b/src/backends/reference/workloads/RefRankWorkload.hpp @@ -19,10 +19,22 @@ public: using BaseWorkload<RankQueueDescriptor>::BaseWorkload; virtual void Execute() const override { - const int32_t rank = static_cast<int32_t>(GetTensorInfo(m_Data.m_Inputs[0]).GetNumDimensions()); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); + + } + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override + { + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + } + +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const + { + const int32_t rank = static_cast<int32_t>(GetTensorInfo(inputs[0]).GetNumDimensions()); - std::memcpy(GetOutputTensorData<void>(0, m_Data), &rank, sizeof(int32_t)); - m_Data.m_Outputs[0]->Unmap(); + // Write the rank through the caller-supplied handle so ExecuteAsync fills its own working memory, not m_Data's. + std::memcpy(outputs[0]->Map(), &rank, sizeof(int32_t)); + outputs[0]->Unmap(); } }; diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp 
b/src/backends/reference/workloads/RefReduceWorkload.cpp index 7a46ff9ffc..821e828b6e 100644 --- a/src/backends/reference/workloads/RefReduceWorkload.cpp +++ b/src/backends/reference/workloads/RefReduceWorkload.cpp @@ -20,15 +20,25 @@ RefReduceWorkload::RefReduceWorkload( void RefReduceWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefReduceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefReduceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map()); Decoder<float>& decoder = *decoderPtr; - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Encoder<float>& encoder = *encoderPtr; Reduce(inputInfo, diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp index 1d551acb4a..d45161c692 100644 --- a/src/backends/reference/workloads/RefReduceWorkload.hpp +++ b/src/backends/reference/workloads/RefReduceWorkload.hpp @@ -17,7 +17,10 @@ public: explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& 
workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeWorkload.cpp b/src/backends/reference/workloads/RefReshapeWorkload.cpp index 6d29781937..960d591fec 100644 --- a/src/backends/reference/workloads/RefReshapeWorkload.cpp +++ b/src/backends/reference/workloads/RefReshapeWorkload.cpp @@ -14,11 +14,21 @@ namespace armnn void RefReshapeWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefReshapeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefReshapeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeWorkload_Execute"); - void* output = GetOutputTensorData<void>(0, m_Data); - const void* input = GetInputTensorData<void>(0, m_Data); - unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + void* output = outputs[0]->Map(); + const void* input = inputs[0]->Map(); + unsigned int numBytes = GetTensorInfo(inputs[0]).GetNumBytes(); memcpy(output, input, numBytes); } diff --git a/src/backends/reference/workloads/RefReshapeWorkload.hpp b/src/backends/reference/workloads/RefReshapeWorkload.hpp index 7359ff9cde..2b6cf43c72 100644 --- a/src/backends/reference/workloads/RefReshapeWorkload.hpp +++ b/src/backends/reference/workloads/RefReshapeWorkload.hpp @@ -15,7 +15,10 @@ class RefReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor> { public: using BaseWorkload<ReshapeQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; 
}; } //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp index a23caf9fc2..2cf5888f33 100644 --- a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp +++ b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp @@ -19,14 +19,24 @@ namespace armnn void RefResizeBilinearWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefResizeBilinearWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefResizeBilinearWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map()); Decoder<float> &decoder = *decoderPtr; - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Encoder<float> &encoder = *encoderPtr; Resize(decoder, inputInfo, encoder, outputInfo, m_Data.m_Parameters.m_DataLayout, armnn::ResizeMethod::Bilinear); diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp index a0e33fa320..5ada3d1ff8 100644 --- a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp +++ 
b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp @@ -15,7 +15,10 @@ class RefResizeBilinearWorkload : public BaseWorkload<ResizeBilinearQueueDescrip { public: using BaseWorkload<ResizeBilinearQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeWorkload.cpp b/src/backends/reference/workloads/RefResizeWorkload.cpp index 21ff852320..d7a82b8f34 100644 --- a/src/backends/reference/workloads/RefResizeWorkload.cpp +++ b/src/backends/reference/workloads/RefResizeWorkload.cpp @@ -19,14 +19,24 @@ namespace armnn void RefResizeWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefResizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefResizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map()); Decoder<float> &decoder = *decoderPtr; - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map()); Encoder<float> 
&encoder = *encoderPtr; Resize(decoder, diff --git a/src/backends/reference/workloads/RefResizeWorkload.hpp b/src/backends/reference/workloads/RefResizeWorkload.hpp index e72271afd8..f58eadc9af 100644 --- a/src/backends/reference/workloads/RefResizeWorkload.hpp +++ b/src/backends/reference/workloads/RefResizeWorkload.hpp @@ -15,7 +15,10 @@ class RefResizeWorkload : public BaseWorkload<ResizeQueueDescriptor> { public: using BaseWorkload<ResizeQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSliceWorkload.cpp b/src/backends/reference/workloads/RefSliceWorkload.cpp index 2e448450c1..f94a83ee2c 100644 --- a/src/backends/reference/workloads/RefSliceWorkload.cpp +++ b/src/backends/reference/workloads/RefSliceWorkload.cpp @@ -15,14 +15,24 @@ namespace armnn void RefSliceWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSliceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSliceWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); Slice(inputInfo, m_Data.m_Parameters, - m_Data.m_Inputs[0]->Map(), - m_Data.m_Outputs[0]->Map(), + inputs[0]->Map(), + outputs[0]->Map(), GetDataTypeSize(inputInfo.GetDataType())); } diff --git a/src/backends/reference/workloads/RefSliceWorkload.hpp b/src/backends/reference/workloads/RefSliceWorkload.hpp index 006c7b775d..8a1db8e5a7 100644 --- 
a/src/backends/reference/workloads/RefSliceWorkload.hpp +++ b/src/backends/reference/workloads/RefSliceWorkload.hpp @@ -16,7 +16,10 @@ class RefSliceWorkload : public BaseWorkload<SliceQueueDescriptor> public: using BaseWorkload<SliceQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp index 2e4d811674..9733cbc859 100644 --- a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp +++ b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp @@ -19,16 +19,26 @@ namespace armnn void RefSoftmaxWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSoftmaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxWorkload_Execute"); - const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); - std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map()); Decoder<float> &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); - std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> 
encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map()); Encoder<float> &encoder = *encoderPtr; Softmax(decoder, diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp index 3d00c6ff96..6e62369880 100644 --- a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp +++ b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp @@ -15,7 +15,10 @@ class RefSoftmaxWorkload : public BaseWorkload<SoftmaxQueueDescriptor> { public: using BaseWorkload<SoftmaxQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp index c65d4c110c..e35632db5b 100644 --- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp +++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp @@ -14,13 +14,23 @@ namespace armnn void RefSpaceToBatchNdWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSpaceToBatchNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSpaceToBatchNdWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToBatchNdWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); - 
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); + std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); SpaceToBatchNd(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder); } diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp index caf264894a..82ddb32a44 100644 --- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp +++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp @@ -16,6 +16,9 @@ class RefSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescrip public: using BaseWorkload<SpaceToBatchNdQueueDescriptor>::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp index 1b12272506..88faf7a790 100644 --- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp +++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp @@ -14,13 +14,23 @@ namespace armnn void RefSpaceToDepthWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSpaceToDepthWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSpaceToDepthWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToDepthWorkload_Execute"); - const TensorInfo& inputInfo = 
GetTensorInfo(m_Data.m_Inputs[0]); - std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); + std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); SpaceToDepth(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder); } diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp index 89e5585249..d8f44b7995 100644 --- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp +++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp @@ -15,7 +15,10 @@ class RefSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor> { public: using BaseWorkload<SpaceToDepthQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterWorkload.cpp b/src/backends/reference/workloads/RefSplitterWorkload.cpp index 5207423995..076aefe517 100644 --- a/src/backends/reference/workloads/RefSplitterWorkload.cpp +++ b/src/backends/reference/workloads/RefSplitterWorkload.cpp @@ -13,8 +13,18 @@ namespace armnn void RefSplitterWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSplitterWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + 
Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSplitterWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterWorkload_Execute"); - Split(m_Data); + Split(m_Data, inputs, outputs); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterWorkload.hpp b/src/backends/reference/workloads/RefSplitterWorkload.hpp index c491e1ebcb..99b5ff6911 100644 --- a/src/backends/reference/workloads/RefSplitterWorkload.hpp +++ b/src/backends/reference/workloads/RefSplitterWorkload.hpp @@ -17,7 +17,10 @@ class RefSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor> { public: using BaseWorkload<SplitterQueueDescriptor>::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp index fc859506a3..20cf3b38f5 100644 --- a/src/backends/reference/workloads/RefStackWorkload.cpp +++ b/src/backends/reference/workloads/RefStackWorkload.cpp @@ -20,6 +20,16 @@ RefStackWorkload::RefStackWorkload(const StackQueueDescriptor& descriptor, void RefStackWorkload::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefStackWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefStackWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStackWorkload_Execute"); // Can perform a simple concatenation when axis == 0 @@ -29,7 +39,7 @@ void RefStackWorkload::Execute() const 
ARMNN_ASSERT(output != nullptr); unsigned int numInputs = m_Data.m_Parameters.m_NumInputs; - unsigned int inputLength = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int inputLength = GetTensorInfo(inputs[0]).GetNumElements(); for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx) { @@ -43,13 +53,13 @@ void RefStackWorkload::Execute() const } std::vector<std::unique_ptr<Decoder<float>>> inputDecoders; - for (unsigned int i=0; i<m_Data.m_Inputs.size(); ++i) + for (unsigned int i=0; i<inputs.size(); ++i) { - inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[i]), - m_Data.m_Inputs[i]->Map())); + inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(inputs[i]), + inputs[i]->Map())); } - std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), + outputs[0]->Map()); Stack(m_Data, inputDecoders, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefStackWorkload.hpp b/src/backends/reference/workloads/RefStackWorkload.hpp index ceb27d9f60..4276339a8f 100644 --- a/src/backends/reference/workloads/RefStackWorkload.hpp +++ b/src/backends/reference/workloads/RefStackWorkload.hpp @@ -16,7 +16,10 @@ class RefStackWorkload : public BaseWorkload<StackQueueDescriptor> public: explicit RefStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp index ce807ee087..336a687d5c 100644 --- 
a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp +++ b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp @@ -17,30 +17,20 @@ RefStridedSliceWorkload::RefStridedSliceWorkload(const StridedSliceQueueDescript void RefStridedSliceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute"); - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - - DataType inputDataType = inputInfo.GetDataType(); - DataType outputDataType = outputInfo.GetDataType(); - - ARMNN_ASSERT(inputDataType == outputDataType); - IgnoreUnused(outputDataType); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - StridedSlice(inputInfo, - m_Data.m_Parameters, - m_Data.m_Inputs[0]->Map(), - m_Data.m_Outputs[0]->Map(), - GetDataTypeSize(inputDataType)); +void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor) +void RefStridedSliceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute_WorkingMemDescriptor"); + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(descriptor.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(descriptor.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); DataType inputDataType = inputInfo.GetDataType(); DataType outputDataType = outputInfo.GetDataType(); @@ -50,8 +40,8 @@ void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor) StridedSlice(inputInfo, m_Data.m_Parameters, - descriptor.m_Inputs[0]->Map(), - 
descriptor.m_Outputs[0]->Map(), + inputs[0]->Map(), + outputs[0]->Map(), GetDataTypeSize(inputDataType)); } diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp index 3e253edcd9..38613e2779 100644 --- a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp +++ b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp @@ -15,7 +15,9 @@ class RefStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor> public: RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info); void Execute() const override; - void ExecuteAsync(WorkingMemDescriptor& descriptor) override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp index 2ab76041ef..634122835f 100644 --- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp @@ -33,35 +33,32 @@ RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload( } } -void RefTransposeConvolution2dWorkload::PostAllocationConfigure() +void RefTransposeConvolution2dWorkload::Execute() const { - // set up input decoder - const ITensorHandle* input = m_Data.m_Inputs[0]; - const TensorInfo& inputInfo = GetTensorInfo(input); - - m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder<float>(inputInfo); - - // set up output encoder - ITensorHandle* output = m_Data.m_Outputs[0]; - const TensorInfo& outputInfo = GetTensorInfo(output); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder<float>(outputInfo); +void 
RefTransposeConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefTransposeConvolution2dWorkload::Execute() const +void RefTransposeConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefTransposeConvolution2dWorkload_Execute"); - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); + + std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map()); TransposeConvolution2dImpl(m_Data.m_Parameters, - m_InputShape, - *m_InputDecoder, - m_OutputShape, - *m_OutputEncoder, + inputInfo.GetShape(), + *inputDecoder, + outputInfo.GetShape(), + *outputEncoder, m_WeightsShape, *m_WeightsDecoder, m_BiasesDecoder.get()); diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp index 9ded8c971f..7c18f10293 100644 --- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp @@ -21,22 +21,17 @@ public: const WorkloadInfo& info); ~RefTransposeConvolution2dWorkload() = default; - void PostAllocationConfigure() override; - void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; std::unique_ptr<ScopedCpuTensorHandle> m_Weights; std::unique_ptr<ScopedCpuTensorHandle> m_Biases; - std::unique_ptr<Decoder<float>> m_InputDecoder; - 
std::unique_ptr<Encoder<float>> m_OutputEncoder; - std::unique_ptr<Decoder<float>> m_WeightsDecoder; std::unique_ptr<Decoder<float>> m_BiasesDecoder; - TensorShape m_InputShape; - TensorShape m_OutputShape; TensorShape m_WeightsShape; }; diff --git a/src/backends/reference/workloads/RefTransposeWorkload.cpp b/src/backends/reference/workloads/RefTransposeWorkload.cpp index cc7a555c41..828badd042 100644 --- a/src/backends/reference/workloads/RefTransposeWorkload.cpp +++ b/src/backends/reference/workloads/RefTransposeWorkload.cpp @@ -16,12 +16,25 @@ namespace armnn template <armnn::DataType DataType> void RefTransposeWorkload<DataType>::Execute() const { + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +template <armnn::DataType DataType> +void RefTransposeWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +template <armnn::DataType DataType> +void RefTransposeWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) const +{ using T = ResolveType<DataType>; ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - const ITensorHandle* src = m_Data.m_Inputs[0]; - ITensorHandle* dst = m_Data.m_Outputs[0]; + const ITensorHandle* src = inputs[0]; + ITensorHandle* dst = outputs[0]; const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; armnnUtils::Transpose(GetTensorInfo(src).GetShape(), mappings, src->Map(), dst->Map(), sizeof(T)); diff --git a/src/backends/reference/workloads/RefTransposeWorkload.hpp b/src/backends/reference/workloads/RefTransposeWorkload.hpp index 1e03f2e694..08ba74facc 100644 --- a/src/backends/reference/workloads/RefTransposeWorkload.hpp +++ b/src/backends/reference/workloads/RefTransposeWorkload.hpp @@ -25,6 +25,9 @@ public: using TypedWorkload<TransposeQueueDescriptor, DataType>::m_Data; using TypedWorkload<TransposeQueueDescriptor, DataType>::TypedWorkload; void 
Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; }; using RefTransposeBFloat16Workload = RefTransposeWorkload<DataType::BFloat16>; diff --git a/src/backends/reference/workloads/Splitter.cpp b/src/backends/reference/workloads/Splitter.cpp index 09edc5e0f5..ed6d2b8fd8 100644 --- a/src/backends/reference/workloads/Splitter.cpp +++ b/src/backends/reference/workloads/Splitter.cpp @@ -18,12 +18,14 @@ namespace armnn { -void Split(const SplitterQueueDescriptor& data) +void Split(const SplitterQueueDescriptor& data, + std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) { - const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); std::unique_ptr<Decoder<float>> decoderPtr = - MakeDecoder<float>(inputInfo, data.m_Inputs[0]->Map()); + MakeDecoder<float>(inputInfo, inputs[0]->Map()); Decoder<float>& decoder = *decoderPtr; for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index) @@ -45,7 +47,7 @@ void Split(const SplitterQueueDescriptor& data) SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; //Split view extents are defined by the size of (the corresponding) input tensor. - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]); ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions()); // Check all dimensions to see if this element is inside the given input view. 
@@ -65,7 +67,7 @@ void Split(const SplitterQueueDescriptor& data) if (insideView) { std::unique_ptr<Encoder<float>> encoderPtr = - MakeEncoder<float>(outputInfo, data.m_Outputs[viewIdx]->Map()); + MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map()); Encoder<float>& encoder = *encoderPtr; unsigned int outIndex = 0; diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp index aff4bcad94..e38a054650 100644 --- a/src/backends/reference/workloads/Splitter.hpp +++ b/src/backends/reference/workloads/Splitter.hpp @@ -14,9 +14,11 @@ namespace armnn { template <typename DataType> -void Splitter(const SplitterQueueDescriptor& data) +void Splitter(const SplitterQueueDescriptor& data, + std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs) { - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index) { @@ -37,7 +39,7 @@ void Splitter(const SplitterQueueDescriptor& data) SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; //Split view extents are defined by the size of (the corresponding) input tensor. - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]); ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions()); // Check all dimensions to see if this element is inside the given input view. @@ -78,5 +80,7 @@ void Splitter(const SplitterQueueDescriptor& data) } } -void Split(const SplitterQueueDescriptor& data); +void Split(const SplitterQueueDescriptor& data, + std::vector<ITensorHandle*> inputs, + std::vector<ITensorHandle*> outputs); } //namespace armnn |