From b8181f72b8c7c9132373dbcf7f8709ec2c0f23c0 Mon Sep 17 00:00:00 2001
From: Finn Williams
Date: Wed, 7 Apr 2021 10:23:21 +0100
Subject: IVGCVSW-5787 Add/Update Execute() implementations in RefActivationWorkload

* Added multithreaded StridedSliceEndToEndTest

Signed-off-by: Finn Williams
Change-Id: I4579db7b5959e0a22256f1bda00238c22e611dec
---
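Note: every Ref workload touched below follows the same mechanical pattern — the existing
synchronous Execute() and the new ExecuteAsync() overload both forward to a private
Execute(inputs, outputs), so the computation is written once and only the source of the
tensor handles differs. A condensed sketch of the pattern (RefFooWorkload and
FooQueueDescriptor are hypothetical stand-ins, not part of this patch):

    class RefFooWorkload : public BaseWorkload<FooQueueDescriptor>
    {
    public:
        using BaseWorkload<FooQueueDescriptor>::BaseWorkload;

        void Execute() const override
        {
            // Synchronous path: tensor handles owned by the queue descriptor.
            Execute(m_Data.m_Inputs, m_Data.m_Outputs);
        }

        void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override
        {
            // Asynchronous path: per-call handles from the caller's working memory.
            Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
        }

    private:
        void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
        {
            // Decoders/encoders are created locally from whichever handles were
            // passed in, so concurrent invocations do not share mutable state.
            std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
            std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
            // ... per-workload computation using *decoder and *encoder ...
        }
    };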
 .../test/StridedSliceAsyncEndToEndTest.hpp | 160 ++++++++++++++++++++-
 src/backends/reference/test/RefEndToEndTests.cpp | 5 +
 src/backends/reference/workloads/Concatenate.cpp | 12 +-
 src/backends/reference/workloads/Concatenate.hpp | 4 +-
 .../reference/workloads/RefActivationWorkload.cpp | 19 ++-
 .../reference/workloads/RefActivationWorkload.hpp | 6 +-
 .../reference/workloads/RefArgMinMaxWorkload.cpp | 17 ++-
 .../reference/workloads/RefArgMinMaxWorkload.hpp | 6 +-
 .../workloads/RefBatchNormalizationWorkload.cpp | 19 ++-
 .../workloads/RefBatchNormalizationWorkload.hpp | 4 +-
 .../workloads/RefBatchToSpaceNdWorkload.cpp | 18 ++-
 .../workloads/RefBatchToSpaceNdWorkload.hpp | 6 +-
 .../reference/workloads/RefComparisonWorkload.cpp | 36 +++--
 .../reference/workloads/RefComparisonWorkload.hpp | 3 +
 .../reference/workloads/RefConcatWorkload.cpp | 12 +-
 .../reference/workloads/RefConcatWorkload.hpp | 5 +-
 .../reference/workloads/RefConstantWorkload.cpp | 19 ++-
 .../reference/workloads/RefConstantWorkload.hpp | 6 +-
 .../workloads/RefConvertBf16ToFp32Workload.cpp | 17 ++-
 .../workloads/RefConvertBf16ToFp32Workload.hpp | 5 +-
 .../workloads/RefConvertFp16ToFp32Workload.cpp | 17 ++-
 .../workloads/RefConvertFp16ToFp32Workload.hpp | 5 +-
 .../workloads/RefConvertFp32ToBf16Workload.cpp | 17 ++-
 .../workloads/RefConvertFp32ToBf16Workload.hpp | 5 +-
 .../workloads/RefConvertFp32ToFp16Workload.cpp | 17 ++-
 .../workloads/RefConvertFp32ToFp16Workload.hpp | 5 +-
 .../workloads/RefConvolution2dWorkload.cpp | 24 ++--
 .../workloads/RefConvolution2dWorkload.hpp | 9 +-
 .../reference/workloads/RefDebugWorkload.cpp | 16 ++-
 .../reference/workloads/RefDebugWorkload.hpp | 2 +
 .../workloads/RefDepthToSpaceWorkload.cpp | 16 ++-
 .../workloads/RefDepthToSpaceWorkload.hpp | 5 +-
 .../RefDepthwiseConvolution2dWorkload.cpp | 25 ++--
 .../RefDepthwiseConvolution2dWorkload.hpp | 9 +-
 .../reference/workloads/RefDequantizeWorkload.cpp | 18 ++-
 .../reference/workloads/RefDequantizeWorkload.hpp | 3 +
 .../workloads/RefDetectionPostProcessWorkload.cpp | 27 ++--
 .../workloads/RefDetectionPostProcessWorkload.hpp | 4 +-
 .../workloads/RefElementwiseUnaryWorkload.cpp | 31 ++--
 .../workloads/RefElementwiseUnaryWorkload.hpp | 6 +-
 .../reference/workloads/RefElementwiseWorkload.cpp | 36 ++---
 .../reference/workloads/RefElementwiseWorkload.hpp | 6 +-
 .../RefFakeQuantizationFloat32Workload.cpp | 17 ++-
 .../RefFakeQuantizationFloat32Workload.hpp | 5 +-
 .../reference/workloads/RefFillWorkload.cpp | 14 +-
 .../reference/workloads/RefFillWorkload.hpp | 5 +-
 .../reference/workloads/RefFloorWorkload.cpp | 20 ++-
 .../reference/workloads/RefFloorWorkload.hpp | 5 +-
 .../workloads/RefFullyConnectedWorkload.cpp | 41 ++++--
 .../workloads/RefFullyConnectedWorkload.hpp | 7 +-
 .../reference/workloads/RefGatherWorkload.cpp | 20 ++-
 .../reference/workloads/RefGatherWorkload.hpp | 3 +
 .../workloads/RefInstanceNormalizationWorkload.cpp | 19 ++-
 .../workloads/RefInstanceNormalizationWorkload.hpp | 5 +-
 .../workloads/RefL2NormalizationWorkload.cpp | 18 ++-
 .../workloads/RefL2NormalizationWorkload.hpp | 3 +
 .../reference/workloads/RefLogSoftmaxWorkload.cpp | 18 ++-
 .../reference/workloads/RefLogSoftmaxWorkload.hpp | 5 +-
 .../workloads/RefLogicalBinaryWorkload.cpp | 31 ++--
 .../workloads/RefLogicalBinaryWorkload.hpp | 9 +-
 .../workloads/RefLogicalUnaryWorkload.cpp | 23 +--
 .../workloads/RefLogicalUnaryWorkload.hpp | 8 +-
 .../reference/workloads/RefLstmWorkload.cpp | 46 +++---
 .../reference/workloads/RefLstmWorkload.hpp | 4 +-
 .../reference/workloads/RefMeanWorkload.cpp | 18 ++-
 .../reference/workloads/RefMeanWorkload.hpp | 5 +-
 .../workloads/RefNormalizationWorkload.cpp | 16 ++-
 .../workloads/RefNormalizationWorkload.hpp | 5 +-
 .../reference/workloads/RefPadWorkload.cpp | 14 +-
 .../reference/workloads/RefPadWorkload.hpp | 5 +-
 .../reference/workloads/RefPermuteWorkload.cpp | 17 ++-
 .../reference/workloads/RefPermuteWorkload.hpp | 3 +
 .../reference/workloads/RefPooling2dWorkload.cpp | 18 ++-
 .../reference/workloads/RefPooling2dWorkload.hpp | 5 +-
 .../reference/workloads/RefPreluWorkload.cpp | 22 ++-
 .../reference/workloads/RefPreluWorkload.hpp | 5 +-
 .../reference/workloads/RefQLstmWorkload.cpp | 43 +++---
 .../reference/workloads/RefQLstmWorkload.hpp | 4 +-
 .../reference/workloads/RefQuantizeWorkload.cpp | 19 +--
 .../reference/workloads/RefQuantizeWorkload.hpp | 6 +-
 .../reference/workloads/RefRankWorkload.hpp | 15 +-
 .../reference/workloads/RefReduceWorkload.cpp | 18 ++-
 .../reference/workloads/RefReduceWorkload.hpp | 5 +-
 .../reference/workloads/RefReshapeWorkload.cpp | 16 ++-
 .../reference/workloads/RefReshapeWorkload.hpp | 5 +-
 .../workloads/RefResizeBilinearWorkload.cpp | 18 ++-
 .../workloads/RefResizeBilinearWorkload.hpp | 5 +-
 .../reference/workloads/RefResizeWorkload.cpp | 18 ++-
 .../reference/workloads/RefResizeWorkload.hpp | 5 +-
 .../reference/workloads/RefSliceWorkload.cpp | 16 ++-
 .../reference/workloads/RefSliceWorkload.hpp | 5 +-
 .../reference/workloads/RefSoftmaxWorkload.cpp | 18 ++-
 .../reference/workloads/RefSoftmaxWorkload.hpp | 5 +-
 .../workloads/RefSpaceToBatchNdWorkload.cpp | 18 ++-
 .../workloads/RefSpaceToBatchNdWorkload.hpp | 3 +
 .../workloads/RefSpaceToDepthWorkload.cpp | 18 ++-
 .../workloads/RefSpaceToDepthWorkload.hpp | 5 +-
 .../reference/workloads/RefSplitterWorkload.cpp | 12 +-
 .../reference/workloads/RefSplitterWorkload.hpp | 5 +-
 .../reference/workloads/RefStackWorkload.cpp | 22 ++-
 .../reference/workloads/RefStackWorkload.hpp | 5 +-
 .../workloads/RefStridedSliceWorkload.cpp | 32 ++---
 .../workloads/RefStridedSliceWorkload.hpp | 4 +-
 .../RefTransposeConvolution2dWorkload.cpp | 37 +++--
 .../RefTransposeConvolution2dWorkload.hpp | 9 +-
 .../reference/workloads/RefTransposeWorkload.cpp | 17 ++-
 .../reference/workloads/RefTransposeWorkload.hpp | 3 +
 src/backends/reference/workloads/Splitter.cpp | 12 +-
 src/backends/reference/workloads/Splitter.hpp | 12 +-
 109 files changed, 1150 insertions(+), 426 deletions(-)

diff --git a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
index 66ccdbf1d9..16b10c88ac 100644
--- a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
+++ b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
@@ -22,6 +22,100 @@ namespace armnn
 namespace experimental
 {
 
+template<DataType ArmnnIType, DataType ArmnnOType,
+         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
+void AsyncThreadedEndToEndTestImpl(INetworkPtr network,
+                                   const std::vector<std::map<int, std::vector<TInput>>>& inputTensorData,
+                                   const std::vector<std::map<int, std::vector<TOutput>>>& expectedOutputData,
+                                   std::vector<BackendId> backends,
+                                   const size_t numberOfInferences,
+                                   float tolerance = 0.000001f)
+{
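+    // This helper exercises the experimental thread-safe execution path: one
+    // optimized network is loaded once, then numberOfInferences threads each
+    // call IRuntime::Execute() with their own working memory handle and their
+    // own input/output tensors.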
+    // Create Runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Optimize the Network
+    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
+
+
+    // Creates AsyncNetwork
+    NetworkId networkId = 0;
+    std::string errorMessage;
+    const INetworkProperties networkProperties(false, false, true);
+    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
+
+    std::vector<InputTensors> inputTensorsVec;
+    std::vector<OutputTensors> outputTensorsVec;
+    std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec;
+    std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles;
+
+    for (unsigned int i = 0; i < numberOfInferences; ++i)
+    {
+        InputTensors inputTensors;
+        OutputTensors outputTensors;
+        outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>());
+
+        inputTensors.reserve(inputTensorData.size());
+        for (auto&& it : inputTensorData[i])
+        {
+            inputTensors.push_back({it.first,
+                                    ConstTensor(runtime->GetInputTensorInfo(networkId, it.first), it.second.data())});
+        }
+
+        outputTensors.reserve(expectedOutputData.size());
+        for (auto&& it : expectedOutputData[i])
+        {
+            std::vector<TOutput> out(it.second.size());
+            outputStorageVec[i].emplace(it.first, out);
+            outputTensors.push_back({it.first,
+                                     Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
+                                            outputStorageVec[i].at(it.first).data())});
+        }
+
+        inputTensorsVec.push_back(inputTensors);
+        outputTensorsVec.push_back(outputTensors);
+
+        workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId));
+    }
+
+    std::vector<std::thread> threads;
+    for (unsigned int i = 0; i < numberOfInferences; ++i)
+    {
+        // Access the vectors before we do anything multi-threaded
+        InputTensors& inputTensors = inputTensorsVec[i];
+        OutputTensors& outputTensors = outputTensorsVec[i];
+        IWorkingMemHandle& workingMemHandle = *workingMemHandles[i].get();
+
+        threads.emplace_back([&]()
+        {
+            // Run the async network
+            runtime->Execute(workingMemHandle, inputTensors, outputTensors);
+        });
+    }
+
+    for (unsigned int i = 0; i < numberOfInferences; ++i)
+    {
+        threads[i].join();
+    }
+
+    // Checks the results.
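+    // (All threads have been joined above, so the output buffers can be read
+    // safely from this thread.)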
+    for (unsigned int i = 0; i < numberOfInferences; ++i)
+    {
+        for (auto &&it : expectedOutputData[i])
+        {
+            std::vector<TOutput> out = outputStorageVec[i].at(it.first);
+            for (unsigned int j = 0; j < out.size(); ++j)
+            {
+                BOOST_CHECK(Compare(it.second[j], out[j], tolerance) == true);
+            }
+        }
+    }
+
+}
+
+
+
 template<DataType ArmnnIType, DataType ArmnnOType,
          typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
 void AsyncEndToEndTestImpl(INetworkPtr network,
@@ -169,7 +263,71 @@ void StridedSlicedEndToEndTest(const std::vector<BackendId>& backends)
     std::map<int, std::vector<T>> inputTensorData = {{0, inputData}};
     std::map<int, std::vector<T>> expectedOutputData = {{0, outputExpected}};
 
-    AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends);
+    AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends, 1);
+}
+
+template<armnn::DataType ArmnnType>
+void StridedSlicedMultiThreadedEndToEndTest(const std::vector<BackendId>& backends)
+{
+    using namespace armnn;
+    using T = ResolveType<ArmnnType>;
+
+    const TensorShape& inputShape = {3, 2, 3, 1};
+    const TensorShape& outputShape = {1, 2, 3, 1};
+    const std::vector<int>& beginData = {1, 0, 0, 0};
+    const std::vector<int>& endData = {2, 2, 3, 1};
+    const std::vector<int>& stridesData = {1, 1, 1, 1};
+    int beginMask = 0;
+    int endMask = 0;
+    int shrinkAxisMask = 0;
+    int ellipsisMask = 0;
+    int newAxisMask = 0;
+
+    // Builds up the structure of the network
+    INetworkPtr net = CreateStridedSliceNetwork<T>(inputShape,
+                                                   outputShape,
+                                                   beginData,
+                                                   endData,
+                                                   stridesData,
+                                                   beginMask,
+                                                   endMask,
+                                                   shrinkAxisMask,
+                                                   ellipsisMask,
+                                                   newAxisMask);
+
+    BOOST_TEST_CHECKPOINT("create a network");
+
+    // Creates structures for input & output.
+    std::vector<float> inputData1{
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    };
+
+    std::vector<float> outputExpected1{ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f };
+
+    // Creates structures for input & output.
+    std::vector<float> inputData2{
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    };
+
+    std::vector<float> outputExpected2{ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f };
+
+    std::vector<std::map<int, std::vector<float>>> inputTensors;
+    std::vector<std::map<int, std::vector<float>>> outputTensors;
+
+    inputTensors.push_back(std::map<int, std::vector<float>> {{0, inputData1}});
+    inputTensors.push_back(std::map<int, std::vector<float>> {{0, inputData2}});
+    outputTensors.push_back(std::map<int, std::vector<float>> {{0, outputExpected1}});
+    outputTensors.push_back(std::map<int, std::vector<float>> {{0, outputExpected2}});
+
+    AsyncThreadedEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensors, outputTensors, backends, 2);
 }
 
 } // experimental namespace
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 521854b12b..0839c1c7af 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -1341,6 +1341,11 @@ BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedEndToEndTest)
 {
     armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(defaultBackends);
 }
+
+BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedMultiThreadedEndToEndTest)
+{
+    armnn::experimental::StridedSlicedMultiThreadedEndToEndTest<armnn::DataType::Float32>(defaultBackends);
+}
 #endif
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/Concatenate.cpp b/src/backends/reference/workloads/Concatenate.cpp
index a85e34ee61..a0e0abfaa0 100644
--- a/src/backends/reference/workloads/Concatenate.cpp
+++ b/src/backends/reference/workloads/Concatenate.cpp
@@ -11,11 +11,13 @@
 namespace armnn
 {
 
-void Concatenate(const ConcatQueueDescriptor &data)
+void Concatenate(const ConcatQueueDescriptor &data,
+                 std::vector<ITensorHandle*> inputs,
+                 std::vector<ITensorHandle*> outputs)
 {
-    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+    const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
 
-    std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, data.m_Outputs[0]->Map());
+    std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
     Encoder<float>& encoder = *encoderPtr;
 
     for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
@@ -37,7 +39,7 @@
         ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
 
         //Split view extents are defined by the size of (the corresponding) input tensor.
-        const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+        const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
         ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
 
         // Check all dimensions to see if this element is inside the given input view.
@@ -57,7 +59,7 @@
         if (insideView)
         {
             std::unique_ptr<Decoder<float>> decoderPtr =
-                MakeDecoder<float>(inputInfo, data.m_Inputs[viewIdx]->Map());
+                MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
             Decoder<float>& decoder = *decoderPtr;
             unsigned int inIndex = 0;
             unsigned int dimensionStride = 1;
diff --git a/src/backends/reference/workloads/Concatenate.hpp b/src/backends/reference/workloads/Concatenate.hpp
index 75e5f8c4f4..e0264b0aac 100644
--- a/src/backends/reference/workloads/Concatenate.hpp
+++ b/src/backends/reference/workloads/Concatenate.hpp
@@ -10,5 +10,7 @@
 namespace armnn
 {
 
-void Concatenate(const ConcatQueueDescriptor &data);
+void Concatenate(const ConcatQueueDescriptor &data,
+                 std::vector<ITensorHandle*> inputs,
+                 std::vector<ITensorHandle*> outputs);
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationWorkload.cpp b/src/backends/reference/workloads/RefActivationWorkload.cpp
index a26a6399bc..77958673e9 100644
--- a/src/backends/reference/workloads/RefActivationWorkload.cpp
+++ b/src/backends/reference/workloads/RefActivationWorkload.cpp
@@ -16,18 +16,29 @@ namespace armnn
 {
 
 void RefActivationWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefActivationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefActivationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationWorkload_Execute");
 
-    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
 
-    Activation(*MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()),
-               *MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()),
+    Activation(*MakeDecoder<float>(inputInfo, inputs[0]->Map()),
+               *MakeEncoder<float>(outputInfo, outputs[0]->Map()),
                inputInfo,
                m_Data.m_Parameters.m_Function,
                m_Data.m_Parameters.m_A,
                m_Data.m_Parameters.m_B);
 }
+
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationWorkload.hpp b/src/backends/reference/workloads/RefActivationWorkload.hpp
index 5b2377e363..429fb60aaa 100644
--- a/src/backends/reference/workloads/RefActivationWorkload.hpp
+++ b/src/backends/reference/workloads/RefActivationWorkload.hpp
@@ -15,7 +15,11 @@ class RefActivationWorkload : public BaseWorkload<ActivationQueueDescriptor>
 {
 public:
     using BaseWorkload<ActivationQueueDescriptor>::BaseWorkload;
-    virtual void Execute() const override;
+    void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
index bf8649f54d..77167a866b 100644
--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
+++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
@@ -18,16 +18,27 @@ RefArgMinMaxWorkload::RefArgMinMaxWorkload(
         const WorkloadInfo& info)
     : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) {}
 
+
 void RefArgMinMaxWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefArgMinMaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefArgMinMaxWorkload::Execute(std::vector<ITensorHandle*> inputs,
std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefArgMinMaxWorkload_Execute"); - const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, inputs[0]->Map()); Decoder &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); if (outputTensorInfo.GetDataType() == armnn::DataType::Signed32) { int32_t *output = GetOutputTensorData(0, m_Data); diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp index 97b70772d1..df9ebcab0b 100644 --- a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp +++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp @@ -16,6 +16,10 @@ public: explicit RefArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; + +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp index 21fcdab5a3..e1068896ba 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp @@ -23,6 +23,17 @@ RefBatchNormalizationWorkload::RefBatchNormalizationWorkload(const BatchNormaliz {} void RefBatchNormalizationWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefBatchNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefBatchNormalizationWorkload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationWorkload_Execute"); @@ -34,10 +45,10 @@ void RefBatchNormalizationWorkload::Execute() const m_Gamma->Map(true)); std::unique_ptr> betaDecoder = MakeDecoder(m_Beta->GetTensorInfo(), m_Beta->Map(true)); - std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), + outputs[0]->Map()); BatchNormImpl(m_Data, *meanDecoder, *varianceDecoder, *betaDecoder, *gammaDecoder, *inputDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp index 53d01f65da..a8a72ef65c 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp @@ -16,9 +16,11 @@ class RefBatchNormalizationWorkload : public BaseWorkload inputs, std::vector outputs) const; std::unique_ptr m_Mean; std::unique_ptr m_Variance; std::unique_ptr m_Beta; diff --git 
a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp index c21ef7640a..441d2ba2cf 100644 --- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp +++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp @@ -12,14 +12,24 @@ namespace armnn { void RefBatchToSpaceNdWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefBatchToSpaceNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefBatchToSpaceNdWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchToSpaceNdWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr> outputEncoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(inputInfo, inputs[0]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(outputInfo, outputs[0]->Map()); BatchToSpaceNd(m_Data.m_Parameters.m_DataLayout, inputInfo, outputInfo, m_Data.m_Parameters.m_BlockShape, m_Data.m_Parameters.m_Crops, *inputDecoder, *outputEncoder); diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp index 60577bab2e..07c800da83 100644 --- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp +++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp @@ -16,7 +16,11 @@ class RefBatchToSpaceNdWorkload : public BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; + +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/RefComparisonWorkload.cpp b/src/backends/reference/workloads/RefComparisonWorkload.cpp index 52ad9a2879..03df7a4c4a 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.cpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.cpp @@ -26,9 +26,15 @@ RefComparisonWorkload::RefComparisonWorkload(const ComparisonQueueDescriptor& de void RefComparisonWorkload::PostAllocationConfigure() { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefComparisonWorkload::PostAllocationConfigure(std::vector inputs, + std::vector outputs) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); m_Input0 = MakeDecoder(inputInfo0); m_Input1 = MakeDecoder(inputInfo1); @@ -37,20 +43,32 @@ void RefComparisonWorkload::PostAllocationConfigure() } void RefComparisonWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefComparisonWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) 
+{ + PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefComparisonWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefComparisonWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + m_Input0->Reset(inputs[0]->Map()); + m_Input1->Reset(inputs[1]->Map()); + m_Output->Reset(outputs[0]->Map()); using EqualFunction = ElementwiseBinaryFunction>; using GreaterFunction = ElementwiseBinaryFunction>; diff --git a/src/backends/reference/workloads/RefComparisonWorkload.hpp b/src/backends/reference/workloads/RefComparisonWorkload.hpp index a19e4a0540..de0144ca15 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.hpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.hpp @@ -21,8 +21,11 @@ public: RefComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info); void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void PostAllocationConfigure(std::vector inputs, std::vector outputs); + void Execute(std::vector inputs, std::vector outputs) const; using InType = float; using OutType = bool; diff --git a/src/backends/reference/workloads/RefConcatWorkload.cpp b/src/backends/reference/workloads/RefConcatWorkload.cpp index e606649ed0..c04c05354e 100644 --- a/src/backends/reference/workloads/RefConcatWorkload.cpp +++ b/src/backends/reference/workloads/RefConcatWorkload.cpp @@ -13,9 +13,19 @@ namespace armnn { void RefConcatWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConcatWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConcatWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConcatWorkload_Execute"); - Concatenate(m_Data); + Concatenate(m_Data, inputs, outputs); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefConcatWorkload.hpp b/src/backends/reference/workloads/RefConcatWorkload.hpp index 0be28bb7c8..f4e1aa85f2 100644 --- a/src/backends/reference/workloads/RefConcatWorkload.hpp +++ b/src/backends/reference/workloads/RefConcatWorkload.hpp @@ -15,7 +15,10 @@ class RefConcatWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantWorkload.cpp 
b/src/backends/reference/workloads/RefConstantWorkload.cpp index d3e65e6615..6290237d69 100644 --- a/src/backends/reference/workloads/RefConstantWorkload.cpp +++ b/src/backends/reference/workloads/RefConstantWorkload.cpp @@ -20,21 +20,20 @@ RefConstantWorkload::RefConstantWorkload( const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) {} -void RefConstantWorkload::PostAllocationConfigure() +void RefConstantWorkload::Execute() const { - const ConstantQueueDescriptor& data = this->m_Data; - - ARMNN_ASSERT(data.m_LayerOutput != nullptr); - - const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - ARMNN_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); + Execute(m_Data.m_Outputs); +} - memcpy(GetOutputTensorData(0, data), data.m_LayerOutput->GetConstTensor(), - outputInfo.GetNumBytes()); +void RefConstantWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Outputs); } -void RefConstantWorkload::Execute() const +void RefConstantWorkload::Execute(std::vector outputs) const { + memcpy(outputs[0]->Map(), m_Data.m_LayerOutput->GetConstTensor(), GetTensorInfo(outputs[0]).GetNumBytes()); + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantWorkload_Execute"); } diff --git a/src/backends/reference/workloads/RefConstantWorkload.hpp b/src/backends/reference/workloads/RefConstantWorkload.hpp index ada488a7b2..9af5903329 100644 --- a/src/backends/reference/workloads/RefConstantWorkload.hpp +++ b/src/backends/reference/workloads/RefConstantWorkload.hpp @@ -19,8 +19,10 @@ class RefConstantWorkload : public BaseWorkload public: RefConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp index c4b5416836..70e377d19b 100644 --- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp +++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp @@ -14,13 +14,24 @@ namespace armnn { void RefConvertBf16ToFp32Workload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertBf16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertBf16ToFp32Workload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertBf16ToFp32Workload_Execute"); - const BFloat16* const input = GetInputTensorDataBFloat16(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); + const BFloat16* const input = reinterpret_cast(inputs[0]->Map()); + float* const output = reinterpret_cast(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp index 87cdc3e1e3..90613621b4 
100644 --- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp +++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp @@ -15,7 +15,10 @@ class RefConvertBf16ToFp32Workload : public BFloat16ToFloat32Workload::BFloat16ToFloat32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp index ef813eb69b..347132d1f6 100644 --- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp @@ -14,13 +14,24 @@ namespace armnn { void RefConvertFp16ToFp32Workload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp16ToFp32Workload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); - const Half* const input = GetInputTensorDataHalf(0, m_Data); - float* const output = GetOutputTensorDataFloat(0, m_Data); + const Half* const input = reinterpret_cast(inputs[0]->Map()); + float* const output = reinterpret_cast(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp index 7c58e9f089..99ab9e9934 100644 --- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload::Float16ToFloat32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp index 181b236e83..7fe302a5ad 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp @@ -14,13 +14,24 @@ namespace armnn { void RefConvertFp32ToBf16Workload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp32ToBf16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp32ToBf16Workload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToBf16Workload_Execute"); - const float* const input = GetInputTensorDataFloat(0, m_Data); - BFloat16* const output = GetOutputTensorDataBFloat16(0, m_Data); + const float* const input = reinterpret_cast(inputs[0]->Map()); + BFloat16* const 
output = reinterpret_cast(outputs[0]->Map()); - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp index 409603bb6c..694032c8e6 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp32ToBf16Workload : public Float32ToBFloat16Workload::Float32ToBFloat16Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp index c68960fad2..be13458d89 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp @@ -15,14 +15,25 @@ namespace armnn { void RefConvertFp32ToFp16Workload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefConvertFp32ToFp16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefConvertFp32ToFp16Workload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); - const float* const input = GetInputTensorDataFloat(0, m_Data); - Half* const output = GetOutputTensorDataHalf(0, m_Data); + const float* const input = reinterpret_cast(inputs[0]->Map()); + Half* const output = reinterpret_cast(outputs[0]->Map()); // convert Fp32 input to Fp16 output - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); } diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp index e1fd8755cb..f1daa54436 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp @@ -15,7 +15,10 @@ class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload::Float32ToFloat16Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp index dad9936f1b..6d0ab413d8 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp @@ -30,24 +30,26 @@ RefConvolution2dWorkload::RefConvolution2dWorkload( } } -void RefConvolution2dWorkload::PostAllocationConfigure() +void RefConvolution2dWorkload::Execute() const { - const TensorInfo& inputInfo = 
GetTensorInfo(m_Data.m_Inputs[0]); - m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder(outputInfo); +void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefConvolution2dWorkload::Execute() const { +void RefConvolution2dWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dWorkload_Execute"); - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); + + const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape(); + const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape(); - Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, m_FilterShape, + Convolve(inputShape, *inputDecoder, outputShape, *outputEncoder, m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(), m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft, m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY, diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp index b6bdf23ffa..57df3ce6ae 100644 --- a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp @@ -19,21 +19,18 @@ public: explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; std::unique_ptr m_Weight; std::unique_ptr m_Bias; - std::unique_ptr> m_InputDecoder; - std::unique_ptr> m_OutputEncoder; std::unique_ptr> m_FilterDecoder; std::unique_ptr> m_BiasDecoder; - TensorShape m_InputShape; - TensorShape m_OutputShape; TensorShape m_FilterShape; }; diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp index f9950c8231..b0e19c5851 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.cpp +++ b/src/backends/reference/workloads/RefDebugWorkload.cpp @@ -16,19 +16,31 @@ namespace armnn template void RefDebugWorkload::Execute() const +{ + Execute(m_Data.m_Inputs); +} + +template +void RefDebugWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs); +} + +template +void RefDebugWorkload::Execute(std::vector inputs) const { using T = ResolveType; ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); const T* inputData = GetInputTensorData(0, m_Data); T* outputData = GetOutputTensorData(0, m_Data); if (m_Callback) { - m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, m_Data.m_Inputs[0]); + 
m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, inputs[0]); } else { diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp index d7e3cd9947..d0c47dd829 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.hpp +++ b/src/backends/reference/workloads/RefDebugWorkload.hpp @@ -30,10 +30,12 @@ public: using TypedWorkload::TypedWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; void RegisterDebugCallback(const DebugCallbackFunction& func) override; private: + void Execute(std::vector inputs) const; DebugCallbackFunction m_Callback; }; diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp index 93c1120a1c..22e35f0ec5 100644 --- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp +++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp @@ -12,15 +12,25 @@ namespace armnn { void RefDepthToSpaceWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDepthToSpaceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefDepthToSpaceWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthToSpaceWorkload_Execute"); - const TensorInfo inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo inputInfo = GetTensorInfo(inputs[0]); DepthToSpace(inputInfo, m_Data.m_Parameters, - m_Data.m_Inputs[0]->Map(), - m_Data.m_Outputs[0]->Map(), + inputs[0]->Map(), + outputs[0]->Map(), GetDataTypeSize(inputInfo.GetDataType())); } diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp index a30fadc3e9..ec260a92f7 100644 --- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp +++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp @@ -14,7 +14,10 @@ class RefDepthToSpaceWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp index cfc81ce203..8fe5dec7d1 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp @@ -32,26 +32,29 @@ RefDepthwiseConvolution2dWorkload::RefDepthwiseConvolution2dWorkload( } } -void RefDepthwiseConvolution2dWorkload::PostAllocationConfigure() +void RefDepthwiseConvolution2dWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder(outputInfo); +void RefDepthwiseConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void 
RefDepthwiseConvolution2dWorkload::Execute() const +void RefDepthwiseConvolution2dWorkload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dWorkload_Execute"); std::unique_ptr> pBiasDecoder{}; - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr> OutputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); + + const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape(); + const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape(); - Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, + Convolve(inputShape, *inputDecoder, outputShape, *OutputEncoder, m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(), m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft, m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY, diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp index 6d7037f660..65a8fd76cf 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp @@ -17,22 +17,19 @@ public: explicit RefDepthwiseConvolution2dWorkload(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; std::unique_ptr m_Weight; std::unique_ptr m_Bias; - std::unique_ptr > m_InputDecoder; - std::unique_ptr > m_OutputEncoder; std::unique_ptr > m_FilterDecoder; std::unique_ptr > m_BiasDecoder; - TensorShape m_InputShape; - TensorShape m_OutputShape; TensorShape m_FilterShape; }; diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.cpp b/src/backends/reference/workloads/RefDequantizeWorkload.cpp index d6e4964a49..f9d80073b0 100644 --- a/src/backends/reference/workloads/RefDequantizeWorkload.cpp +++ b/src/backends/reference/workloads/RefDequantizeWorkload.cpp @@ -13,14 +13,24 @@ namespace armnn { void RefDequantizeWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDequantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefDequantizeWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDequantizeWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder(outputInfo, outputs[0]->Map()); Dequantize(*inputDecoder, *outputEncoder, inputInfo, outputInfo); } diff --git 
a/src/backends/reference/workloads/RefDequantizeWorkload.hpp b/src/backends/reference/workloads/RefDequantizeWorkload.hpp index 691f713076..922d57c556 100644 --- a/src/backends/reference/workloads/RefDequantizeWorkload.hpp +++ b/src/backends/reference/workloads/RefDequantizeWorkload.hpp @@ -17,6 +17,9 @@ public: using BaseWorkload::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp index b9817ba1ea..25c326ad37 100644 --- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp @@ -19,20 +19,31 @@ RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload( m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} void RefDetectionPostProcessWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefDetectionPostProcessWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefDetectionPostProcessWorkload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute"); - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& boxEncodingsInfo = GetTensorInfo(inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(inputs[1]); const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo(); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + const TensorInfo& detectionBoxesInfo = GetTensorInfo(outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(outputs[1]); + const TensorInfo& detectionScoresInfo = GetTensorInfo(outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(outputs[3]); - auto boxEncodings = MakeDecoder(boxEncodingsInfo, m_Data.m_Inputs[0]->Map()); - auto scores = MakeDecoder(scoresInfo, m_Data.m_Inputs[1]->Map()); + auto boxEncodings = MakeDecoder(boxEncodingsInfo, inputs[0]->Map()); + auto scores = MakeDecoder(scoresInfo, inputs[1]->Map()); auto anchors = MakeDecoder(anchorsInfo, m_Anchors->Map(false)); float* detectionBoxes = GetOutputTensorData(0, m_Data); diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp index 799d0c6219..007dcea456 100644 --- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp @@ -16,9 +16,11 @@ class RefDetectionPostProcessWorkload : public BaseWorkload inputs, std::vector outputs) const; std::unique_ptr m_Anchors; }; diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp index 4fbb0d123f..b442f25c2a 100644 --- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp +++ 
b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp @@ -28,28 +28,29 @@ RefElementwiseUnaryWorkload::RefElementwiseUnaryWorkload(const ElementwiseUnaryQ : BaseWorkload(desc, info) {} -void RefElementwiseUnaryWorkload::PostAllocationConfigure() +void RefElementwiseUnaryWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input = MakeDecoder(inputInfo); +void RefElementwiseUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ - m_Output = MakeEncoder(outputInfo); + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefElementwiseUnaryWorkload::Execute() const +void RefElementwiseUnaryWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseUnaryWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape = inputInfo.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input->Reset(m_Data.m_Inputs[0]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> input = MakeDecoder(inputInfo, inputs[0]->Map()); + std::unique_ptr> output= MakeEncoder(outputInfo, outputs[0]->Map()); using AbsFunction = ElementwiseUnaryFunction>; using ExpFunction = ElementwiseUnaryFunction>; @@ -61,27 +62,27 @@ void RefElementwiseUnaryWorkload::Execute() const { case UnaryOperation::Abs: { - AbsFunction(inShape, outShape, *m_Input, *m_Output); + AbsFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Exp: { - ExpFunction(inShape, outShape, *m_Input, *m_Output); + ExpFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Neg: { - NegFunction(inShape, outShape, *m_Input, *m_Output); + NegFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Rsqrt: { - RsqrtFunction(inShape, outShape, *m_Input, *m_Output); + RsqrtFunction(inShape, outShape, *input, *output); break; } case UnaryOperation::Sqrt: { - SqrtFunction(inShape, outShape, *m_Input, *m_Output); + SqrtFunction(inShape, outShape, *input, *output); break; } default: diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp index efb2865ebd..d05347bbe5 100644 --- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp +++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp @@ -19,15 +19,13 @@ public: using BaseWorkload::m_Data; RefElementwiseUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; using InType = float; using OutType = float; - - std::unique_ptr> m_Input; - std::unique_ptr> m_Output; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.cpp b/src/backends/reference/workloads/RefElementwiseWorkload.cpp index 60acbd6252..dd7d325ca5 100644 --- a/src/backends/reference/workloads/RefElementwiseWorkload.cpp +++ 
b/src/backends/reference/workloads/RefElementwiseWorkload.cpp @@ -26,39 +26,41 @@ RefElementwiseWorkload::RefElementwiseWo } template -void RefElementwiseWorkload::PostAllocationConfigure() +void RefElementwiseWorkload::Execute() const { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input0 = MakeDecoder(inputInfo0); - m_Input1 = MakeDecoder(inputInfo1); - m_Output = MakeEncoder(outputInfo); +template +void RefElementwiseWorkload::ExecuteAsync( + WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } template -void RefElementwiseWorkload::Execute() const +void RefElementwiseWorkload::Execute( + std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, StringMapping::Instance().Get(DebugString)); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> input0 = MakeDecoder(inputInfo0, inputs[0]->Map()); + std::unique_ptr> input1 = MakeDecoder(inputInfo1, inputs[1]->Map()); + std::unique_ptr> output= MakeEncoder(outputInfo, outputs[0]->Map()); ElementwiseBinaryFunction(inShape0, inShape1, outShape, - *m_Input0, - *m_Input1, - *m_Output); + *input0, + *input1, + *output); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.hpp b/src/backends/reference/workloads/RefElementwiseWorkload.hpp index 03683b1a06..4dc4b5ba5a 100644 --- a/src/backends/reference/workloads/RefElementwiseWorkload.hpp +++ b/src/backends/reference/workloads/RefElementwiseWorkload.hpp @@ -26,13 +26,11 @@ public: using BaseWorkload::m_Data; RefElementwiseWorkload(const ParentDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: - std::unique_ptr> m_Input0; - std::unique_ptr> m_Input1; - std::unique_ptr> m_Output; + void Execute(std::vector inputs, std::vector outputs) const; }; template diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp index cf355d35d2..b30811b8ed 100644 --- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp @@ -27,13 +27,24 @@ void FakeQuantization(const float* inputData, float* outputData, uint32_t numEle } void RefFakeQuantizationFloat32Workload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFakeQuantizationFloat32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, 
workingMemDescriptor.m_Outputs); +} + +void RefFakeQuantizationFloat32Workload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); - const float* inputData = GetInputTensorDataFloat(0, m_Data); - float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = reinterpret_cast(inputs[0]->Map()); + float* outputData = reinterpret_cast(outputs[0]->Map()); FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), m_Data.m_Parameters.m_Min, m_Data.m_Parameters.m_Max); diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp index 269ca08d2a..8f6cabb3fe 100644 --- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp @@ -15,7 +15,10 @@ class RefFakeQuantizationFloat32Workload : public Float32Workload::Float32Workload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFillWorkload.cpp b/src/backends/reference/workloads/RefFillWorkload.cpp index 991ab45396..ea1ca87caf 100644 --- a/src/backends/reference/workloads/RefFillWorkload.cpp +++ b/src/backends/reference/workloads/RefFillWorkload.cpp @@ -15,12 +15,22 @@ namespace armnn { void RefFillWorkload::Execute() const +{ + Execute(m_Data.m_Outputs); +} + +void RefFillWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Outputs); +} + +void RefFillWorkload::Execute(std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFillWorkload_Execute"); - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, outputs[0]->Map()); Encoder &encoder = *encoderPtr; Fill(encoder, outputTensorInfo.GetShape(), m_Data.m_Parameters.m_Value); diff --git a/src/backends/reference/workloads/RefFillWorkload.hpp b/src/backends/reference/workloads/RefFillWorkload.hpp index 9be773c50b..e92514d865 100644 --- a/src/backends/reference/workloads/RefFillWorkload.hpp +++ b/src/backends/reference/workloads/RefFillWorkload.hpp @@ -15,7 +15,10 @@ class RefFillWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFloorWorkload.cpp b/src/backends/reference/workloads/RefFloorWorkload.cpp index 0c61386b9a..e7bd50ddea 100644 --- a/src/backends/reference/workloads/RefFloorWorkload.cpp +++ b/src/backends/reference/workloads/RefFloorWorkload.cpp @@ -14,18 +14,28 @@ namespace armnn { void RefFloorWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void 
RefFloorWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefFloorWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); - const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); + std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, inputs[0]->Map()); Decoder &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); - std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); + std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, outputs[0]->Map()); Encoder &encoder = *encoderPtr; - unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements(); for (unsigned int i = 0; i < numElements; ++i) { diff --git a/src/backends/reference/workloads/RefFloorWorkload.hpp b/src/backends/reference/workloads/RefFloorWorkload.hpp index 563640228d..28b2695c82 100644 --- a/src/backends/reference/workloads/RefFloorWorkload.hpp +++ b/src/backends/reference/workloads/RefFloorWorkload.hpp @@ -15,7 +15,10 @@ class RefFloorWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp index 49e105f206..deb56d4c6b 100644 --- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp +++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp @@ -34,28 +34,32 @@ RefFullyConnectedWorkload::RefFullyConnectedWorkload( void RefFullyConnectedWorkload::PostAllocationConfigure() { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFullyConnectedWorkload::PostAllocationConfigure(std::vector inputs, + std::vector outputs) +{ + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); m_InputShape = inputInfo.GetShape(); - m_InputDecoder = MakeDecoder(inputInfo); if (!m_Data.m_Parameters.m_ConstantWeights) { - const TensorInfo& rWeightInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]); ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); m_WeightShape = rWeightInfo.GetShape(); m_WeightDecoder = MakeDecoder(rWeightInfo); if (m_Data.m_Parameters.m_BiasEnabled) { - const TensorInfo& biasInfo = GetTensorInfo(m_Data.m_Inputs[2]); + const TensorInfo& biasInfo = GetTensorInfo(inputs[2]); m_BiasDecoder = MakeDecoder(biasInfo); } } - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); m_OutputShape = outputInfo.GetShape(); - m_OutputEncoder = MakeEncoder(outputInfo); m_NumActivations = 1; // Total number of activations in the input. 
for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++) @@ -65,24 +69,37 @@ void RefFullyConnectedWorkload::PostAllocationConfigure() } void RefFullyConnectedWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefFullyConnectedWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefFullyConnectedWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedWorkload_Execute"); - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr> OutputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); + if (!m_Data.m_Parameters.m_ConstantWeights) { - m_WeightDecoder->Reset(m_Data.m_Inputs[1]->Map()); + m_WeightDecoder->Reset(inputs[1]->Map()); if (m_Data.m_Parameters.m_BiasEnabled) { - m_BiasDecoder->Reset(m_Data.m_Inputs[2]->Map()); + m_BiasDecoder->Reset(inputs[2]->Map()); } } - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); FullyConnected(m_InputShape, - *m_InputDecoder, + *inputDecoder, m_OutputShape, - *m_OutputEncoder, + *OutputEncoder, m_WeightShape, *m_WeightDecoder, *m_BiasDecoder, diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp index a8f0756223..5c0f67ebaf 100644 --- a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp +++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp @@ -23,14 +23,15 @@ public: void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void PostAllocationConfigure(std::vector inputs, std::vector outputs); + void Execute(std::vector inputs, std::vector outputs) const; std::unique_ptr m_Weight; std::unique_ptr m_Bias; - std::unique_ptr> m_InputDecoder; - std::unique_ptr> m_OutputEncoder; std::unique_ptr> m_WeightDecoder; std::unique_ptr> m_BiasDecoder; diff --git a/src/backends/reference/workloads/RefGatherWorkload.cpp b/src/backends/reference/workloads/RefGatherWorkload.cpp index eaeed61b0a..020c067cfb 100644 --- a/src/backends/reference/workloads/RefGatherWorkload.cpp +++ b/src/backends/reference/workloads/RefGatherWorkload.cpp @@ -14,19 +14,29 @@ namespace armnn { void RefGatherWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefGatherWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefGatherWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefGatherWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputInfo0, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputInfo0, 
inputs[0]->Map()); Decoder& decoder = *decoderPtr; const int32_t* indicesData = GetInputTensorData(1, m_Data); - std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, outputs[0]->Map()); Encoder& encoder = *encoderPtr; Gather(inputInfo0, inputInfo1, outputInfo, decoder, indicesData, encoder, m_Data.m_Parameters.m_Axis); diff --git a/src/backends/reference/workloads/RefGatherWorkload.hpp b/src/backends/reference/workloads/RefGatherWorkload.hpp index 30019a8d4d..1664e1611d 100644 --- a/src/backends/reference/workloads/RefGatherWorkload.hpp +++ b/src/backends/reference/workloads/RefGatherWorkload.hpp @@ -21,6 +21,9 @@ class RefGatherWorkload : public BaseWorkload public: using BaseWorkload::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp index 150f0cb017..daee97ae3e 100644 --- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp @@ -19,13 +19,24 @@ RefInstanceNormalizationWorkload::RefInstanceNormalizationWorkload( : BaseWorkload(descriptor, info) {} void RefInstanceNormalizationWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefInstanceNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefInstanceNormalizationWorkload::Execute(std::vector inputs, + std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefInstanceNormalizationWorkload_Execute"); - std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), + outputs[0]->Map()); InstanceNorm(m_Data, *inputDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp index 620779f953..e366ddb05b 100644 --- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp @@ -16,7 +16,10 @@ class RefInstanceNormalizationWorkload : public BaseWorkload inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp index f80901edc9..ca31503620 100644 --- a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp @@ -25,14 +25,24 @@ RefL2NormalizationWorkload::RefL2NormalizationWorkload( : BaseWorkload(descriptor, info) {} void RefL2NormalizationWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefL2NormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + 
Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefL2NormalizationWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder(outputInfo, outputs[0]->Map()); DataLayoutIndexed dataLayout(m_Data.m_Parameters.m_DataLayout); diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp index 4beedc9992..c17767b943 100644 --- a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp @@ -18,6 +18,9 @@ public: const WorkloadInfo& info); void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp index a2ace13144..ebe1b1ecfe 100644 --- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp +++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp @@ -18,14 +18,24 @@ namespace armnn { void RefLogSoftmaxWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefLogSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefLogSoftmaxWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogSoftmaxWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> decoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr> encoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> decoder = MakeDecoder(inputInfo, inputs[0]->Map()); + std::unique_ptr> encoder = MakeEncoder(outputInfo, outputs[0]->Map()); ARMNN_ASSERT(decoder != nullptr); ARMNN_ASSERT(encoder != nullptr); diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp index f5048d90b3..c5d5d5b0c9 100644 --- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp +++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp @@ -15,7 +15,10 @@ class RefLogSoftmaxWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp 
b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp index 1b4e8f9aa0..f187e0ca31 100644 --- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp +++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp @@ -22,32 +22,31 @@ RefLogicalBinaryWorkload::RefLogicalBinaryWorkload(const LogicalBinaryQueueDescr : BaseWorkload(desc, info) {} -void RefLogicalBinaryWorkload::PostAllocationConfigure() +void RefLogicalBinaryWorkload::Execute() const { - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input0 = MakeDecoder(inputInfo0); - m_Input1 = MakeDecoder(inputInfo1); - m_Output = MakeEncoder(outputInfo); +void RefLogicalBinaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefLogicalBinaryWorkload::Execute() const +void RefLogicalBinaryWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalBinaryWorkload_Execute"); - const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape0 = inputInfo0.GetShape(); const TensorShape& inShape1 = inputInfo1.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input0->Reset(m_Data.m_Inputs[0]->Map()); - m_Input1->Reset(m_Data.m_Inputs[1]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> input0 = MakeDecoder(inputInfo0, inputs[0]->Map()); + std::unique_ptr> input1 = MakeDecoder(inputInfo1, inputs[1]->Map()); + std::unique_ptr> output = MakeEncoder(outputInfo, outputs[0]->Map()); using AndFunction = LogicalBinaryFunction>; using OrFunction = LogicalBinaryFunction>; @@ -56,12 +55,12 @@ void RefLogicalBinaryWorkload::Execute() const { case LogicalBinaryOperation::LogicalAnd: { - AndFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output); + AndFunction(inShape0, inShape1, outShape, *input0, *input1, *output); break; } case LogicalBinaryOperation::LogicalOr: { - OrFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output); + OrFunction(inShape0, inShape1, outShape, *input0, *input1, *output); break; } default: diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp index 4d6baf5fa4..d79a3039c7 100644 --- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp +++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp @@ -19,16 +19,13 @@ public: using BaseWorkload::m_Data; RefLogicalBinaryWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; using InType = bool; using OutType = bool; - - std::unique_ptr> m_Input0; - std::unique_ptr> m_Input1; - std::unique_ptr> m_Output; 
}; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp index 76eb5ac39f..bef2bdc668 100644 --- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp +++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp @@ -22,27 +22,28 @@ RefLogicalUnaryWorkload::RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDesc : BaseWorkload(desc, info) {} -void RefLogicalUnaryWorkload::PostAllocationConfigure() +void RefLogicalUnaryWorkload::Execute() const { - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - m_Input = MakeDecoder(inputInfo); - m_Output = MakeEncoder(outputInfo); +void RefLogicalUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefLogicalUnaryWorkload::Execute() const +void RefLogicalUnaryWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalUnaryWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inShape = inputInfo.GetShape(); const TensorShape& outShape = outputInfo.GetShape(); - m_Input->Reset(m_Data.m_Inputs[0]->Map()); - m_Output->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> input = MakeDecoder(inputInfo, inputs[0]->Map()); + std::unique_ptr> output = MakeEncoder(outputInfo, outputs[0]->Map()); using NotFunction = LogicalUnaryFunction>; @@ -50,7 +51,7 @@ void RefLogicalUnaryWorkload::Execute() const { case UnaryOperation::LogicalNot: { - NotFunction(inShape, outShape, *m_Input, *m_Output); + NotFunction(inShape, outShape, *input, *output); break; } default: diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp index 0d8b35495c..117f16836d 100644 --- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp +++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp @@ -19,15 +19,13 @@ public: using BaseWorkload::m_Data; RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); - void PostAllocationConfigure() override; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; using InType = bool; using OutType = bool; - - std::unique_ptr> m_Input; - std::unique_ptr> m_Output; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp index 7c37301d1d..09423547da 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.cpp +++ b/src/backends/reference/workloads/RefLstmWorkload.cpp @@ -39,26 +39,36 @@ RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const Wo {} void RefLstmWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefLstmWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} 
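Every workload touched by this patch lands on the same three-method shape; the hunks differ only in the kernel the private overload invokes. A condensed sketch of that shape follows (RefFooWorkload is a placeholder name, not a class in this change; ITensorHandle, WorkingMemDescriptor, MakeDecoder/MakeEncoder and ARMNN_SCOPED_PROFILING_EVENT are the existing Arm NN utilities):

void RefFooWorkload::Execute() const
{
    // Synchronous path: forward the tensor handles owned by the queue descriptor.
    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
}

void RefFooWorkload::ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)
{
    // Asynchronous path: forward externally owned working memory instead.
    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}

void RefFooWorkload::Execute(std::vector<ITensorHandle*> inputs,
                             std::vector<ITensorHandle*> outputs) const
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFooWorkload_Execute");

    // Decoders and encoders are built per call from the handles passed in,
    // replacing the cached members that PostAllocationConfigure() used to reset.
    const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
    std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
    std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
    // ... kernel-specific computation through *decoder and *encoder ...
}

Because the private overload touches no mutable member state, one workload instance can service ExecuteAsync() calls from several threads at once; workloads that still need per-shape setup, such as RefFullyConnectedWorkload above, rerun PostAllocationConfigure() on the working-memory handles first.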
+ +void RefLstmWorkload::Execute(std::vector inputs, std::vector outputs) const { // This is a porting of the LSTM::Eval() method in the Android code base // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); const TensorShape& inputShape = inputInfo.GetShape(); const DataType& outputType = outputInfo.GetDataType(); - std::unique_ptr> outputStateOut = MakeEncoder(outputInfo, m_Data.m_Outputs[1]->Map()); - std::unique_ptr> cellStateOut = MakeEncoder(outputInfo, m_Data.m_Outputs[2]->Map()); - std::unique_ptr> output = MakeEncoder(outputInfo, m_Data.m_Outputs[3]->Map()); + std::unique_ptr> outputStateOut = MakeEncoder(outputInfo, outputs[1]->Map()); + std::unique_ptr> cellStateOut = MakeEncoder(outputInfo, outputs[2]->Map()); + std::unique_ptr> output = MakeEncoder(outputInfo, outputs[3]->Map()); - std::unique_ptr> cellStateOutDecoder = MakeDecoder(outputInfo, m_Data.m_Outputs[2]->Map()); - std::unique_ptr> outputDecoder = MakeDecoder(outputInfo, m_Data.m_Outputs[3]->Map()); + std::unique_ptr> cellStateOutDecoder = MakeDecoder(outputInfo, outputs[2]->Map()); + std::unique_ptr> outputDecoder = MakeDecoder(outputInfo, outputs[3]->Map()); - std::unique_ptr> inputData = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - std::unique_ptr> outputStateIn = MakeDecoder(inputInfo, m_Data.m_Inputs[1]->Map()); - std::unique_ptr> cellStateIn = MakeDecoder(inputInfo, m_Data.m_Inputs[2]->Map()); + std::unique_ptr> inputData = MakeDecoder(inputInfo, inputs[0]->Map()); + std::unique_ptr> outputStateIn = MakeDecoder(inputInfo, inputs[1]->Map()); + std::unique_ptr> cellStateIn = MakeDecoder(inputInfo, inputs[2]->Map()); const uint32_t nBatch = inputShape[0]; const uint32_t nInput = inputShape[1]; @@ -71,19 +81,19 @@ void RefLstmWorkload::Execute() const const bool useLayerNorm = m_Data.m_Parameters.m_LayerNormEnabled; // Index the scratch buffers pointers to the global scratch buffer. 
- std::unique_ptr> inputGateScratch = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr> cellScratch = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr> forgetGateScratch = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); - std::unique_ptr> outputGateScratch = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputGateScratch = MakeEncoder(outputInfo, outputs[0]->Map()); + std::unique_ptr> cellScratch = MakeEncoder(outputInfo, outputs[0]->Map()); + std::unique_ptr> forgetGateScratch = MakeEncoder(outputInfo, outputs[0]->Map()); + std::unique_ptr> outputGateScratch = MakeEncoder(outputInfo, outputs[0]->Map()); std::unique_ptr> inputGateScratchDecoder = - MakeDecoder(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder(outputInfo, outputs[0]->Map()); std::unique_ptr> cellScratchDecoder = - MakeDecoder(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder(outputInfo, outputs[0]->Map()); std::unique_ptr> forgetGateScratchDecoder = - MakeDecoder(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder(outputInfo, outputs[0]->Map()); std::unique_ptr> outputGateScratchDecoder = - MakeDecoder(outputInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder(outputInfo, outputs[0]->Map()); if (useCifg) { diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp index ce5a775269..b55a1f9a9e 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.hpp +++ b/src/backends/reference/workloads/RefLstmWorkload.hpp @@ -18,9 +18,11 @@ class RefLstmWorkload : public BaseWorkload public: explicit RefLstmWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; std::unique_ptr m_InputToInputWeightsTensor; std::unique_ptr m_InputToForgetWeightsTensor; std::unique_ptr m_InputToCellWeightsTensor; diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp index 00e59bca4c..7941ce2c36 100644 --- a/src/backends/reference/workloads/RefMeanWorkload.cpp +++ b/src/backends/reference/workloads/RefMeanWorkload.cpp @@ -19,14 +19,24 @@ RefMeanWorkload::RefMeanWorkload(const MeanQueueDescriptor& descriptor, const Wo :BaseWorkload(descriptor, info) {} void RefMeanWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefMeanWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefMeanWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMeanWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder(outputInfo, outputs[0]->Map()); Reduce(inputInfo, outputInfo, diff --git a/src/backends/reference/workloads/RefMeanWorkload.hpp 
b/src/backends/reference/workloads/RefMeanWorkload.hpp index c673f940e0..b5a9ed812f 100644 --- a/src/backends/reference/workloads/RefMeanWorkload.hpp +++ b/src/backends/reference/workloads/RefMeanWorkload.hpp @@ -18,7 +18,10 @@ class RefMeanWorkload : public BaseWorkload { public: explicit RefMeanWorkload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.cpp b/src/backends/reference/workloads/RefNormalizationWorkload.cpp index d5d2104cba..36828acfb3 100644 --- a/src/backends/reference/workloads/RefNormalizationWorkload.cpp +++ b/src/backends/reference/workloads/RefNormalizationWorkload.cpp @@ -162,13 +162,23 @@ RefNormalizationWorkload::RefNormalizationWorkload(const NormalizationQueueDescr {} void RefNormalizationWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefNormalizationWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); - auto inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); - auto outputEncoder = MakeEncoder(inputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder(inputInfo, inputs[0]->Map()); + auto outputEncoder = MakeEncoder(inputInfo, outputs[0]->Map()); if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) { diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.hpp b/src/backends/reference/workloads/RefNormalizationWorkload.hpp index 9d68ffda58..59170b8a80 100644 --- a/src/backends/reference/workloads/RefNormalizationWorkload.hpp +++ b/src/backends/reference/workloads/RefNormalizationWorkload.hpp @@ -17,7 +17,10 @@ public: explicit RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp index af22c31001..ea515cae68 100644 --- a/src/backends/reference/workloads/RefPadWorkload.cpp +++ b/src/backends/reference/workloads/RefPadWorkload.cpp @@ -13,11 +13,21 @@ namespace armnn { void RefPadWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPadWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPadWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPadWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = 
GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); armnn::Pad(inputInfo, outputInfo, diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp index 0b8379a60f..afc620383f 100644 --- a/src/backends/reference/workloads/RefPadWorkload.hpp +++ b/src/backends/reference/workloads/RefPadWorkload.hpp @@ -15,7 +15,10 @@ class RefPadWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp index 1fb1421ed9..f6af208e8a 100644 --- a/src/backends/reference/workloads/RefPermuteWorkload.cpp +++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp @@ -15,13 +15,26 @@ namespace armnn template void RefPermuteWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +template +void RefPermuteWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +template +void RefPermuteWorkload::Execute(std::vector inputs, + std::vector outputs) const { using T = ResolveType; ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); - const ITensorHandle* src = m_Data.m_Inputs[0]; - ITensorHandle* dst = m_Data.m_Outputs[0]; + const ITensorHandle* src = inputs[0]; + ITensorHandle* dst = outputs[0]; const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp index 62a145617a..94f633423a 100644 --- a/src/backends/reference/workloads/RefPermuteWorkload.hpp +++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp @@ -25,6 +25,9 @@ public: using TypedWorkload::m_Data; using TypedWorkload::TypedWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; using RefPermuteBFloat16Workload = RefPermuteWorkload; diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.cpp b/src/backends/reference/workloads/RefPooling2dWorkload.cpp index 40b814789c..d337278fe1 100644 --- a/src/backends/reference/workloads/RefPooling2dWorkload.cpp +++ b/src/backends/reference/workloads/RefPooling2dWorkload.cpp @@ -14,14 +14,24 @@ namespace armnn { void RefPooling2dWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPooling2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPooling2dWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - auto inputDecoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0] ->Map()); - auto 
outputEncoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + auto inputDecoder = MakeDecoder(inputInfo, inputs[0] ->Map()); + auto outputEncoder = MakeEncoder(outputInfo, outputs[0]->Map()); Pooling2d(*inputDecoder, *outputEncoder, diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.hpp b/src/backends/reference/workloads/RefPooling2dWorkload.hpp index 24386b7e8d..3495d6b68d 100644 --- a/src/backends/reference/workloads/RefPooling2dWorkload.hpp +++ b/src/backends/reference/workloads/RefPooling2dWorkload.hpp @@ -18,6 +18,9 @@ class RefPooling2dWorkload : public BaseWorkload public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefPreluWorkload.cpp b/src/backends/reference/workloads/RefPreluWorkload.cpp index cdc0a63711..b298874334 100644 --- a/src/backends/reference/workloads/RefPreluWorkload.cpp +++ b/src/backends/reference/workloads/RefPreluWorkload.cpp @@ -19,15 +19,25 @@ RefPreluWorkload::RefPreluWorkload(const PreluQueueDescriptor& descriptor, {} void RefPreluWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefPreluWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefPreluWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPreluWorkload_Execute"); - std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(m_Data.m_Inputs[0]), - m_Data.m_Inputs[0]->Map()); - std::unique_ptr> alphaDecoder = MakeDecoder(GetTensorInfo(m_Data.m_Inputs[1]), - m_Data.m_Inputs[1]->Map()); - std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(m_Data.m_Outputs[0]), - m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), + inputs[0]->Map()); + std::unique_ptr> alphaDecoder = MakeDecoder(GetTensorInfo(inputs[1]), + inputs[1]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), + outputs[0]->Map()); PreluImpl(m_Data, *inputDecoder, *alphaDecoder, *outputEncoder); } diff --git a/src/backends/reference/workloads/RefPreluWorkload.hpp b/src/backends/reference/workloads/RefPreluWorkload.hpp index 72839e67dc..4fe5704711 100644 --- a/src/backends/reference/workloads/RefPreluWorkload.hpp +++ b/src/backends/reference/workloads/RefPreluWorkload.hpp @@ -16,7 +16,10 @@ class RefPreluWorkload : public BaseWorkload public: explicit RefPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp index bcd6a627de..7b7961c5a0 100644 --- a/src/backends/reference/workloads/RefQLstmWorkload.cpp +++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp @@ -45,19 +45,30 @@ RefQLstmWorkload::RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const void RefQLstmWorkload::Execute() const { - // This is a porting of the QLSTM::Execute() method in the Android code base + 
Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefQLstmWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefQLstmWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ + // This is a porting of the QLSTM::Execute() method + // in the Android code base // Note: this implementation wraps the arithmetic functions of the LSTM cell in Quantize/Dequantize ops, so all // computation is done in the floating point domain. Arithmetic functions are found in LstmUtils.cpp. // Refer to: android/frameworks/ml/nn/common/operations/QLSTM.cpp const DataType& internalType = armnn::DataType::QSymmS16; - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputStateInInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& cellStateInInfo = GetTensorInfo(m_Data.m_Inputs[2]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputStateInInfo = GetTensorInfo(inputs[1]); + const TensorInfo& cellStateInInfo = GetTensorInfo(inputs[2]); - const TensorInfo& outputStateOutInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& cellStateOutInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& outputStateOutInfo = GetTensorInfo(outputs[0]); + const TensorInfo& cellStateOutInfo = GetTensorInfo(outputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[2]); const TensorShape& inputShape = inputInfo.GetShape(); const TensorShape& outputStateInShape = outputStateInInfo.GetShape(); @@ -77,27 +88,27 @@ void RefQLstmWorkload::Execute() const // Input decoders std::unique_ptr<Decoder<float>> inputDecoder = - MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()); + MakeDecoder<float>(inputInfo, inputs[0]->Map()); std::unique_ptr<Decoder<float>> outputStateInDecoder = - MakeDecoder<float>(outputStateInInfo, m_Data.m_Inputs[1]->Map()); + MakeDecoder<float>(outputStateInInfo, inputs[1]->Map()); std::unique_ptr<Decoder<float>> cellStateInDecoder = - MakeDecoder<float>(cellStateInInfo, m_Data.m_Inputs[2]->Map()); + MakeDecoder<float>(cellStateInInfo, inputs[2]->Map()); // Output decoders std::unique_ptr<Decoder<float>> outputStateOutDecoder = - MakeDecoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map()); + MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map()); std::unique_ptr<Decoder<float>> cellStateOutDecoder = - MakeDecoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map()); + MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map()); std::unique_ptr<Decoder<float>> outputDecoder = - MakeDecoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); + MakeDecoder<float>(outputInfo, outputs[2]->Map()); // Output encoders std::unique_ptr<Encoder<float>> outputStateOutEncoder = - MakeEncoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map()); + MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map()); std::unique_ptr<Encoder<float>> cellStateOutEncoder = - MakeEncoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map()); + MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map()); std::unique_ptr<Encoder<float>> outputEncoder = - MakeEncoder<float>(outputInfo, m_Data.m_Outputs[2]->Map()); + MakeEncoder<float>(outputInfo, outputs[2]->Map()); // Weights decoders std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>( diff --git a/src/backends/reference/workloads/RefQLstmWorkload.hpp b/src/backends/reference/workloads/RefQLstmWorkload.hpp index 19d3a2af0f..f4242ec8a4 100644 --- a/src/backends/reference/workloads/RefQLstmWorkload.hpp +++ b/src/backends/reference/workloads/RefQLstmWorkload.hpp @@ -18,9 +18,11 @@ class RefQLstmWorkload : public BaseWorkload<QLstmQueueDescriptor>
public: explicit RefQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: + void Execute(std::vector inputs, std::vector outputs) const; std::unique_ptr m_InputToInputWeightsTensor; std::unique_ptr m_InputToForgetWeightsTensor; std::unique_ptr m_InputToCellWeightsTensor; diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.cpp b/src/backends/reference/workloads/RefQuantizeWorkload.cpp index 2eef5f33db..35791e65fb 100644 --- a/src/backends/reference/workloads/RefQuantizeWorkload.cpp +++ b/src/backends/reference/workloads/RefQuantizeWorkload.cpp @@ -34,21 +34,22 @@ RefQuantizeWorkload::RefQuantizeWorkload(const QuantizeQueueDescriptor& descript { } -void RefQuantizeWorkload::PostAllocationConfigure() +void RefQuantizeWorkload::Execute() const { - const TensorInfo& inputInfo = armnn::GetTensorInfo(m_Data.m_Inputs[0]); - m_InputDecoder = MakeDecoder(inputInfo); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} - const TensorInfo& outputInfo = armnn::GetTensorInfo(m_Data.m_Outputs[0]); - m_OutputEncoder = MakeEncoder(outputInfo); +void RefQuantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); } -void RefQuantizeWorkload::Execute() const +void RefQuantizeWorkload::Execute(std::vector inputs, std::vector outputs) const { - m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map()); - m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map()); + std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); + std::unique_ptr> outputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); - QuantizeImpl(*m_InputDecoder, *m_OutputEncoder, m_NumElements); + QuantizeImpl(*inputDecoder, *outputEncoder, m_NumElements); } } //namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.hpp b/src/backends/reference/workloads/RefQuantizeWorkload.hpp index 9ae107607b..48116e7b39 100644 --- a/src/backends/reference/workloads/RefQuantizeWorkload.hpp +++ b/src/backends/reference/workloads/RefQuantizeWorkload.hpp @@ -16,13 +16,11 @@ class RefQuantizeWorkload : public BaseWorkload { public: RefQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo &info); - void PostAllocationConfigure() override; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: - - std::unique_ptr> m_InputDecoder; - std::unique_ptr> m_OutputEncoder; + void Execute(std::vector inputs, std::vector outputs) const; size_t m_NumElements; }; diff --git a/src/backends/reference/workloads/RefRankWorkload.hpp b/src/backends/reference/workloads/RefRankWorkload.hpp index 660db6b8db..237ae999ce 100644 --- a/src/backends/reference/workloads/RefRankWorkload.hpp +++ b/src/backends/reference/workloads/RefRankWorkload.hpp @@ -19,10 +19,21 @@ public: using BaseWorkload::BaseWorkload; virtual void Execute() const override { - const int32_t rank = static_cast(GetTensorInfo(m_Data.m_Inputs[0]).GetNumDimensions()); + Execute(m_Data.m_Inputs, m_Data.m_Outputs); + + } + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override + { + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); + } + +private: + void Execute(std::vector inputs, std::vector outputs) const + { + const int32_t rank = 
static_cast(GetTensorInfo(inputs[0]).GetNumDimensions()); std::memcpy(GetOutputTensorData(0, m_Data), &rank, sizeof(int32_t)); - m_Data.m_Outputs[0]->Unmap(); + outputs[0]->Unmap(); } }; diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp b/src/backends/reference/workloads/RefReduceWorkload.cpp index 7a46ff9ffc..821e828b6e 100644 --- a/src/backends/reference/workloads/RefReduceWorkload.cpp +++ b/src/backends/reference/workloads/RefReduceWorkload.cpp @@ -19,16 +19,26 @@ RefReduceWorkload::RefReduceWorkload( : BaseWorkload(descriptor, info) {} void RefReduceWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefReduceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefReduceWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, inputs[0]->Map()); Decoder& decoder = *decoderPtr; - std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, outputs[0]->Map()); Encoder& encoder = *encoderPtr; Reduce(inputInfo, diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp index 1d551acb4a..d45161c692 100644 --- a/src/backends/reference/workloads/RefReduceWorkload.hpp +++ b/src/backends/reference/workloads/RefReduceWorkload.hpp @@ -17,7 +17,10 @@ public: explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeWorkload.cpp b/src/backends/reference/workloads/RefReshapeWorkload.cpp index 6d29781937..960d591fec 100644 --- a/src/backends/reference/workloads/RefReshapeWorkload.cpp +++ b/src/backends/reference/workloads/RefReshapeWorkload.cpp @@ -13,12 +13,22 @@ namespace armnn { void RefReshapeWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefReshapeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefReshapeWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeWorkload_Execute"); - void* output = GetOutputTensorData(0, m_Data); - const void* input = GetInputTensorData(0, m_Data); - unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + void* output = outputs[0]->Map(); + const void* input = inputs[0]->Map(); + unsigned int numBytes = GetTensorInfo(inputs[0]).GetNumBytes(); memcpy(output, input, numBytes); } diff --git a/src/backends/reference/workloads/RefReshapeWorkload.hpp b/src/backends/reference/workloads/RefReshapeWorkload.hpp index 7359ff9cde..2b6cf43c72 100644 --- 
a/src/backends/reference/workloads/RefReshapeWorkload.hpp +++ b/src/backends/reference/workloads/RefReshapeWorkload.hpp @@ -15,7 +15,10 @@ class RefReshapeWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp index a23caf9fc2..2cf5888f33 100644 --- a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp +++ b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp @@ -18,15 +18,25 @@ namespace armnn { void RefResizeBilinearWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefResizeBilinearWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefResizeBilinearWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, inputs[0]->Map()); Decoder &decoder = *decoderPtr; - std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, outputs[0]->Map()); Encoder &encoder = *encoderPtr; Resize(decoder, inputInfo, encoder, outputInfo, m_Data.m_Parameters.m_DataLayout, armnn::ResizeMethod::Bilinear); diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp index a0e33fa320..5ada3d1ff8 100644 --- a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp +++ b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp @@ -15,7 +15,10 @@ class RefResizeBilinearWorkload : public BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeWorkload.cpp b/src/backends/reference/workloads/RefResizeWorkload.cpp index 21ff852320..d7a82b8f34 100644 --- a/src/backends/reference/workloads/RefResizeWorkload.cpp +++ b/src/backends/reference/workloads/RefResizeWorkload.cpp @@ -18,15 +18,25 @@ namespace armnn { void RefResizeWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefResizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefResizeWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const 
TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputInfo, inputs[0]->Map()); Decoder &decoder = *decoderPtr; - std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputInfo, outputs[0]->Map()); Encoder &encoder = *encoderPtr; Resize(decoder, diff --git a/src/backends/reference/workloads/RefResizeWorkload.hpp b/src/backends/reference/workloads/RefResizeWorkload.hpp index e72271afd8..f58eadc9af 100644 --- a/src/backends/reference/workloads/RefResizeWorkload.hpp +++ b/src/backends/reference/workloads/RefResizeWorkload.hpp @@ -15,7 +15,10 @@ class RefResizeWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSliceWorkload.cpp b/src/backends/reference/workloads/RefSliceWorkload.cpp index 2e448450c1..f94a83ee2c 100644 --- a/src/backends/reference/workloads/RefSliceWorkload.cpp +++ b/src/backends/reference/workloads/RefSliceWorkload.cpp @@ -14,15 +14,25 @@ namespace armnn { void RefSliceWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSliceWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSliceWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); Slice(inputInfo, m_Data.m_Parameters, - m_Data.m_Inputs[0]->Map(), - m_Data.m_Outputs[0]->Map(), + inputs[0]->Map(), + outputs[0]->Map(), GetDataTypeSize(inputInfo.GetDataType())); } diff --git a/src/backends/reference/workloads/RefSliceWorkload.hpp b/src/backends/reference/workloads/RefSliceWorkload.hpp index 006c7b775d..8a1db8e5a7 100644 --- a/src/backends/reference/workloads/RefSliceWorkload.hpp +++ b/src/backends/reference/workloads/RefSliceWorkload.hpp @@ -16,7 +16,10 @@ class RefSliceWorkload : public BaseWorkload public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } // namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp index 2e4d811674..9733cbc859 100644 --- a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp +++ b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp @@ -18,17 +18,27 @@ namespace armnn { void RefSoftmaxWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSoftmaxWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, 
"RefSoftmaxWorkload_Execute"); - const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]); - std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, m_Data.m_Inputs[0]->Map()); + std::unique_ptr> decoderPtr = MakeDecoder(inputTensorInfo, inputs[0]->Map()); Decoder &decoder = *decoderPtr; - const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]); - std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, m_Data.m_Outputs[0]->Map()); + std::unique_ptr> encoderPtr = MakeEncoder(outputTensorInfo, outputs[0]->Map()); Encoder &encoder = *encoderPtr; Softmax(decoder, diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp index 3d00c6ff96..6e62369880 100644 --- a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp +++ b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp @@ -15,7 +15,10 @@ class RefSoftmaxWorkload : public BaseWorkload { public: using BaseWorkload::BaseWorkload; - virtual void Execute() const override; + void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp index c65d4c110c..e35632db5b 100644 --- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp +++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp @@ -13,14 +13,24 @@ namespace armnn { void RefSpaceToBatchNdWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefSpaceToBatchNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor) +{ + Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); +} + +void RefSpaceToBatchNdWorkload::Execute(std::vector inputs, std::vector outputs) const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToBatchNdWorkload_Execute"); - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - std::unique_ptr> decoder = MakeDecoder(inputInfo, m_Data.m_Inputs[0]->Map()); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + std::unique_ptr> decoder = MakeDecoder(inputInfo, inputs[0]->Map()); - const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); - std::unique_ptr> encoder = MakeEncoder(outputInfo, m_Data.m_Outputs[0]->Map()); + const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); + std::unique_ptr> encoder = MakeEncoder(outputInfo, outputs[0]->Map()); SpaceToBatchNd(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder); } diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp index caf264894a..82ddb32a44 100644 --- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp +++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp @@ -16,6 +16,9 @@ class RefSpaceToBatchNdWorkload : public BaseWorkload::BaseWorkload; void Execute() const override; + void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; +private: + void Execute(std::vector inputs, std::vector outputs) const; }; } //namespace armnn diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp 
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
index 1b12272506..88faf7a790 100644
--- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
@@ -13,14 +13,24 @@ namespace armnn
 {
 
 void RefSpaceToDepthWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSpaceToDepthWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSpaceToDepthWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToDepthWorkload_Execute");
 
-    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
-    std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+    const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+    std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
 
-    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
-    std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+    std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
 
     SpaceToDepth(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder);
 }
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
index 89e5585249..d8f44b7995 100644
--- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
@@ -15,7 +15,10 @@ class RefSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
 {
 public:
     using BaseWorkload<SpaceToDepthQueueDescriptor>::BaseWorkload;
-    virtual void Execute() const override;
+    void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterWorkload.cpp b/src/backends/reference/workloads/RefSplitterWorkload.cpp
index 5207423995..076aefe517 100644
--- a/src/backends/reference/workloads/RefSplitterWorkload.cpp
+++ b/src/backends/reference/workloads/RefSplitterWorkload.cpp
@@ -12,9 +12,19 @@ namespace armnn
 {
 
 void RefSplitterWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSplitterWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSplitterWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterWorkload_Execute");
-    Split(m_Data);
+    Split(m_Data, inputs, outputs);
 }
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterWorkload.hpp b/src/backends/reference/workloads/RefSplitterWorkload.hpp
index c491e1ebcb..99b5ff6911 100644
--- a/src/backends/reference/workloads/RefSplitterWorkload.hpp
+++ b/src/backends/reference/workloads/RefSplitterWorkload.hpp
@@ -17,7 +17,10 @@ class RefSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor>
 {
 public:
     using BaseWorkload<SplitterQueueDescriptor>::BaseWorkload;
-    virtual void Execute() const override;
+    void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 } //namespace armnn
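Helper routines shared between workloads get the same treatment: instead of reaching into the descriptor's own m_Inputs/m_Outputs, they accept the handle vectors explicitly, as the Split() change above shows. A hedged sketch of the resulting call shape, reusing the stand-in types from the previous example (Split and SplitterQueueDescriptor are simplified here, not ArmNN's actual declarations):

    #include <vector>

    struct ITensorHandle {};
    struct SplitterQueueDescriptor
    {
        // In ArmNN the descriptor also owns handle vectors; after this patch
        // the reference Split() no longer reads them directly.
        std::vector<ITensorHandle*> m_Inputs;
        std::vector<ITensorHandle*> m_Outputs;
    };

    // Helper signature after the patch: parameters and view origins still come
    // from the descriptor, but the tensor memory comes from the caller, so the
    // async path can substitute per-inference handles.
    void Split(const SplitterQueueDescriptor& data,
               std::vector<ITensorHandle*> inputs,
               std::vector<ITensorHandle*> outputs)
    {
        (void)data; (void)inputs; (void)outputs;  // real code walks the views
    }

    // Both entry points of the workload funnel into this helper:
    //   sync:  Split(m_Data, m_Data.m_Inputs, m_Data.m_Outputs);
    //   async: Split(m_Data, workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);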
diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp
index fc859506a3..20cf3b38f5 100644
--- a/src/backends/reference/workloads/RefStackWorkload.cpp
+++ b/src/backends/reference/workloads/RefStackWorkload.cpp
@@ -19,6 +19,16 @@ RefStackWorkload::RefStackWorkload(const StackQueueDescriptor& descriptor,
 {}
 
 void RefStackWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefStackWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefStackWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStackWorkload_Execute");
 
@@ -29,7 +39,7 @@ void RefStackWorkload::Execute() const
     ARMNN_ASSERT(output != nullptr);
 
     unsigned int numInputs   = m_Data.m_Parameters.m_NumInputs;
-    unsigned int inputLength = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+    unsigned int inputLength = GetTensorInfo(inputs[0]).GetNumElements();
 
     for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
     {
@@ -50,13 +60,13 @@ void RefStackWorkload::Execute() const
     }
 
     std::vector<std::unique_ptr<Decoder<float>>> inputDecoders;
-    for (unsigned int i=0; i<m_Data.m_Inputs.size(); ++i)
+    for (unsigned int i=0; i<inputs.size(); ++i)
     {
-        inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[i]),
-                                                   m_Data.m_Inputs[i]->Map()));
+        inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(inputs[i]),
+                                                   inputs[i]->Map()));
     }
-    std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
-                                                                       m_Data.m_Outputs[0]->Map());
+    std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
+                                                                       outputs[0]->Map());
 
     Stack(m_Data, inputDecoders, *outputEncoder);
 }
diff --git a/src/backends/reference/workloads/RefStackWorkload.hpp b/src/backends/reference/workloads/RefStackWorkload.hpp
index ceb27d9f60..4276339a8f 100644
--- a/src/backends/reference/workloads/RefStackWorkload.hpp
+++ b/src/backends/reference/workloads/RefStackWorkload.hpp
@@ -16,7 +16,10 @@ class RefStackWorkload : public BaseWorkload<StackQueueDescriptor>
 public:
     explicit RefStackWorkload(const StackQueueDescriptor& descriptor,
                               const WorkloadInfo& info);
-    virtual void Execute() const override;
+    void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 } // namespace armnn
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
index ce807ee087..336a687d5c 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
@@ -17,30 +17,20 @@ RefStridedSliceWorkload::RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
 
 void RefStridedSliceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute");
-
-    const TensorInfo& inputInfo  = GetTensorInfo(m_Data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
-
-    DataType inputDataType  = inputInfo.GetDataType();
-    DataType outputDataType = outputInfo.GetDataType();
-
-    ARMNN_ASSERT(inputDataType == outputDataType);
-    IgnoreUnused(outputDataType);
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
 
-    StridedSlice(inputInfo,
-                 m_Data.m_Parameters,
-                 m_Data.m_Inputs[0]->Map(),
-                 m_Data.m_Outputs[0]->Map(),
-                 GetDataTypeSize(inputDataType));
+void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
 }
 
-void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor)
+void RefStridedSliceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
 {
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute_WorkingMemDescriptor");
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute");
 
-    const TensorInfo& inputInfo = GetTensorInfo(descriptor.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(descriptor.m_Outputs[0]);
+    const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
 
     DataType inputDataType  = inputInfo.GetDataType();
     DataType outputDataType = outputInfo.GetDataType();
@@ -50,8 +40,8 @@ void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor)
 
     StridedSlice(inputInfo,
                  m_Data.m_Parameters,
-                 descriptor.m_Inputs[0]->Map(),
-                 descriptor.m_Outputs[0]->Map(),
+                 inputs[0]->Map(),
+                 outputs[0]->Map(),
                  GetDataTypeSize(inputDataType));
 }
 
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
index 3e253edcd9..38613e2779 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
@@ -15,7 +15,9 @@ class RefStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor>
 public:
     RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info);
     void Execute() const override;
-    void ExecuteAsync(WorkingMemDescriptor& descriptor) override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 } // namespace armnn
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
index 2ab76041ef..634122835f 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -33,35 +33,32 @@ RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload(
     }
 }
 
-void RefTransposeConvolution2dWorkload::PostAllocationConfigure()
+void RefTransposeConvolution2dWorkload::Execute() const
 {
-    // set up input decoder
-    const ITensorHandle* input  = m_Data.m_Inputs[0];
-    const TensorInfo& inputInfo = GetTensorInfo(input);
-
-    m_InputShape   = inputInfo.GetShape();
-    m_InputDecoder = MakeDecoder<float>(inputInfo);
-
-    // set up output encoder
-    ITensorHandle* output        = m_Data.m_Outputs[0];
-    const TensorInfo& outputInfo = GetTensorInfo(output);
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
 
-    m_OutputShape   = outputInfo.GetShape();
-    m_OutputEncoder = MakeEncoder<float>(outputInfo);
+void RefTransposeConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
 }
 
-void RefTransposeConvolution2dWorkload::Execute() const
+void RefTransposeConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs,
+                                                std::vector<ITensorHandle*> outputs) const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefTransposeConvolution2dWorkload_Execute");
 
-    m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
-    m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
+    const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+    std::unique_ptr<Decoder<float>> inputDecoder  = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+    std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
 
     TransposeConvolution2dImpl(m_Data.m_Parameters,
-                               m_InputShape,
-                               *m_InputDecoder,
-                               m_OutputShape,
-                               *m_OutputEncoder,
+                               inputInfo.GetShape(),
+                               *inputDecoder,
+                               outputInfo.GetShape(),
+                               *outputEncoder,
                                m_WeightsShape,
                                *m_WeightsDecoder,
                                m_BiasesDecoder.get());
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
index 9ded8c971f..7c18f10293 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
@@ -21,22 +21,17 @@ public:
                                       const WorkloadInfo& info);
     ~RefTransposeConvolution2dWorkload() = default;
 
-    void PostAllocationConfigure() override;
-
     void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 
 private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
     std::unique_ptr<ScopedCpuTensorHandle> m_Weights;
     std::unique_ptr<ScopedCpuTensorHandle> m_Biases;
 
-    std::unique_ptr<Decoder<float>> m_InputDecoder;
-    std::unique_ptr<Encoder<float>> m_OutputEncoder;
-
     std::unique_ptr<Decoder<float>> m_WeightsDecoder;
     std::unique_ptr<Decoder<float>> m_BiasesDecoder;
 
-    TensorShape m_InputShape;
-    TensorShape m_OutputShape;
     TensorShape m_WeightsShape;
 };
 
diff --git a/src/backends/reference/workloads/RefTransposeWorkload.cpp b/src/backends/reference/workloads/RefTransposeWorkload.cpp
index cc7a555c41..828badd042 100644
--- a/src/backends/reference/workloads/RefTransposeWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeWorkload.cpp
@@ -15,13 +15,26 @@ namespace armnn
 
 template <armnn::DataType DataType>
 void RefTransposeWorkload<DataType>::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefTransposeWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefTransposeWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs,
+                                             std::vector<ITensorHandle*> outputs) const
 {
     using T = ResolveType<DataType>;
 
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
 
-    const ITensorHandle* src = m_Data.m_Inputs[0];
-    ITensorHandle*       dst = m_Data.m_Outputs[0];
+    const ITensorHandle* src = inputs[0];
+    ITensorHandle*       dst = outputs[0];
     const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
 
     armnnUtils::Transpose(GetTensorInfo(src).GetShape(), mappings, src->Map(), dst->Map(), sizeof(T));
diff --git a/src/backends/reference/workloads/RefTransposeWorkload.hpp b/src/backends/reference/workloads/RefTransposeWorkload.hpp
index 1e03f2e694..08ba74facc 100644
--- a/src/backends/reference/workloads/RefTransposeWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeWorkload.hpp
@@ -25,6 +25,9 @@ public:
     using TypedWorkload<TransposeQueueDescriptor, DataType>::m_Data;
     using TypedWorkload<TransposeQueueDescriptor, DataType>::TypedWorkload;
     void Execute() const override;
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 };
 
 using RefTransposeBFloat16Workload = RefTransposeWorkload<DataType::BFloat16>;
diff --git a/src/backends/reference/workloads/Splitter.cpp b/src/backends/reference/workloads/Splitter.cpp
index 09edc5e0f5..ed6d2b8fd8 100644
--- a/src/backends/reference/workloads/Splitter.cpp
+++ b/src/backends/reference/workloads/Splitter.cpp
@@ -18,12 +18,14 @@ namespace armnn
 {
 
-void Split(const SplitterQueueDescriptor& data)
+void Split(const SplitterQueueDescriptor& data,
+           std::vector<ITensorHandle*> inputs,
+           std::vector<ITensorHandle*> outputs)
 {
-    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
 
     std::unique_ptr<Decoder<float>> decoderPtr =
-        MakeDecoder<float>(inputInfo, data.m_Inputs[0]->Map());
+        MakeDecoder<float>(inputInfo, inputs[0]->Map());
     Decoder<float>& decoder = *decoderPtr;
 
     for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
@@ -45,7 +47,7 @@ void Split(const SplitterQueueDescriptor& data)
             SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
 
             //Split view extents are defined by the size of (the corresponding) input tensor.
-            const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+            const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
             ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
 
             // Check all dimensions to see if this element is inside the given input view.
@@ -65,7 +67,7 @@ void Split(const SplitterQueueDescriptor& data)
             if (insideView)
             {
                 std::unique_ptr<Encoder<float>> encoderPtr =
-                    MakeEncoder<float>(outputInfo, data.m_Outputs[viewIdx]->Map());
+                    MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
                 Encoder<float>& encoder = *encoderPtr;
 
                 unsigned int outIndex = 0;
diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp
index aff4bcad94..e38a054650 100644
--- a/src/backends/reference/workloads/Splitter.hpp
+++ b/src/backends/reference/workloads/Splitter.hpp
@@ -14,9 +14,11 @@ namespace armnn
 {
 
 template <typename DataType>
-void Splitter(const SplitterQueueDescriptor& data)
+void Splitter(const SplitterQueueDescriptor& data,
+              std::vector<ITensorHandle*> inputs,
+              std::vector<ITensorHandle*> outputs)
 {
-    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
 
     for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
     {
@@ -37,7 +39,7 @@ void Splitter(const SplitterQueueDescriptor& data)
             SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
 
             //Split view extents are defined by the size of (the corresponding) input tensor.
-            const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+            const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
             ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
 
             // Check all dimensions to see if this element is inside the given input view.
@@ -78,5 +80,7 @@ void Splitter(const SplitterQueueDescriptor& data)
     }
 }
 
-void Split(const SplitterQueueDescriptor& data);
+void Split(const SplitterQueueDescriptor& data,
+           std::vector<ITensorHandle*> inputs,
+           std::vector<ITensorHandle*> outputs);
 } //namespace armnn
-- 
cgit v1.2.1
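The net effect of the patch is that reference workloads no longer cache tensor-dependent state between calls (see the removal of PostAllocationConfigure() and of the member decoders, encoders, and shapes in RefTransposeConvolution2dWorkload above), which is what makes overlapping ExecuteAsync() calls safe. A rough, self-contained sketch of the usage this enables, once more with stand-in types rather than the real ArmNN API:

    #include <cstddef>
    #include <thread>
    #include <vector>

    struct ITensorHandle {};
    struct WorkingMemDescriptor
    {
        std::vector<ITensorHandle*> m_Inputs;
        std::vector<ITensorHandle*> m_Outputs;
    };

    struct ExampleRefWorkload
    {
        // Touches only the handles it is given, so calls may overlap.
        void ExecuteAsync(WorkingMemDescriptor& wmd)
        {
            (void)wmd;  // a real workload maps wmd.m_Inputs / wmd.m_Outputs here
        }
    };

    int main()
    {
        ExampleRefWorkload workload;       // one shared workload instance...
        const std::size_t numThreads = 4;

        std::vector<WorkingMemDescriptor> workingMem(numThreads);  // ...N working sets
        std::vector<std::thread> threads;

        // Each inference gets its own WorkingMemDescriptor; the shared workload
        // object carries only immutable parameters, so no locking is needed.
        for (std::size_t i = 0; i < numThreads; ++i)
        {
            threads.emplace_back([&workload, &workingMem, i] { workload.ExecuteAsync(workingMem[i]); });
        }
        for (auto& t : threads)
        {
            t.join();
        }
        return 0;
    }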