author     Finn Williams <Finn.Williams@arm.com>  2021-04-07 10:23:21 +0100
committer  Finn Williams <Finn.Williams@arm.com>  2021-04-14 15:18:38 +0100
commit     b8181f72b8c7c9132373dbcf7f8709ec2c0f23c0 (patch)
tree       04cc91a6efb7e2601f80e4213a747938165b7184
parent     b898222a8856475f0217be5e78b4816aa1914f15 (diff)
download   armnn-b8181f72b8c7c9132373dbcf7f8709ec2c0f23c0.tar.gz
IVGCVSW-5787 Add/Update Execute() implementations in RefActivationWorkload
* Added multithreaded StridedSliceEndToEndTest

Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: I4579db7b5959e0a22256f1bda00238c22e611dec
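In outline, every reference workload touched by this patch gains the same shape of change: the existing Execute() forwards the queue descriptor's own tensor handles, a new ExecuteAsync() forwards the tensor handles from a caller-supplied WorkingMemDescriptor, and both call a private Execute(inputs, outputs) overload that does the actual work. The following is a minimal sketch of that pattern, distilled from the RefActivationWorkload hunks below; RefExampleWorkload is a placeholder name, not a class in the patch.

// Sketch only (not part of the patch): the shape each reference workload takes
// after this change. The real classes are the Ref*Workload types listed below.
class RefExampleWorkload : public BaseWorkload<ActivationQueueDescriptor>
{
public:
    using BaseWorkload<ActivationQueueDescriptor>::BaseWorkload;

    // Synchronous path: keep the old behaviour by forwarding the queue
    // descriptor's own tensor handles.
    void Execute() const override
    {
        Execute(m_Data.m_Inputs, m_Data.m_Outputs);
    }

    // Asynchronous path: forward the per-inference tensor handles supplied in
    // the caller's WorkingMemDescriptor instead of touching m_Data's handles.
    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override
    {
        Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
    }

private:
    // All tensor access goes through the handles passed in, so no workload state
    // is mutated and the same workload can serve several inferences concurrently.
    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
    {
        const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
        const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);

        Activation(*MakeDecoder<float>(inputInfo, inputs[0]->Map()),
                   *MakeEncoder<float>(outputInfo, outputs[0]->Map()),
                   inputInfo,
                   m_Data.m_Parameters.m_Function,
                   m_Data.m_Parameters.m_A,
                   m_Data.m_Parameters.m_B);
    }
};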
-rw-r--r--  src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp  160
-rw-r--r--  src/backends/reference/test/RefEndToEndTests.cpp  5
-rw-r--r--  src/backends/reference/workloads/Concatenate.cpp  12
-rw-r--r--  src/backends/reference/workloads/Concatenate.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefActivationWorkload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefActivationWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefArgMinMaxWorkload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefArgMinMaxWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefComparisonWorkload.cpp  36
-rw-r--r--  src/backends/reference/workloads/RefComparisonWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefConcatWorkload.cpp  12
-rw-r--r--  src/backends/reference/workloads/RefConcatWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefConstantWorkload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefConstantWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dWorkload.cpp  24
-rw-r--r--  src/backends/reference/workloads/RefConvolution2dWorkload.hpp  9
-rw-r--r--  src/backends/reference/workloads/RefDebugWorkload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefDebugWorkload.hpp  2
-rw-r--r--  src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp  25
-rw-r--r--  src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp  9
-rw-r--r--  src/backends/reference/workloads/RefDequantizeWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefDequantizeWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp  27
-rw-r--r--  src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp  31
-rw-r--r--  src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefElementwiseWorkload.cpp  36
-rw-r--r--  src/backends/reference/workloads/RefElementwiseWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefFillWorkload.cpp  14
-rw-r--r--  src/backends/reference/workloads/RefFillWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefFloorWorkload.cpp  20
-rw-r--r--  src/backends/reference/workloads/RefFloorWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedWorkload.cpp  41
-rw-r--r--  src/backends/reference/workloads/RefFullyConnectedWorkload.hpp  7
-rw-r--r--  src/backends/reference/workloads/RefGatherWorkload.cpp  20
-rw-r--r--  src/backends/reference/workloads/RefGatherWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefL2NormalizationWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefL2NormalizationWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp  31
-rw-r--r--  src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp  9
-rw-r--r--  src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp  23
-rw-r--r--  src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp  8
-rw-r--r--  src/backends/reference/workloads/RefLstmWorkload.cpp  46
-rw-r--r--  src/backends/reference/workloads/RefLstmWorkload.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefMeanWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefMeanWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefNormalizationWorkload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefNormalizationWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefPadWorkload.cpp  14
-rw-r--r--  src/backends/reference/workloads/RefPadWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefPermuteWorkload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefPermuteWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefPooling2dWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefPooling2dWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefPreluWorkload.cpp  22
-rw-r--r--  src/backends/reference/workloads/RefPreluWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefQLstmWorkload.cpp  43
-rw-r--r--  src/backends/reference/workloads/RefQLstmWorkload.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefQuantizeWorkload.cpp  19
-rw-r--r--  src/backends/reference/workloads/RefQuantizeWorkload.hpp  6
-rw-r--r--  src/backends/reference/workloads/RefRankWorkload.hpp  15
-rw-r--r--  src/backends/reference/workloads/RefReduceWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefReduceWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefReshapeWorkload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefReshapeWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefResizeBilinearWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefResizeWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefResizeWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefSliceWorkload.cpp  16
-rw-r--r--  src/backends/reference/workloads/RefSliceWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefSoftmaxWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp  18
-rw-r--r--  src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefSplitterWorkload.cpp  12
-rw-r--r--  src/backends/reference/workloads/RefSplitterWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefStackWorkload.cpp  22
-rw-r--r--  src/backends/reference/workloads/RefStackWorkload.hpp  5
-rw-r--r--  src/backends/reference/workloads/RefStridedSliceWorkload.cpp  32
-rw-r--r--  src/backends/reference/workloads/RefStridedSliceWorkload.hpp  4
-rw-r--r--  src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp  37
-rw-r--r--  src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp  9
-rw-r--r--  src/backends/reference/workloads/RefTransposeWorkload.cpp  17
-rw-r--r--  src/backends/reference/workloads/RefTransposeWorkload.hpp  3
-rw-r--r--  src/backends/reference/workloads/Splitter.cpp  12
-rw-r--r--  src/backends/reference/workloads/Splitter.hpp  12
109 files changed, 1150 insertions, 426 deletions
diff --git a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
index 66ccdbf1d9..16b10c88ac 100644
--- a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
+++ b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
@@ -24,6 +24,100 @@ namespace experimental
template<DataType ArmnnIType, DataType ArmnnOType,
typename TInput = ResolveType <ArmnnIType>, typename TOutput = ResolveType <ArmnnOType>>
+void AsyncThreadedEndToEndTestImpl(INetworkPtr network,
+ const std::vector<std::map<int, std::vector<TInput>>>& inputTensorData,
+ const std::vector<std::map<int, std::vector<TOutput>>>& expectedOutputData,
+ std::vector<BackendId> backends,
+ const size_t numberOfInferences,
+ float tolerance = 0.000001f)
+{
+ // Create Runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Optimize the Network
+ IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
+
+
+ // Creates AsyncNetwork
+ NetworkId networkId = 0;
+ std::string errorMessage;
+ const INetworkProperties networkProperties(false, false, true);
+ runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
+
+ std::vector<InputTensors> inputTensorsVec;
+ std::vector<OutputTensors> outputTensorsVec;
+ std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec;
+ std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles;
+
+ for (unsigned int i = 0; i < numberOfInferences; ++i)
+ {
+ InputTensors inputTensors;
+ OutputTensors outputTensors;
+ outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>());
+
+ inputTensors.reserve(inputTensorData.size());
+ for (auto&& it : inputTensorData[i])
+ {
+ inputTensors.push_back({it.first,
+ ConstTensor(runtime->GetInputTensorInfo(networkId, it.first), it.second.data())});
+ }
+
+ outputTensors.reserve(expectedOutputData.size());
+ for (auto&& it : expectedOutputData[i])
+ {
+ std::vector<TOutput> out(it.second.size());
+ outputStorageVec[i].emplace(it.first, out);
+ outputTensors.push_back({it.first,
+ Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
+ outputStorageVec[i].at(it.first).data())});
+ }
+
+ inputTensorsVec.push_back(inputTensors);
+ outputTensorsVec.push_back(outputTensors);
+
+ workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ std::vector<std::thread> threads;
+ for (unsigned int i = 0; i < numberOfInferences; ++i)
+ {
+ // Access the vectors before we do anything multi-threaded
+ InputTensors& inputTensors = inputTensorsVec[i];
+ OutputTensors& outputTensors = outputTensorsVec[i];
+ IWorkingMemHandle& workingMemHandle = *workingMemHandles[i].get();
+
+ threads.emplace_back([&]()
+ {
+ // Run the async network
+ runtime->Execute(workingMemHandle, inputTensors, outputTensors);
+ });
+ }
+
+ for (unsigned int i = 0; i < numberOfInferences; ++i)
+ {
+ threads[i].join();
+ }
+
+ // Checks the results.
+ for (unsigned int i = 0; i < numberOfInferences; ++i)
+ {
+ for (auto &&it : expectedOutputData[i])
+ {
+ std::vector<TOutput> out = outputStorageVec[i].at(it.first);
+ for (unsigned int j = 0; j < out.size(); ++j)
+ {
+ BOOST_CHECK(Compare<ArmnnOType>(it.second[j], out[j], tolerance) == true);
+ }
+ }
+ }
+
+}
+
+
+
+template<DataType ArmnnIType, DataType ArmnnOType,
+ typename TInput = ResolveType <ArmnnIType>, typename TOutput = ResolveType <ArmnnOType>>
void AsyncEndToEndTestImpl(INetworkPtr network,
const std::map<int, std::vector<TInput>>& inputTensorData,
const std::map<int, std::vector<TOutput>>& expectedOutputData,
@@ -169,7 +263,71 @@ void StridedSlicedEndToEndTest(const std::vector<BackendId>& backends)
std::map<int, std::vector<T>> inputTensorData = {{0, inputData}};
std::map<int, std::vector<T>> expectedOutputData = {{0, outputExpected}};
- AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends);
+ AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensorData, expectedOutputData, backends, 1);
+}
+
+template<armnn::DataType ArmnnType>
+void StridedSlicedMultiThreadedEndToEndTest(const std::vector<BackendId>& backends)
+{
+ using namespace armnn;
+ using T = ResolveType<ArmnnType>;
+
+ const TensorShape& inputShape = {3, 2, 3, 1};
+ const TensorShape& outputShape = {1, 2, 3, 1};
+ const std::vector<int>& beginData = {1, 0, 0, 0};
+ const std::vector<int>& endData = {2, 2, 3, 1};
+ const std::vector<int>& stridesData = {1, 1, 1, 1};
+ int beginMask = 0;
+ int endMask = 0;
+ int shrinkAxisMask = 0;
+ int ellipsisMask = 0;
+ int newAxisMask = 0;
+
+ // Builds up the structure of the network
+ INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape,
+ outputShape,
+ beginData,
+ endData,
+ stridesData,
+ beginMask,
+ endMask,
+ shrinkAxisMask,
+ ellipsisMask,
+ newAxisMask);
+
+ BOOST_TEST_CHECKPOINT("create a network");
+
+ // Creates structures for input & output.
+ std::vector<T> inputData1{
+ 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+ 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+ };
+
+ std::vector<T> outputExpected1{ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f };
+
+ // Creates structures for input & output.
+ std::vector<T> inputData2{
+ 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f,
+
+ 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+ };
+
+ std::vector<T> outputExpected2{ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f };
+
+ std::vector<std::map<int, std::vector<T>>> inputTensors;
+ std::vector<std::map<int, std::vector<T>>> outputTensors;
+
+ inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData1}});
+ inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData2}});
+ outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected1}});
+ outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected2}});
+
+ AsyncThreadedEndToEndTestImpl<ArmnnType, ArmnnType>(move(net), inputTensors, outputTensors, backends, 2);
}
} // experimental namespace
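For reference, the call sequence the multithreaded test above drives can be reduced to the following sketch (not part of the patch): one runtime and one loaded network shared by all threads, with one IWorkingMemHandle per concurrent inference. Here 'network', 'backends', 'inputTensorsVec' and 'outputTensorsVec' are assumed to be prepared as in AsyncThreadedEndToEndTestImpl; error handling and includes are omitted.

// Sketch only: driving a single loaded network from several threads via the
// experimental working-memory-handle API exercised by the test above.
IRuntime::CreationOptions options;
IRuntimePtr runtime(IRuntime::Create(options));
IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

NetworkId networkId = 0;
std::string errorMessage;
const INetworkProperties networkProperties(false, false, true);
runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

// Create the per-inference working memory up front, before any thread starts.
std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles;
for (size_t i = 0; i < inputTensorsVec.size(); ++i)
{
    workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId));
}

std::vector<std::thread> threads;
for (size_t i = 0; i < inputTensorsVec.size(); ++i)
{
    threads.emplace_back([&, i]()
    {
        // Each thread uses its own handle; the loaded network itself is shared.
        runtime->Execute(*workingMemHandles[i], inputTensorsVec[i], outputTensorsVec[i]);
    });
}
for (auto& thread : threads)
{
    thread.join();
}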
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 521854b12b..0839c1c7af 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -1341,6 +1341,11 @@ BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedEndToEndTest)
{
armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(defaultBackends);
}
+
+BOOST_AUTO_TEST_CASE(RefAsyncFP32StridedSlicedMultiThreadedEndToEndTest)
+{
+ armnn::experimental::StridedSlicedMultiThreadedEndToEndTest<armnn::DataType::Float32>(defaultBackends);
+}
#endif
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/Concatenate.cpp b/src/backends/reference/workloads/Concatenate.cpp
index a85e34ee61..a0e0abfaa0 100644
--- a/src/backends/reference/workloads/Concatenate.cpp
+++ b/src/backends/reference/workloads/Concatenate.cpp
@@ -11,11 +11,13 @@
namespace armnn
{
-void Concatenate(const ConcatQueueDescriptor &data)
+void Concatenate(const ConcatQueueDescriptor &data,
+ std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs)
{
- const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+ const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
Encoder<float>& encoder = *encoderPtr;
for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
@@ -37,7 +39,7 @@ void Concatenate(const ConcatQueueDescriptor &data)
ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
//Split view extents are defined by the size of (the corresponding) input tensor.
- const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
// Check all dimensions to see if this element is inside the given input view.
@@ -57,7 +59,7 @@ void Concatenate(const ConcatQueueDescriptor &data)
if (insideView)
{
std::unique_ptr<Decoder<float>> decoderPtr =
- MakeDecoder<float>(inputInfo, data.m_Inputs[viewIdx]->Map());
+ MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
Decoder<float>& decoder = *decoderPtr;
unsigned int inIndex = 0;
unsigned int dimensionStride = 1;
diff --git a/src/backends/reference/workloads/Concatenate.hpp b/src/backends/reference/workloads/Concatenate.hpp
index 75e5f8c4f4..e0264b0aac 100644
--- a/src/backends/reference/workloads/Concatenate.hpp
+++ b/src/backends/reference/workloads/Concatenate.hpp
@@ -10,5 +10,7 @@
namespace armnn
{
-void Concatenate(const ConcatQueueDescriptor &data);
+void Concatenate(const ConcatQueueDescriptor &data,
+ std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs);
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationWorkload.cpp b/src/backends/reference/workloads/RefActivationWorkload.cpp
index a26a6399bc..77958673e9 100644
--- a/src/backends/reference/workloads/RefActivationWorkload.cpp
+++ b/src/backends/reference/workloads/RefActivationWorkload.cpp
@@ -17,17 +17,28 @@ namespace armnn
void RefActivationWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefActivationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefActivationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- Activation(*MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map()),
- *MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()),
+ Activation(*MakeDecoder<float>(inputInfo, inputs[0]->Map()),
+ *MakeEncoder<float>(outputInfo, outputs[0]->Map()),
inputInfo,
m_Data.m_Parameters.m_Function,
m_Data.m_Parameters.m_A,
m_Data.m_Parameters.m_B);
}
+
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefActivationWorkload.hpp b/src/backends/reference/workloads/RefActivationWorkload.hpp
index 5b2377e363..429fb60aaa 100644
--- a/src/backends/reference/workloads/RefActivationWorkload.hpp
+++ b/src/backends/reference/workloads/RefActivationWorkload.hpp
@@ -15,7 +15,11 @@ class RefActivationWorkload : public BaseWorkload<ActivationQueueDescriptor>
{
public:
using BaseWorkload<ActivationQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
index bf8649f54d..77167a866b 100644
--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
+++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
@@ -18,16 +18,27 @@ RefArgMinMaxWorkload::RefArgMinMaxWorkload(
const WorkloadInfo& info)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) {}
+
void RefArgMinMaxWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefArgMinMaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefArgMinMaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefArgMinMaxWorkload_Execute");
- const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map());
Decoder<float> &decoder = *decoderPtr;
- const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]);
if (outputTensorInfo.GetDataType() == armnn::DataType::Signed32) {
int32_t *output = GetOutputTensorData<int32_t>(0, m_Data);
diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
index 97b70772d1..df9ebcab0b 100644
--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
@@ -16,6 +16,10 @@ public:
explicit RefArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
index 21fcdab5a3..e1068896ba 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
@@ -24,6 +24,17 @@ RefBatchNormalizationWorkload::RefBatchNormalizationWorkload(const BatchNormaliz
void RefBatchNormalizationWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefBatchNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefBatchNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationWorkload_Execute");
std::unique_ptr<Decoder<float>> meanDecoder = MakeDecoder<float>(m_Mean->GetTensorInfo(),
@@ -34,10 +45,10 @@ void RefBatchNormalizationWorkload::Execute() const
m_Gamma->Map(true));
std::unique_ptr<Decoder<float>> betaDecoder = MakeDecoder<float>(m_Beta->GetTensorInfo(),
m_Beta->Map(true));
- std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]),
- m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
- m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]),
+ inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
+ outputs[0]->Map());
BatchNormImpl(m_Data, *meanDecoder, *varianceDecoder, *betaDecoder, *gammaDecoder, *inputDecoder, *outputEncoder);
}
diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
index 53d01f65da..a8a72ef65c 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
@@ -16,9 +16,11 @@ class RefBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueu
public:
explicit RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
index c21ef7640a..441d2ba2cf 100644
--- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
@@ -13,13 +13,23 @@ namespace armnn
void RefBatchToSpaceNdWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefBatchToSpaceNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefBatchToSpaceNdWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchToSpaceNdWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
BatchToSpaceNd(m_Data.m_Parameters.m_DataLayout, inputInfo, outputInfo, m_Data.m_Parameters.m_BlockShape,
m_Data.m_Parameters.m_Crops, *inputDecoder, *outputEncoder);
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
index 60577bab2e..07c800da83 100644
--- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
@@ -16,7 +16,11 @@ class RefBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescrip
public:
using BaseWorkload<BatchToSpaceNdQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefComparisonWorkload.cpp b/src/backends/reference/workloads/RefComparisonWorkload.cpp
index 52ad9a2879..03df7a4c4a 100644
--- a/src/backends/reference/workloads/RefComparisonWorkload.cpp
+++ b/src/backends/reference/workloads/RefComparisonWorkload.cpp
@@ -26,9 +26,15 @@ RefComparisonWorkload::RefComparisonWorkload(const ComparisonQueueDescriptor& de
void RefComparisonWorkload::PostAllocationConfigure()
{
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefComparisonWorkload::PostAllocationConfigure(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
m_Input0 = MakeDecoder<InType>(inputInfo0);
m_Input1 = MakeDecoder<InType>(inputInfo1);
@@ -38,19 +44,31 @@ void RefComparisonWorkload::PostAllocationConfigure()
void RefComparisonWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefComparisonWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefComparisonWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefComparisonWorkload_Execute");
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inShape0 = inputInfo0.GetShape();
const TensorShape& inShape1 = inputInfo1.GetShape();
const TensorShape& outShape = outputInfo.GetShape();
- m_Input0->Reset(m_Data.m_Inputs[0]->Map());
- m_Input1->Reset(m_Data.m_Inputs[1]->Map());
- m_Output->Reset(m_Data.m_Outputs[0]->Map());
+ m_Input0->Reset(inputs[0]->Map());
+ m_Input1->Reset(inputs[1]->Map());
+ m_Output->Reset(outputs[0]->Map());
using EqualFunction = ElementwiseBinaryFunction<std::equal_to<InType>>;
using GreaterFunction = ElementwiseBinaryFunction<std::greater<InType>>;
diff --git a/src/backends/reference/workloads/RefComparisonWorkload.hpp b/src/backends/reference/workloads/RefComparisonWorkload.hpp
index a19e4a0540..de0144ca15 100644
--- a/src/backends/reference/workloads/RefComparisonWorkload.hpp
+++ b/src/backends/reference/workloads/RefComparisonWorkload.hpp
@@ -21,8 +21,11 @@ public:
RefComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info);
void PostAllocationConfigure() override;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void PostAllocationConfigure(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs);
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
using InType = float;
using OutType = bool;
diff --git a/src/backends/reference/workloads/RefConcatWorkload.cpp b/src/backends/reference/workloads/RefConcatWorkload.cpp
index e606649ed0..c04c05354e 100644
--- a/src/backends/reference/workloads/RefConcatWorkload.cpp
+++ b/src/backends/reference/workloads/RefConcatWorkload.cpp
@@ -14,8 +14,18 @@ namespace armnn
void RefConcatWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefConcatWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefConcatWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConcatWorkload_Execute");
- Concatenate(m_Data);
+ Concatenate(m_Data, inputs, outputs);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConcatWorkload.hpp b/src/backends/reference/workloads/RefConcatWorkload.hpp
index 0be28bb7c8..f4e1aa85f2 100644
--- a/src/backends/reference/workloads/RefConcatWorkload.hpp
+++ b/src/backends/reference/workloads/RefConcatWorkload.hpp
@@ -15,7 +15,10 @@ class RefConcatWorkload : public BaseWorkload<ConcatQueueDescriptor>
{
public:
using BaseWorkload<ConcatQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConstantWorkload.cpp b/src/backends/reference/workloads/RefConstantWorkload.cpp
index d3e65e6615..6290237d69 100644
--- a/src/backends/reference/workloads/RefConstantWorkload.cpp
+++ b/src/backends/reference/workloads/RefConstantWorkload.cpp
@@ -20,21 +20,20 @@ RefConstantWorkload::RefConstantWorkload(
const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ConstantQueueDescriptor>(descriptor, info) {}
-void RefConstantWorkload::PostAllocationConfigure()
+void RefConstantWorkload::Execute() const
{
- const ConstantQueueDescriptor& data = this->m_Data;
-
- ARMNN_ASSERT(data.m_LayerOutput != nullptr);
-
- const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
- ARMNN_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes());
+ Execute(m_Data.m_Outputs);
+}
- memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(),
- outputInfo.GetNumBytes());
+void RefConstantWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Outputs);
}
-void RefConstantWorkload::Execute() const
+void RefConstantWorkload::Execute(std::vector<ITensorHandle*> outputs) const
{
+ memcpy(outputs[0]->Map(), m_Data.m_LayerOutput->GetConstTensor<void>(), GetTensorInfo(outputs[0]).GetNumBytes());
+
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantWorkload_Execute");
}
diff --git a/src/backends/reference/workloads/RefConstantWorkload.hpp b/src/backends/reference/workloads/RefConstantWorkload.hpp
index ada488a7b2..9af5903329 100644
--- a/src/backends/reference/workloads/RefConstantWorkload.hpp
+++ b/src/backends/reference/workloads/RefConstantWorkload.hpp
@@ -19,8 +19,10 @@ class RefConstantWorkload : public BaseWorkload<ConstantQueueDescriptor>
public:
RefConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info);
- void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp
index c4b5416836..70e377d19b 100644
--- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.cpp
@@ -15,12 +15,23 @@ namespace armnn
void RefConvertBf16ToFp32Workload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefConvertBf16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefConvertBf16ToFp32Workload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertBf16ToFp32Workload_Execute");
- const BFloat16* const input = GetInputTensorDataBFloat16(0, m_Data);
- float* const output = GetOutputTensorDataFloat(0, m_Data);
+ const BFloat16* const input = reinterpret_cast<const BFloat16*>(inputs[0]->Map());
+ float* const output = reinterpret_cast<float*>(outputs[0]->Map());
- unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements();
armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(input, numElements, output);
}
diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
index 87cdc3e1e3..90613621b4 100644
--- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
@@ -15,7 +15,10 @@ class RefConvertBf16ToFp32Workload : public BFloat16ToFloat32Workload<ConvertBf1
{
public:
using BFloat16ToFloat32Workload<ConvertBf16ToFp32QueueDescriptor>::BFloat16ToFloat32Workload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
index ef813eb69b..347132d1f6 100644
--- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
@@ -15,12 +15,23 @@ namespace armnn
void RefConvertFp16ToFp32Workload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefConvertFp16ToFp32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefConvertFp16ToFp32Workload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute");
- const Half* const input = GetInputTensorDataHalf(0, m_Data);
- float* const output = GetOutputTensorDataFloat(0, m_Data);
+ const Half* const input = reinterpret_cast<const Half*>(inputs[0]->Map());
+ float* const output = reinterpret_cast<float*>(outputs[0]->Map());
- unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements();
armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
}
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
index 7c58e9f089..99ab9e9934 100644
--- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
@@ -15,7 +15,10 @@ class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16
{
public:
using Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>::Float16ToFloat32Workload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp
index 181b236e83..7fe302a5ad 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.cpp
@@ -15,12 +15,23 @@ namespace armnn
void RefConvertFp32ToBf16Workload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefConvertFp32ToBf16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefConvertFp32ToBf16Workload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToBf16Workload_Execute");
- const float* const input = GetInputTensorDataFloat(0, m_Data);
- BFloat16* const output = GetOutputTensorDataBFloat16(0, m_Data);
+ const float* const input = reinterpret_cast<const float*>(inputs[0]->Map());
+ BFloat16* const output = reinterpret_cast<BFloat16*>(outputs[0]->Map());
- unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements();
armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(input, numElements, output);
}
diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
index 409603bb6c..694032c8e6 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
@@ -15,7 +15,10 @@ class RefConvertFp32ToBf16Workload : public Float32ToBFloat16Workload<ConvertFp3
{
public:
using Float32ToBFloat16Workload<ConvertFp32ToBf16QueueDescriptor>::Float32ToBFloat16Workload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
index c68960fad2..be13458d89 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
@@ -16,13 +16,24 @@ namespace armnn
void RefConvertFp32ToFp16Workload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefConvertFp32ToFp16Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefConvertFp32ToFp16Workload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute");
- const float* const input = GetInputTensorDataFloat(0, m_Data);
- Half* const output = GetOutputTensorDataHalf(0, m_Data);
+ const float* const input = reinterpret_cast<const float*>(inputs[0]->Map());
+ Half* const output = reinterpret_cast<Half*>(outputs[0]->Map());
// convert Fp32 input to Fp16 output
- unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements();
armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
}
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
index e1fd8755cb..f1daa54436 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
@@ -15,7 +15,10 @@ class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32
{
public:
using Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>::Float32ToFloat16Workload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
index dad9936f1b..6d0ab413d8 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
@@ -30,24 +30,26 @@ RefConvolution2dWorkload::RefConvolution2dWorkload(
}
}
-void RefConvolution2dWorkload::PostAllocationConfigure()
+void RefConvolution2dWorkload::Execute() const
{
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- m_InputShape = inputInfo.GetShape();
- m_InputDecoder = MakeDecoder<float>(inputInfo);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- m_OutputShape = outputInfo.GetShape();
- m_OutputEncoder = MakeEncoder<float>(outputInfo);
+void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefConvolution2dWorkload::Execute() const {
+void RefConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const {
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dWorkload_Execute");
- m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
- m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
+
+ const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape();
+ const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape();
- Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, m_FilterShape,
+ Convolve(inputShape, *inputDecoder, outputShape, *outputEncoder, m_FilterShape,
*m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(),
m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft,
m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY,
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
index b6bdf23ffa..57df3ce6ae 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
@@ -19,21 +19,18 @@ public:
explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info);
- void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
- std::unique_ptr<Decoder<float>> m_InputDecoder;
- std::unique_ptr<Encoder<float>> m_OutputEncoder;
std::unique_ptr<Decoder<float>> m_FilterDecoder;
std::unique_ptr<Decoder<float>> m_BiasDecoder;
- TensorShape m_InputShape;
- TensorShape m_OutputShape;
TensorShape m_FilterShape;
};
diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp
index f9950c8231..b0e19c5851 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.cpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.cpp
@@ -17,18 +17,30 @@ namespace armnn
template<armnn::DataType DataType>
void RefDebugWorkload<DataType>::Execute() const
{
+ Execute(m_Data.m_Inputs);
+}
+
+template<armnn::DataType DataType>
+void RefDebugWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs);
+}
+
+template<armnn::DataType DataType>
+void RefDebugWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs) const
+{
using T = ResolveType<DataType>;
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
const T* inputData = GetInputTensorData<T>(0, m_Data);
T* outputData = GetOutputTensorData<T>(0, m_Data);
if (m_Callback)
{
- m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, m_Data.m_Inputs[0]);
+ m_Callback(m_Data.m_Guid, m_Data.m_SlotIndex, inputs[0]);
}
else
{
diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp
index d7e3cd9947..d0c47dd829 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.hpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.hpp
@@ -30,10 +30,12 @@ public:
using TypedWorkload<DebugQueueDescriptor, DataType>::TypedWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
void RegisterDebugCallback(const DebugCallbackFunction& func) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs) const;
DebugCallbackFunction m_Callback;
};
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
index 93c1120a1c..22e35f0ec5 100644
--- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
@@ -13,14 +13,24 @@ namespace armnn
void RefDepthToSpaceWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefDepthToSpaceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefDepthToSpaceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthToSpaceWorkload_Execute");
- const TensorInfo inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo inputInfo = GetTensorInfo(inputs[0]);
DepthToSpace(inputInfo,
m_Data.m_Parameters,
- m_Data.m_Inputs[0]->Map(),
- m_Data.m_Outputs[0]->Map(),
+ inputs[0]->Map(),
+ outputs[0]->Map(),
GetDataTypeSize(inputInfo.GetDataType()));
}
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
index a30fadc3e9..ec260a92f7 100644
--- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
@@ -14,7 +14,10 @@ class RefDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
{
public:
using BaseWorkload<DepthToSpaceQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
index cfc81ce203..8fe5dec7d1 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
@@ -32,26 +32,29 @@ RefDepthwiseConvolution2dWorkload::RefDepthwiseConvolution2dWorkload(
}
}
-void RefDepthwiseConvolution2dWorkload::PostAllocationConfigure()
+void RefDepthwiseConvolution2dWorkload::Execute() const
{
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- m_InputShape = inputInfo.GetShape();
- m_InputDecoder = MakeDecoder<float>(inputInfo);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- m_OutputShape = outputInfo.GetShape();
- m_OutputEncoder = MakeEncoder<float>(outputInfo);
+void RefDepthwiseConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefDepthwiseConvolution2dWorkload::Execute() const
+void RefDepthwiseConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dWorkload_Execute");
std::unique_ptr<Decoder<float>> pBiasDecoder{};
- m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
- m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> OutputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
+
+ const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape();
+ const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape();
- Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder,
+ Convolve(inputShape, *inputDecoder, outputShape, *OutputEncoder,
m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(),
m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft,
m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY,
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
index 6d7037f660..65a8fd76cf 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
@@ -17,22 +17,19 @@ public:
explicit RefDepthwiseConvolution2dWorkload(const DepthwiseConvolution2dQueueDescriptor &descriptor,
const WorkloadInfo &info);
- void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr <ScopedCpuTensorHandle> m_Weight;
std::unique_ptr <ScopedCpuTensorHandle> m_Bias;
- std::unique_ptr <Decoder<float>> m_InputDecoder;
- std::unique_ptr <Encoder<float>> m_OutputEncoder;
std::unique_ptr <Decoder<float>> m_FilterDecoder;
std::unique_ptr <Decoder<float>> m_BiasDecoder;
- TensorShape m_InputShape;
- TensorShape m_OutputShape;
TensorShape m_FilterShape;
};
diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.cpp b/src/backends/reference/workloads/RefDequantizeWorkload.cpp
index d6e4964a49..f9d80073b0 100644
--- a/src/backends/reference/workloads/RefDequantizeWorkload.cpp
+++ b/src/backends/reference/workloads/RefDequantizeWorkload.cpp
@@ -14,13 +14,23 @@ namespace armnn
void RefDequantizeWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefDequantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefDequantizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDequantizeWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Dequantize(*inputDecoder, *outputEncoder, inputInfo, outputInfo);
}
diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.hpp b/src/backends/reference/workloads/RefDequantizeWorkload.hpp
index 691f713076..922d57c556 100644
--- a/src/backends/reference/workloads/RefDequantizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefDequantizeWorkload.hpp
@@ -17,6 +17,9 @@ public:
using BaseWorkload<DequantizeQueueDescriptor>::BaseWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
index b9817ba1ea..25c326ad37 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -20,19 +20,30 @@ RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload(
void RefDetectionPostProcessWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefDetectionPostProcessWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefDetectionPostProcessWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute");
- const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& boxEncodingsInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& scoresInfo = GetTensorInfo(inputs[1]);
const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo();
- const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
- const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
- const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
+ const TensorInfo& detectionBoxesInfo = GetTensorInfo(outputs[0]);
+ const TensorInfo& detectionClassesInfo = GetTensorInfo(outputs[1]);
+ const TensorInfo& detectionScoresInfo = GetTensorInfo(outputs[2]);
+ const TensorInfo& numDetectionsInfo = GetTensorInfo(outputs[3]);
- auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, m_Data.m_Inputs[0]->Map());
- auto scores = MakeDecoder<float>(scoresInfo, m_Data.m_Inputs[1]->Map());
+ auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, inputs[0]->Map());
+ auto scores = MakeDecoder<float>(scoresInfo, inputs[1]->Map());
auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false));
float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
index 799d0c6219..007dcea456 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
@@ -16,9 +16,11 @@ class RefDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcess
public:
explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_Anchors;
};
diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
index 4fbb0d123f..b442f25c2a 100644
--- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
@@ -28,28 +28,29 @@ RefElementwiseUnaryWorkload::RefElementwiseUnaryWorkload(const ElementwiseUnaryQ
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(desc, info)
{}
-void RefElementwiseUnaryWorkload::PostAllocationConfigure()
+void RefElementwiseUnaryWorkload::Execute() const
{
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- m_Input = MakeDecoder<InType>(inputInfo);
+void RefElementwiseUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
- m_Output = MakeEncoder<OutType>(outputInfo);
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefElementwiseUnaryWorkload::Execute() const
+void RefElementwiseUnaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseUnaryWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inShape = inputInfo.GetShape();
const TensorShape& outShape = outputInfo.GetShape();
- m_Input->Reset(m_Data.m_Inputs[0]->Map());
- m_Output->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input = MakeDecoder<InType>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map());
using AbsFunction = ElementwiseUnaryFunction<abs<InType>>;
using ExpFunction = ElementwiseUnaryFunction<exp<InType>>;
@@ -61,27 +62,27 @@ void RefElementwiseUnaryWorkload::Execute() const
{
case UnaryOperation::Abs:
{
- AbsFunction(inShape, outShape, *m_Input, *m_Output);
+ AbsFunction(inShape, outShape, *input, *output);
break;
}
case UnaryOperation::Exp:
{
- ExpFunction(inShape, outShape, *m_Input, *m_Output);
+ ExpFunction(inShape, outShape, *input, *output);
break;
}
case UnaryOperation::Neg:
{
- NegFunction(inShape, outShape, *m_Input, *m_Output);
+ NegFunction(inShape, outShape, *input, *output);
break;
}
case UnaryOperation::Rsqrt:
{
- RsqrtFunction(inShape, outShape, *m_Input, *m_Output);
+ RsqrtFunction(inShape, outShape, *input, *output);
break;
}
case UnaryOperation::Sqrt:
{
- SqrtFunction(inShape, outShape, *m_Input, *m_Output);
+ SqrtFunction(inShape, outShape, *input, *output);
break;
}
default:
diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
index efb2865ebd..d05347bbe5 100644
--- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
@@ -19,15 +19,13 @@ public:
using BaseWorkload<ElementwiseUnaryQueueDescriptor>::m_Data;
RefElementwiseUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
- void PostAllocationConfigure() override;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
using InType = float;
using OutType = float;
-
- std::unique_ptr<Decoder<InType>> m_Input;
- std::unique_ptr<Encoder<OutType>> m_Output;
};
} // namespace armnn
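
The elementwise and logical workloads also drop PostAllocationConfigure() together with their cached m_Input/m_Output members: decoders and encoders now live on the stack of each call, so the shared Execute(inputs, outputs) overload writes no member state and can stay const. Presumably this is what lets one workload instance be driven through ExecuteAsync with several different WorkingMemDescriptors without the calls interfering. A condensed before/after, for illustration only:

// Before: bound once in PostAllocationConfigure(), re-pointed in Execute().
//   m_Input  = MakeDecoder<InType>(inputInfo);
//   m_Output = MakeEncoder<OutType>(outputInfo);
//   ...
//   m_Input->Reset(m_Data.m_Inputs[0]->Map());
//   m_Output->Reset(m_Data.m_Outputs[0]->Map());

// After: everything a run needs is local to that run.
std::unique_ptr<Decoder<InType>>  input  = MakeDecoder<InType>(inputInfo, inputs[0]->Map());
std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map());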
diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.cpp b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
index 60acbd6252..dd7d325ca5 100644
--- a/src/backends/reference/workloads/RefElementwiseWorkload.cpp
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
@@ -26,39 +26,41 @@ RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::RefElementwiseWo
}
template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString>
-void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::PostAllocationConfigure()
+void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute() const
{
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- m_Input0 = MakeDecoder<InType>(inputInfo0);
- m_Input1 = MakeDecoder<InType>(inputInfo1);
- m_Output = MakeEncoder<OutType>(outputInfo);
+template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString>
+void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::ExecuteAsync(
+ WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
template <typename Functor, typename ParentDescriptor, typename armnn::StringMapping::Id DebugString>
-void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute() const
+void RefElementwiseWorkload<Functor, ParentDescriptor, DebugString>::Execute(
+ std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, StringMapping::Instance().Get(DebugString));
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inShape0 = inputInfo0.GetShape();
const TensorShape& inShape1 = inputInfo1.GetShape();
const TensorShape& outShape = outputInfo.GetShape();
- m_Input0->Reset(m_Data.m_Inputs[0]->Map());
- m_Input1->Reset(m_Data.m_Inputs[1]->Map());
- m_Output->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input0 = MakeDecoder<InType>(inputInfo0, inputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input1 = MakeDecoder<InType>(inputInfo1, inputs[1]->Map());
+ std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map());
ElementwiseBinaryFunction<Functor>(inShape0,
inShape1,
outShape,
- *m_Input0,
- *m_Input1,
- *m_Output);
+ *input0,
+ *input1,
+ *output);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.hpp b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
index 03683b1a06..4dc4b5ba5a 100644
--- a/src/backends/reference/workloads/RefElementwiseWorkload.hpp
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
@@ -26,13 +26,11 @@ public:
using BaseWorkload<ParentDescriptor>::m_Data;
RefElementwiseWorkload(const ParentDescriptor& descriptor, const WorkloadInfo& info);
- void PostAllocationConfigure() override;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
- std::unique_ptr<Decoder<InType>> m_Input0;
- std::unique_ptr<Decoder<InType>> m_Input1;
- std::unique_ptr<Encoder<OutType>> m_Output;
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
template <typename DataType = float>
diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp
index cf355d35d2..b30811b8ed 100644
--- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp
@@ -28,12 +28,23 @@ void FakeQuantization(const float* inputData, float* outputData, uint32_t numEle
void RefFakeQuantizationFloat32Workload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefFakeQuantizationFloat32Workload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefFakeQuantizationFloat32Workload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
- const float* inputData = GetInputTensorDataFloat(0, m_Data);
- float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData = reinterpret_cast<const float*>(inputs[0]->Map());
+ float* outputData = reinterpret_cast<float*>(outputs[0]->Map());
FakeQuantization(inputData, outputData, inputInfo.GetNumElements(),
m_Data.m_Parameters.m_Min,
m_Data.m_Parameters.m_Max);
diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
index 269ca08d2a..8f6cabb3fe 100644
--- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
+++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
@@ -15,7 +15,10 @@ class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizati
{
public:
using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFillWorkload.cpp b/src/backends/reference/workloads/RefFillWorkload.cpp
index 991ab45396..ea1ca87caf 100644
--- a/src/backends/reference/workloads/RefFillWorkload.cpp
+++ b/src/backends/reference/workloads/RefFillWorkload.cpp
@@ -16,11 +16,21 @@ namespace armnn
void RefFillWorkload::Execute() const
{
+ Execute(m_Data.m_Outputs);
+}
+
+void RefFillWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Outputs);
+}
+
+void RefFillWorkload::Execute(std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFillWorkload_Execute");
- const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map());
Encoder<float> &encoder = *encoderPtr;
Fill(encoder, outputTensorInfo.GetShape(), m_Data.m_Parameters.m_Value);
diff --git a/src/backends/reference/workloads/RefFillWorkload.hpp b/src/backends/reference/workloads/RefFillWorkload.hpp
index 9be773c50b..e92514d865 100644
--- a/src/backends/reference/workloads/RefFillWorkload.hpp
+++ b/src/backends/reference/workloads/RefFillWorkload.hpp
@@ -15,7 +15,10 @@ class RefFillWorkload : public BaseWorkload<FillQueueDescriptor>
{
public:
using BaseWorkload<FillQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFloorWorkload.cpp b/src/backends/reference/workloads/RefFloorWorkload.cpp
index 0c61386b9a..e7bd50ddea 100644
--- a/src/backends/reference/workloads/RefFloorWorkload.cpp
+++ b/src/backends/reference/workloads/RefFloorWorkload.cpp
@@ -15,17 +15,27 @@ namespace armnn
void RefFloorWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefFloorWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefFloorWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute");
- const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
+ const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]);
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map());
Decoder<float> &decoder = *decoderPtr;
- const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map());
+ const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]);
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map());
Encoder<float> &encoder = *encoderPtr;
- unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int numElements = GetTensorInfo(inputs[0]).GetNumElements();
for (unsigned int i = 0; i < numElements; ++i)
{
diff --git a/src/backends/reference/workloads/RefFloorWorkload.hpp b/src/backends/reference/workloads/RefFloorWorkload.hpp
index 563640228d..28b2695c82 100644
--- a/src/backends/reference/workloads/RefFloorWorkload.hpp
+++ b/src/backends/reference/workloads/RefFloorWorkload.hpp
@@ -15,7 +15,10 @@ class RefFloorWorkload : public BaseWorkload<FloorQueueDescriptor>
{
public:
using BaseWorkload<FloorQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
index 49e105f206..deb56d4c6b 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
@@ -34,28 +34,32 @@ RefFullyConnectedWorkload::RefFullyConnectedWorkload(
void RefFullyConnectedWorkload::PostAllocationConfigure()
{
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefFullyConnectedWorkload::PostAllocationConfigure(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs)
+{
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1);
m_InputShape = inputInfo.GetShape();
- m_InputDecoder = MakeDecoder<float>(inputInfo);
if (!m_Data.m_Parameters.m_ConstantWeights)
{
- const TensorInfo& rWeightInfo = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]);
ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1);
m_WeightShape = rWeightInfo.GetShape();
m_WeightDecoder = MakeDecoder<float>(rWeightInfo);
if (m_Data.m_Parameters.m_BiasEnabled)
{
- const TensorInfo& biasInfo = GetTensorInfo(m_Data.m_Inputs[2]);
+ const TensorInfo& biasInfo = GetTensorInfo(inputs[2]);
m_BiasDecoder = MakeDecoder<float>(biasInfo);
}
}
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
m_OutputShape = outputInfo.GetShape();
- m_OutputEncoder = MakeEncoder<float>(outputInfo);
m_NumActivations = 1; // Total number of activations in the input.
for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
@@ -66,23 +70,36 @@ void RefFullyConnectedWorkload::PostAllocationConfigure()
void RefFullyConnectedWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefFullyConnectedWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefFullyConnectedWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedWorkload_Execute");
- m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
+
if (!m_Data.m_Parameters.m_ConstantWeights)
{
- m_WeightDecoder->Reset(m_Data.m_Inputs[1]->Map());
+ m_WeightDecoder->Reset(inputs[1]->Map());
if (m_Data.m_Parameters.m_BiasEnabled)
{
- m_BiasDecoder->Reset(m_Data.m_Inputs[2]->Map());
+ m_BiasDecoder->Reset(inputs[2]->Map());
}
}
- m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
FullyConnected(m_InputShape,
- *m_InputDecoder,
+ *inputDecoder,
m_OutputShape,
- *m_OutputEncoder,
+ *outputEncoder,
m_WeightShape,
*m_WeightDecoder,
*m_BiasDecoder,
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
index a8f0756223..5c0f67ebaf 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
@@ -23,14 +23,15 @@ public:
void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void PostAllocationConfigure(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs);
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
- std::unique_ptr<Decoder<float>> m_InputDecoder;
- std::unique_ptr<Encoder<float>> m_OutputEncoder;
std::unique_ptr<Decoder<float>> m_WeightDecoder;
std::unique_ptr<Decoder<float>> m_BiasDecoder;
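
Unlike the purely stateless workloads above, RefFullyConnectedWorkload keeps shape-dependent member state (m_InputShape, m_OutputShape, m_WeightShape, m_NumActivations and, when weights are not constant, the weight/bias decoders). Its ExecuteAsync therefore re-derives that state from the working-memory tensors through the new PostAllocationConfigure(inputs, outputs) overload before running the shared Execute overload. The snippet below only restates that ordering from the patch, with the reasoning spelled out in comments:

void RefFullyConnectedWorkload::ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)
{
    // FullyConnected() reads m_InputShape, m_OutputShape and m_NumActivations,
    // so they must describe the tensors in this descriptor, not whatever the
    // load-time PostAllocationConfigure() saw.
    PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);

    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}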
diff --git a/src/backends/reference/workloads/RefGatherWorkload.cpp b/src/backends/reference/workloads/RefGatherWorkload.cpp
index eaeed61b0a..020c067cfb 100644
--- a/src/backends/reference/workloads/RefGatherWorkload.cpp
+++ b/src/backends/reference/workloads/RefGatherWorkload.cpp
@@ -15,18 +15,28 @@ namespace armnn
void RefGatherWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefGatherWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefGatherWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefGatherWorkload_Execute");
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo0, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo0, inputs[0]->Map());
Decoder<float>& decoder = *decoderPtr;
const int32_t* indicesData = GetInputTensorData<int32_t>(1, m_Data);
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Encoder<float>& encoder = *encoderPtr;
Gather(inputInfo0, inputInfo1, outputInfo, decoder, indicesData, encoder, m_Data.m_Parameters.m_Axis);
diff --git a/src/backends/reference/workloads/RefGatherWorkload.hpp b/src/backends/reference/workloads/RefGatherWorkload.hpp
index 30019a8d4d..1664e1611d 100644
--- a/src/backends/reference/workloads/RefGatherWorkload.hpp
+++ b/src/backends/reference/workloads/RefGatherWorkload.hpp
@@ -21,6 +21,9 @@ class RefGatherWorkload : public BaseWorkload<GatherQueueDescriptor>
public:
using BaseWorkload<GatherQueueDescriptor>::BaseWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
index 150f0cb017..daee97ae3e 100644
--- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
@@ -20,12 +20,23 @@ RefInstanceNormalizationWorkload::RefInstanceNormalizationWorkload(
void RefInstanceNormalizationWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefInstanceNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefInstanceNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefInstanceNormalizationWorkload_Execute");
- std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]),
- m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
- m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]),
+ inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
+ outputs[0]->Map());
InstanceNorm(m_Data, *inputDecoder, *outputEncoder);
}
diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
index 620779f953..e366ddb05b 100644
--- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
@@ -16,7 +16,10 @@ class RefInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizati
public:
explicit RefInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
index f80901edc9..ca31503620 100644
--- a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
@@ -26,13 +26,23 @@ RefL2NormalizationWorkload::RefL2NormalizationWorkload(
void RefL2NormalizationWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefL2NormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefL2NormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
DataLayoutIndexed dataLayout(m_Data.m_Parameters.m_DataLayout);
diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
index 4beedc9992..c17767b943 100644
--- a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
@@ -18,6 +18,9 @@ public:
const WorkloadInfo& info);
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp
index a2ace13144..ebe1b1ecfe 100644
--- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp
+++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.cpp
@@ -19,13 +19,23 @@ namespace armnn
void RefLogSoftmaxWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefLogSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefLogSoftmaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogSoftmaxWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
ARMNN_ASSERT(decoder != nullptr);
ARMNN_ASSERT(encoder != nullptr);
diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
index f5048d90b3..c5d5d5b0c9 100644
--- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
@@ -15,7 +15,10 @@ class RefLogSoftmaxWorkload : public BaseWorkload<LogSoftmaxQueueDescriptor>
{
public:
using BaseWorkload<LogSoftmaxQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
index 1b4e8f9aa0..f187e0ca31 100644
--- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
@@ -22,32 +22,31 @@ RefLogicalBinaryWorkload::RefLogicalBinaryWorkload(const LogicalBinaryQueueDescr
: BaseWorkload<LogicalBinaryQueueDescriptor>(desc, info)
{}
-void RefLogicalBinaryWorkload::PostAllocationConfigure()
+void RefLogicalBinaryWorkload::Execute() const
{
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- m_Input0 = MakeDecoder<InType>(inputInfo0);
- m_Input1 = MakeDecoder<InType>(inputInfo1);
- m_Output = MakeEncoder<OutType>(outputInfo);
+void RefLogicalBinaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefLogicalBinaryWorkload::Execute() const
+void RefLogicalBinaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalBinaryWorkload_Execute");
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inShape0 = inputInfo0.GetShape();
const TensorShape& inShape1 = inputInfo1.GetShape();
const TensorShape& outShape = outputInfo.GetShape();
- m_Input0->Reset(m_Data.m_Inputs[0]->Map());
- m_Input1->Reset(m_Data.m_Inputs[1]->Map());
- m_Output->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input0 = MakeDecoder<InType>(inputInfo0, inputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input1 = MakeDecoder<InType>(inputInfo1, inputs[1]->Map());
+ std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map());
using AndFunction = LogicalBinaryFunction<std::logical_and<bool>>;
using OrFunction = LogicalBinaryFunction<std::logical_or<bool>>;
@@ -56,12 +55,12 @@ void RefLogicalBinaryWorkload::Execute() const
{
case LogicalBinaryOperation::LogicalAnd:
{
- AndFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output);
+ AndFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
break;
}
case LogicalBinaryOperation::LogicalOr:
{
- OrFunction(inShape0, inShape1, outShape, *m_Input0, *m_Input1, *m_Output);
+ OrFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
break;
}
default:
diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
index 4d6baf5fa4..d79a3039c7 100644
--- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
@@ -19,16 +19,13 @@ public:
using BaseWorkload<LogicalBinaryQueueDescriptor>::m_Data;
RefLogicalBinaryWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
- void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
using InType = bool;
using OutType = bool;
-
- std::unique_ptr<Decoder<InType>> m_Input0;
- std::unique_ptr<Decoder<InType>> m_Input1;
- std::unique_ptr<Encoder<OutType>> m_Output;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
index 76eb5ac39f..bef2bdc668 100644
--- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
@@ -22,27 +22,28 @@ RefLogicalUnaryWorkload::RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDesc
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(desc, info)
{}
-void RefLogicalUnaryWorkload::PostAllocationConfigure()
+void RefLogicalUnaryWorkload::Execute() const
{
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- m_Input = MakeDecoder<InType>(inputInfo);
- m_Output = MakeEncoder<OutType>(outputInfo);
+void RefLogicalUnaryWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefLogicalUnaryWorkload::Execute() const
+void RefLogicalUnaryWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefLogicalUnaryWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inShape = inputInfo.GetShape();
const TensorShape& outShape = outputInfo.GetShape();
- m_Input->Reset(m_Data.m_Inputs[0]->Map());
- m_Output->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<InType>> input = MakeDecoder<InType>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Encoder<OutType>> output = MakeEncoder<OutType>(outputInfo, outputs[0]->Map());
using NotFunction = LogicalUnaryFunction<std::logical_not<bool>>;
@@ -50,7 +51,7 @@ void RefLogicalUnaryWorkload::Execute() const
{
case UnaryOperation::LogicalNot:
{
- NotFunction(inShape, outShape, *m_Input, *m_Output);
+ NotFunction(inShape, outShape, *input, *output);
break;
}
default:
diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
index 0d8b35495c..117f16836d 100644
--- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
@@ -19,15 +19,13 @@ public:
using BaseWorkload<ElementwiseUnaryQueueDescriptor>::m_Data;
RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
- void PostAllocationConfigure() override;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
using InType = bool;
using OutType = bool;
-
- std::unique_ptr<Decoder<InType>> m_Input;
- std::unique_ptr<Encoder<OutType>> m_Output;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp
index 7c37301d1d..09423547da 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.cpp
@@ -40,25 +40,35 @@ RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const Wo
void RefLstmWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefLstmWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefLstmWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
// This is a porting of the LSTM::Eval() method in the Android code base
// Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
const TensorShape& inputShape = inputInfo.GetShape();
const DataType& outputType = outputInfo.GetDataType();
- std::unique_ptr<Encoder<float>> outputStateOut = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[1]->Map());
- std::unique_ptr<Encoder<float>> cellStateOut = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[2]->Map());
- std::unique_ptr<Encoder<float>> output = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[3]->Map());
+ std::unique_ptr<Encoder<float>> outputStateOut = MakeEncoder<float>(outputInfo, outputs[1]->Map());
+ std::unique_ptr<Encoder<float>> cellStateOut = MakeEncoder<float>(outputInfo, outputs[2]->Map());
+ std::unique_ptr<Encoder<float>> output = MakeEncoder<float>(outputInfo, outputs[3]->Map());
- std::unique_ptr<Decoder<float>> cellStateOutDecoder = MakeDecoder<float>(outputInfo, m_Data.m_Outputs[2]->Map());
- std::unique_ptr<Decoder<float>> outputDecoder = MakeDecoder<float>(outputInfo, m_Data.m_Outputs[3]->Map());
+ std::unique_ptr<Decoder<float>> cellStateOutDecoder = MakeDecoder<float>(outputInfo, outputs[2]->Map());
+ std::unique_ptr<Decoder<float>> outputDecoder = MakeDecoder<float>(outputInfo, outputs[3]->Map());
- std::unique_ptr<Decoder<float>> inputData = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Decoder<float>> outputStateIn = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[1]->Map());
- std::unique_ptr<Decoder<float>> cellStateIn = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[2]->Map());
+ std::unique_ptr<Decoder<float>> inputData = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> outputStateIn = MakeDecoder<float>(inputInfo, inputs[1]->Map());
+ std::unique_ptr<Decoder<float>> cellStateIn = MakeDecoder<float>(inputInfo, inputs[2]->Map());
const uint32_t nBatch = inputShape[0];
const uint32_t nInput = inputShape[1];
@@ -71,19 +81,19 @@ void RefLstmWorkload::Execute() const
const bool useLayerNorm = m_Data.m_Parameters.m_LayerNormEnabled;
// Index the scratch buffers pointers to the global scratch buffer.
- std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
- std::unique_ptr<Encoder<float>> cellScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
- std::unique_ptr<Encoder<float>> forgetGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
- std::unique_ptr<Encoder<float>> outputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> cellScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> forgetGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputGateScratch = MakeEncoder<float>(outputInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> inputGateScratchDecoder =
- MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ MakeDecoder<float>(outputInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> cellScratchDecoder =
- MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ MakeDecoder<float>(outputInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> forgetGateScratchDecoder =
- MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ MakeDecoder<float>(outputInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> outputGateScratchDecoder =
- MakeDecoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ MakeDecoder<float>(outputInfo, outputs[0]->Map());
if (useCifg)
{
diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp
index ce5a775269..b55a1f9a9e 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.hpp
@@ -18,9 +18,11 @@ class RefLstmWorkload : public BaseWorkload<LstmQueueDescriptor>
public:
explicit RefLstmWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToInputWeightsTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToForgetWeightsTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToCellWeightsTensor;
diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp
index 00e59bca4c..7941ce2c36 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.cpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.cpp
@@ -20,13 +20,23 @@ RefMeanWorkload::RefMeanWorkload(const MeanQueueDescriptor& descriptor, const Wo
void RefMeanWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefMeanWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefMeanWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMeanWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Reduce(inputInfo,
outputInfo,
diff --git a/src/backends/reference/workloads/RefMeanWorkload.hpp b/src/backends/reference/workloads/RefMeanWorkload.hpp
index c673f940e0..b5a9ed812f 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.hpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.hpp
@@ -18,7 +18,10 @@ class RefMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
public:
explicit RefMeanWorkload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.cpp b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
index d5d2104cba..36828acfb3 100644
--- a/src/backends/reference/workloads/RefNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
@@ -163,12 +163,22 @@ RefNormalizationWorkload::RefNormalizationWorkload(const NormalizationQueueDescr
void RefNormalizationWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefNormalizationWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefNormalizationWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
- auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
- auto outputEncoder = MakeEncoder<float>(inputInfo, m_Data.m_Outputs[0]->Map());
+ auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ auto outputEncoder = MakeEncoder<float>(inputInfo, outputs[0]->Map());
if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType)
{
diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.hpp b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
index 9d68ffda58..59170b8a80 100644
--- a/src/backends/reference/workloads/RefNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
@@ -17,7 +17,10 @@ public:
explicit RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp
index af22c31001..ea515cae68 100644
--- a/src/backends/reference/workloads/RefPadWorkload.cpp
+++ b/src/backends/reference/workloads/RefPadWorkload.cpp
@@ -14,10 +14,20 @@ namespace armnn
void RefPadWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefPadWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefPadWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPadWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
armnn::Pad(inputInfo,
outputInfo,
diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp
index 0b8379a60f..afc620383f 100644
--- a/src/backends/reference/workloads/RefPadWorkload.hpp
+++ b/src/backends/reference/workloads/RefPadWorkload.hpp
@@ -15,7 +15,10 @@ class RefPadWorkload : public BaseWorkload<PadQueueDescriptor>
{
public:
using BaseWorkload<PadQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
index 1fb1421ed9..f6af208e8a 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.cpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -16,12 +16,25 @@ namespace armnn
template <armnn::DataType DataType>
void RefPermuteWorkload<DataType>::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefPermuteWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefPermuteWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
using T = ResolveType<DataType>;
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
- const ITensorHandle* src = m_Data.m_Inputs[0];
- ITensorHandle* dst = m_Data.m_Outputs[0];
+ const ITensorHandle* src = inputs[0];
+ ITensorHandle* dst = outputs[0];
const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings,
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp
index 62a145617a..94f633423a 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.hpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp
@@ -25,6 +25,9 @@ public:
using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
using RefPermuteBFloat16Workload = RefPermuteWorkload<DataType::BFloat16>;
diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.cpp b/src/backends/reference/workloads/RefPooling2dWorkload.cpp
index 40b814789c..d337278fe1 100644
--- a/src/backends/reference/workloads/RefPooling2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefPooling2dWorkload.cpp
@@ -15,13 +15,23 @@ namespace armnn
{
void RefPooling2dWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefPooling2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefPooling2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- auto inputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0] ->Map());
- auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ auto inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ auto outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Pooling2d(*inputDecoder,
*outputEncoder,
diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.hpp b/src/backends/reference/workloads/RefPooling2dWorkload.hpp
index 24386b7e8d..3495d6b68d 100644
--- a/src/backends/reference/workloads/RefPooling2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefPooling2dWorkload.hpp
@@ -18,6 +18,9 @@ class RefPooling2dWorkload : public BaseWorkload<Pooling2dQueueDescriptor>
public:
using BaseWorkload<Pooling2dQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPreluWorkload.cpp b/src/backends/reference/workloads/RefPreluWorkload.cpp
index cdc0a63711..b298874334 100644
--- a/src/backends/reference/workloads/RefPreluWorkload.cpp
+++ b/src/backends/reference/workloads/RefPreluWorkload.cpp
@@ -20,14 +20,24 @@ RefPreluWorkload::RefPreluWorkload(const PreluQueueDescriptor& descriptor,
void RefPreluWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefPreluWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefPreluWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPreluWorkload_Execute");
- std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]),
- m_Data.m_Inputs[0]->Map());
- std::unique_ptr<Decoder<float>> alphaDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[1]),
- m_Data.m_Inputs[1]->Map());
- std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
- m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]),
+ inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> alphaDecoder = MakeDecoder<float>(GetTensorInfo(inputs[1]),
+ inputs[1]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
+ outputs[0]->Map());
PreluImpl(m_Data, *inputDecoder, *alphaDecoder, *outputEncoder);
}
diff --git a/src/backends/reference/workloads/RefPreluWorkload.hpp b/src/backends/reference/workloads/RefPreluWorkload.hpp
index 72839e67dc..4fe5704711 100644
--- a/src/backends/reference/workloads/RefPreluWorkload.hpp
+++ b/src/backends/reference/workloads/RefPreluWorkload.hpp
@@ -16,7 +16,10 @@ class RefPreluWorkload : public BaseWorkload<PreluQueueDescriptor>
public:
explicit RefPreluWorkload(const PreluQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
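
From the caller's side, the net effect of the split is that one workload object can be handed a different set of pre-allocated tensors per invocation, e.g. one WorkingMemDescriptor per worker thread. A hypothetical usage sketch: the aggregate-style construction below only assumes the public m_Inputs/m_Outputs members used throughout this patch, and tensor-handle allocation is deliberately elided.

// 'workload' stands for any of the Ref*Workload instances modified above.
RefPadWorkload* workload    = nullptr;  // assumption: obtained from the backend's workload factory
ITensorHandle*  inputHandle  = nullptr; // assumption: caller-prepared input memory
ITensorHandle*  outputHandle = nullptr; // assumption: caller-prepared output memory

WorkingMemDescriptor workingMem;
workingMem.m_Inputs  = { inputHandle };
workingMem.m_Outputs = { outputHandle };

// On this path the workload maps only the handles it is given (via the
// per-call decoders/encoders above), not the tensors recorded in m_Data.
workload->ExecuteAsync(workingMem);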
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index bcd6a627de..7b7961c5a0 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -45,19 +45,30 @@ RefQLstmWorkload::RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const
void RefQLstmWorkload::Execute() const
{
- // This is a porting of the QLSTM::Execute() method in the Android code base
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefQLstmWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefQLstmWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
+ // This is a porting of the QLSTM::Execute() method in the Android code base
// Note: this implementation wraps the arithmetic functions of the LSTM cell in Quantize/Dequantize ops, so all
// computation is done in the floating point domain. Arithmetic functions are found in LstmUtils.cpp.
// Refer to: android/frameworks/ml/nn/common/operations/QLSTM.cpp
const DataType& internalType = armnn::DataType::QSymmS16;
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputStateInInfo = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& cellStateInInfo = GetTensorInfo(m_Data.m_Inputs[2]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputStateInInfo = GetTensorInfo(inputs[1]);
+ const TensorInfo& cellStateInInfo = GetTensorInfo(inputs[2]);
- const TensorInfo& outputStateOutInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- const TensorInfo& cellStateOutInfo = GetTensorInfo(m_Data.m_Outputs[1]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[2]);
+ const TensorInfo& outputStateOutInfo = GetTensorInfo(outputs[0]);
+ const TensorInfo& cellStateOutInfo = GetTensorInfo(outputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[2]);
const TensorShape& inputShape = inputInfo.GetShape();
const TensorShape& outputStateInShape = outputStateInInfo.GetShape();
@@ -77,27 +88,27 @@ void RefQLstmWorkload::Execute() const
// Input decoders
std::unique_ptr<Decoder<float>> inputDecoder =
- MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ MakeDecoder<float>(inputInfo, inputs[0]->Map());
std::unique_ptr<Decoder<float>> outputStateInDecoder =
- MakeDecoder<float>(outputStateInInfo, m_Data.m_Inputs[1]->Map());
+ MakeDecoder<float>(outputStateInInfo, inputs[1]->Map());
std::unique_ptr<Decoder<float>> cellStateInDecoder =
- MakeDecoder<float>(cellStateInInfo, m_Data.m_Inputs[2]->Map());
+ MakeDecoder<float>(cellStateInInfo, inputs[2]->Map());
// Output decoders
std::unique_ptr<Decoder<float>> outputStateOutDecoder =
- MakeDecoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map());
+ MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> cellStateOutDecoder =
- MakeDecoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map());
+ MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map());
std::unique_ptr<Decoder<float>> outputDecoder =
- MakeDecoder<float>(outputInfo, m_Data.m_Outputs[2]->Map());
+ MakeDecoder<float>(outputInfo, outputs[2]->Map());
// Output encoders
std::unique_ptr<Encoder<float>> outputStateOutEncoder =
- MakeEncoder<float>(outputStateOutInfo, m_Data.m_Outputs[0]->Map());
+ MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map());
std::unique_ptr<Encoder<float>> cellStateOutEncoder =
- MakeEncoder<float>(cellStateOutInfo, m_Data.m_Outputs[1]->Map());
+ MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map());
std::unique_ptr<Encoder<float>> outputEncoder =
- MakeEncoder<float>(outputInfo, m_Data.m_Outputs[2]->Map());
+ MakeEncoder<float>(outputInfo, outputs[2]->Map());
// Weights decoders
std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
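
As the comment at the top of this function notes, the reference QLSTM keeps its arithmetic in the floating point domain: quantized tensors are decoded to float, the LSTM math runs on floats, and results are re-encoded. A rough standalone illustration of that idea for a single affine-quantized value (plain C++ with made-up quantization parameters, not ArmNN's Decoder/Encoder machinery):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Affine quantization parameters for an 8-bit tensor (illustrative values).
    struct QuantParams { float scale; int32_t zeroPoint; };

    float Dequantize(int8_t q, QuantParams p)
    {
        return p.scale * static_cast<float>(q - p.zeroPoint);
    }

    int8_t Quantize(float r, QuantParams p)
    {
        int32_t q = static_cast<int32_t>(std::round(r / p.scale)) + p.zeroPoint;
        return static_cast<int8_t>(std::max(-128, std::min(127, q)));
    }

    int main()
    {
        QuantParams in{0.05f, 0}, out{0.02f, 0};

        // "Compute in float": dequantize, apply the op (here tanh), requantize.
        int8_t x = 40;                          // represents 40 * 0.05 = 2.0
        float  r = std::tanh(Dequantize(x, in));
        int8_t y = Quantize(r, out);

        std::printf("tanh(2.0) ~ %f -> quantized %d\n", r, static_cast<int>(y));
        return 0;
    }
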
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.hpp b/src/backends/reference/workloads/RefQLstmWorkload.hpp
index 19d3a2af0f..f4242ec8a4 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.hpp
@@ -18,9 +18,11 @@ class RefQLstmWorkload : public BaseWorkload<QLstmQueueDescriptor>
public:
explicit RefQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToInputWeightsTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToForgetWeightsTensor;
std::unique_ptr<ScopedCpuTensorHandle> m_InputToCellWeightsTensor;
diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.cpp b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
index 2eef5f33db..35791e65fb 100644
--- a/src/backends/reference/workloads/RefQuantizeWorkload.cpp
+++ b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
@@ -34,21 +34,22 @@ RefQuantizeWorkload::RefQuantizeWorkload(const QuantizeQueueDescriptor& descript
{
}
-void RefQuantizeWorkload::PostAllocationConfigure()
+void RefQuantizeWorkload::Execute() const
{
- const TensorInfo& inputInfo = armnn::GetTensorInfo(m_Data.m_Inputs[0]);
- m_InputDecoder = MakeDecoder<float>(inputInfo);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- const TensorInfo& outputInfo = armnn::GetTensorInfo(m_Data.m_Outputs[0]);
- m_OutputEncoder = MakeEncoder<float>(outputInfo);
+void RefQuantizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefQuantizeWorkload::Execute() const
+void RefQuantizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
- m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
- m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
- QuantizeImpl(*m_InputDecoder, *m_OutputEncoder, m_NumElements);
+ QuantizeImpl(*inputDecoder, *outputEncoder, m_NumElements);
}
} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.hpp b/src/backends/reference/workloads/RefQuantizeWorkload.hpp
index 9ae107607b..48116e7b39 100644
--- a/src/backends/reference/workloads/RefQuantizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefQuantizeWorkload.hpp
@@ -16,13 +16,11 @@ class RefQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor>
{
public:
RefQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo &info);
- void PostAllocationConfigure() override;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
-
- std::unique_ptr<Decoder<float>> m_InputDecoder;
- std::unique_ptr<Encoder<float>> m_OutputEncoder;
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
size_t m_NumElements;
};
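
RefQuantizeWorkload (and RefTransposeConvolution2dWorkload further down) additionally drops PostAllocationConfigure() and its cached m_InputDecoder/m_OutputEncoder members: since ExecuteAsync() may be handed a different set of tensor handles on every call, the decoder and encoder are now built locally inside Execute(inputs, outputs) from the handles passed in rather than stored as workload state. A minimal sketch of that per-call, stateless shape (Handle/Decoder/Encoder here are stand-ins, not the armnn types):

    #include <memory>
    #include <vector>

    // Illustrative stand-ins: a handle exposes raw memory, a decoder reads it, an encoder writes it.
    struct Handle  { float* data; };
    struct Decoder { explicit Decoder(float* p) : ptr(p) {} float Get() const { return *ptr; } float* ptr; };
    struct Encoder { explicit Encoder(float* p) : ptr(p) {} void Set(float v) { *ptr = v; } float* ptr; };

    // Stateless per-call pattern: everything is derived from the handles passed in,
    // so concurrent calls with different working memory cannot interfere.
    void ExecuteOnce(std::vector<Handle*> inputs, std::vector<Handle*> outputs)
    {
        auto decoder = std::make_unique<Decoder>(inputs[0]->data);
        auto encoder = std::make_unique<Encoder>(outputs[0]->data);
        encoder->Set(decoder->Get() * 2.0f);   // stand-in for the real kernel
    }

    int main()
    {
        float in = 1.5f, out = 0.0f;
        Handle hIn{&in}, hOut{&out};
        ExecuteOnce({&hIn}, {&hOut});          // out == 3.0f
        return 0;
    }
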
diff --git a/src/backends/reference/workloads/RefRankWorkload.hpp b/src/backends/reference/workloads/RefRankWorkload.hpp
index 660db6b8db..237ae999ce 100644
--- a/src/backends/reference/workloads/RefRankWorkload.hpp
+++ b/src/backends/reference/workloads/RefRankWorkload.hpp
@@ -19,10 +19,21 @@ public:
using BaseWorkload<RankQueueDescriptor>::BaseWorkload;
virtual void Execute() const override
{
- const int32_t rank = static_cast<int32_t>(GetTensorInfo(m_Data.m_Inputs[0]).GetNumDimensions());
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+
+ }
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override
+ {
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+ }
+
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+ {
+ const int32_t rank = static_cast<int32_t>(GetTensorInfo(inputs[0]).GetNumDimensions());
std::memcpy(GetOutputTensorData<void>(0, m_Data), &rank, sizeof(int32_t));
- m_Data.m_Outputs[0]->Unmap();
+ outputs[0]->Unmap();
}
};
diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp b/src/backends/reference/workloads/RefReduceWorkload.cpp
index 7a46ff9ffc..821e828b6e 100644
--- a/src/backends/reference/workloads/RefReduceWorkload.cpp
+++ b/src/backends/reference/workloads/RefReduceWorkload.cpp
@@ -20,15 +20,25 @@ RefReduceWorkload::RefReduceWorkload(
void RefReduceWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefReduceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefReduceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map());
Decoder<float>& decoder = *decoderPtr;
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Encoder<float>& encoder = *encoderPtr;
Reduce(inputInfo,
diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp
index 1d551acb4a..d45161c692 100644
--- a/src/backends/reference/workloads/RefReduceWorkload.hpp
+++ b/src/backends/reference/workloads/RefReduceWorkload.hpp
@@ -17,7 +17,10 @@ public:
explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReshapeWorkload.cpp b/src/backends/reference/workloads/RefReshapeWorkload.cpp
index 6d29781937..960d591fec 100644
--- a/src/backends/reference/workloads/RefReshapeWorkload.cpp
+++ b/src/backends/reference/workloads/RefReshapeWorkload.cpp
@@ -14,11 +14,21 @@ namespace armnn
void RefReshapeWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefReshapeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefReshapeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeWorkload_Execute");
- void* output = GetOutputTensorData<void>(0, m_Data);
- const void* input = GetInputTensorData<void>(0, m_Data);
- unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes();
+ void* output = outputs[0]->Map();
+ const void* input = inputs[0]->Map();
+ unsigned int numBytes = GetTensorInfo(inputs[0]).GetNumBytes();
memcpy(output, input, numBytes);
}
diff --git a/src/backends/reference/workloads/RefReshapeWorkload.hpp b/src/backends/reference/workloads/RefReshapeWorkload.hpp
index 7359ff9cde..2b6cf43c72 100644
--- a/src/backends/reference/workloads/RefReshapeWorkload.hpp
+++ b/src/backends/reference/workloads/RefReshapeWorkload.hpp
@@ -15,7 +15,10 @@ class RefReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor>
{
public:
using BaseWorkload<ReshapeQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp
index a23caf9fc2..2cf5888f33 100644
--- a/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp
+++ b/src/backends/reference/workloads/RefResizeBilinearWorkload.cpp
@@ -19,14 +19,24 @@ namespace armnn
void RefResizeBilinearWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefResizeBilinearWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefResizeBilinearWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map());
Decoder<float> &decoder = *decoderPtr;
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Encoder<float> &encoder = *encoderPtr;
Resize(decoder, inputInfo, encoder, outputInfo, m_Data.m_Parameters.m_DataLayout, armnn::ResizeMethod::Bilinear);
diff --git a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp
index a0e33fa320..5ada3d1ff8 100644
--- a/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp
+++ b/src/backends/reference/workloads/RefResizeBilinearWorkload.hpp
@@ -15,7 +15,10 @@ class RefResizeBilinearWorkload : public BaseWorkload<ResizeBilinearQueueDescrip
{
public:
using BaseWorkload<ResizeBilinearQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefResizeWorkload.cpp b/src/backends/reference/workloads/RefResizeWorkload.cpp
index 21ff852320..d7a82b8f34 100644
--- a/src/backends/reference/workloads/RefResizeWorkload.cpp
+++ b/src/backends/reference/workloads/RefResizeWorkload.cpp
@@ -19,14 +19,24 @@ namespace armnn
void RefResizeWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefResizeWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefResizeWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, inputs[0]->Map());
Decoder<float> &decoder = *decoderPtr;
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map());
Encoder<float> &encoder = *encoderPtr;
Resize(decoder,
diff --git a/src/backends/reference/workloads/RefResizeWorkload.hpp b/src/backends/reference/workloads/RefResizeWorkload.hpp
index e72271afd8..f58eadc9af 100644
--- a/src/backends/reference/workloads/RefResizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefResizeWorkload.hpp
@@ -15,7 +15,10 @@ class RefResizeWorkload : public BaseWorkload<ResizeQueueDescriptor>
{
public:
using BaseWorkload<ResizeQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSliceWorkload.cpp b/src/backends/reference/workloads/RefSliceWorkload.cpp
index 2e448450c1..f94a83ee2c 100644
--- a/src/backends/reference/workloads/RefSliceWorkload.cpp
+++ b/src/backends/reference/workloads/RefSliceWorkload.cpp
@@ -15,14 +15,24 @@ namespace armnn
void RefSliceWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSliceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSliceWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
Slice(inputInfo,
m_Data.m_Parameters,
- m_Data.m_Inputs[0]->Map(),
- m_Data.m_Outputs[0]->Map(),
+ inputs[0]->Map(),
+ outputs[0]->Map(),
GetDataTypeSize(inputInfo.GetDataType()));
}
diff --git a/src/backends/reference/workloads/RefSliceWorkload.hpp b/src/backends/reference/workloads/RefSliceWorkload.hpp
index 006c7b775d..8a1db8e5a7 100644
--- a/src/backends/reference/workloads/RefSliceWorkload.hpp
+++ b/src/backends/reference/workloads/RefSliceWorkload.hpp
@@ -16,7 +16,10 @@ class RefSliceWorkload : public BaseWorkload<SliceQueueDescriptor>
public:
using BaseWorkload<SliceQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp
index 2e4d811674..9733cbc859 100644
--- a/src/backends/reference/workloads/RefSoftmaxWorkload.cpp
+++ b/src/backends/reference/workloads/RefSoftmaxWorkload.cpp
@@ -19,16 +19,26 @@ namespace armnn
void RefSoftmaxWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSoftmaxWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSoftmaxWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxWorkload_Execute");
- const TensorInfo &inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo &inputTensorInfo = GetTensorInfo(inputs[0]);
- std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
+ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, inputs[0]->Map());
Decoder<float> &decoder = *decoderPtr;
- const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]);
- std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputTensorInfo, outputs[0]->Map());
Encoder<float> &encoder = *encoderPtr;
Softmax(decoder,
diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
index 3d00c6ff96..6e62369880 100644
--- a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
@@ -15,7 +15,10 @@ class RefSoftmaxWorkload : public BaseWorkload<SoftmaxQueueDescriptor>
{
public:
using BaseWorkload<SoftmaxQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp
index c65d4c110c..e35632db5b 100644
--- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp
+++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.cpp
@@ -14,13 +14,23 @@ namespace armnn
void RefSpaceToBatchNdWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSpaceToBatchNdWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSpaceToBatchNdWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToBatchNdWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+ std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
SpaceToBatchNd(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder);
}
diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
index caf264894a..82ddb32a44 100644
--- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
+++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
@@ -16,6 +16,9 @@ class RefSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescrip
public:
using BaseWorkload<SpaceToBatchNdQueueDescriptor>::BaseWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
index 1b12272506..88faf7a790 100644
--- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
@@ -14,13 +14,23 @@ namespace armnn
void RefSpaceToDepthWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSpaceToDepthWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSpaceToDepthWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToDepthWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+ std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
SpaceToDepth(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder);
}
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
index 89e5585249..d8f44b7995 100644
--- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
@@ -15,7 +15,10 @@ class RefSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
{
public:
using BaseWorkload<SpaceToDepthQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterWorkload.cpp b/src/backends/reference/workloads/RefSplitterWorkload.cpp
index 5207423995..076aefe517 100644
--- a/src/backends/reference/workloads/RefSplitterWorkload.cpp
+++ b/src/backends/reference/workloads/RefSplitterWorkload.cpp
@@ -13,8 +13,18 @@ namespace armnn
void RefSplitterWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefSplitterWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefSplitterWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterWorkload_Execute");
- Split(m_Data);
+ Split(m_Data, inputs, outputs);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSplitterWorkload.hpp b/src/backends/reference/workloads/RefSplitterWorkload.hpp
index c491e1ebcb..99b5ff6911 100644
--- a/src/backends/reference/workloads/RefSplitterWorkload.hpp
+++ b/src/backends/reference/workloads/RefSplitterWorkload.hpp
@@ -17,7 +17,10 @@ class RefSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor>
{
public:
using BaseWorkload<SplitterQueueDescriptor>::BaseWorkload;
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp
index fc859506a3..20cf3b38f5 100644
--- a/src/backends/reference/workloads/RefStackWorkload.cpp
+++ b/src/backends/reference/workloads/RefStackWorkload.cpp
@@ -20,6 +20,16 @@ RefStackWorkload::RefStackWorkload(const StackQueueDescriptor& descriptor,
void RefStackWorkload::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefStackWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+void RefStackWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStackWorkload_Execute");
// Can perform a simple concatenation when axis == 0
@@ -29,7 +39,7 @@ void RefStackWorkload::Execute() const
ARMNN_ASSERT(output != nullptr);
unsigned int numInputs = m_Data.m_Parameters.m_NumInputs;
- unsigned int inputLength = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+ unsigned int inputLength = GetTensorInfo(inputs[0]).GetNumElements();
for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
{
@@ -43,13 +53,13 @@ void RefStackWorkload::Execute() const
}
std::vector<std::unique_ptr<Decoder<float>>> inputDecoders;
- for (unsigned int i=0; i<m_Data.m_Inputs.size(); ++i)
+ for (unsigned int i=0; i<inputs.size(); ++i)
{
- inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[i]),
- m_Data.m_Inputs[i]->Map()));
+ inputDecoders.push_back(MakeDecoder<float>(GetTensorInfo(inputs[i]),
+ inputs[i]->Map()));
}
- std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
- m_Data.m_Outputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
+ outputs[0]->Map());
Stack(m_Data, inputDecoders, *outputEncoder);
}
diff --git a/src/backends/reference/workloads/RefStackWorkload.hpp b/src/backends/reference/workloads/RefStackWorkload.hpp
index ceb27d9f60..4276339a8f 100644
--- a/src/backends/reference/workloads/RefStackWorkload.hpp
+++ b/src/backends/reference/workloads/RefStackWorkload.hpp
@@ -16,7 +16,10 @@ class RefStackWorkload : public BaseWorkload<StackQueueDescriptor>
public:
explicit RefStackWorkload(const StackQueueDescriptor& descriptor,
const WorkloadInfo& info);
- virtual void Execute() const override;
+ void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
index ce807ee087..336a687d5c 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
@@ -17,30 +17,20 @@ RefStridedSliceWorkload::RefStridedSliceWorkload(const StridedSliceQueueDescript
void RefStridedSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute");
-
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
-
- DataType inputDataType = inputInfo.GetDataType();
- DataType outputDataType = outputInfo.GetDataType();
-
- ARMNN_ASSERT(inputDataType == outputDataType);
- IgnoreUnused(outputDataType);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- StridedSlice(inputInfo,
- m_Data.m_Parameters,
- m_Data.m_Inputs[0]->Map(),
- m_Data.m_Outputs[0]->Map(),
- GetDataTypeSize(inputDataType));
+void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor)
+void RefStridedSliceWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute_WorkingMemDescriptor");
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStridedSliceWorkload_Execute");
- const TensorInfo& inputInfo = GetTensorInfo(descriptor.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(descriptor.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
DataType inputDataType = inputInfo.GetDataType();
DataType outputDataType = outputInfo.GetDataType();
@@ -50,8 +40,8 @@ void RefStridedSliceWorkload::ExecuteAsync(WorkingMemDescriptor& descriptor)
StridedSlice(inputInfo,
m_Data.m_Parameters,
- descriptor.m_Inputs[0]->Map(),
- descriptor.m_Outputs[0]->Map(),
+ inputs[0]->Map(),
+ outputs[0]->Map(),
GetDataTypeSize(inputDataType));
}
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
index 3e253edcd9..38613e2779 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
@@ -15,7 +15,9 @@ class RefStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor>
public:
RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info);
void Execute() const override;
- void ExecuteAsync(WorkingMemDescriptor& descriptor) override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
index 2ab76041ef..634122835f 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -33,35 +33,32 @@ RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload(
}
}
-void RefTransposeConvolution2dWorkload::PostAllocationConfigure()
+void RefTransposeConvolution2dWorkload::Execute() const
{
- // set up input decoder
- const ITensorHandle* input = m_Data.m_Inputs[0];
- const TensorInfo& inputInfo = GetTensorInfo(input);
-
- m_InputShape = inputInfo.GetShape();
- m_InputDecoder = MakeDecoder<float>(inputInfo);
-
- // set up output encoder
- ITensorHandle* output = m_Data.m_Outputs[0];
- const TensorInfo& outputInfo = GetTensorInfo(output);
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
- m_OutputShape = outputInfo.GetShape();
- m_OutputEncoder = MakeEncoder<float>(outputInfo);
+void RefTransposeConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
}
-void RefTransposeConvolution2dWorkload::Execute() const
+void RefTransposeConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefTransposeConvolution2dWorkload_Execute");
- m_InputDecoder->Reset(m_Data.m_Inputs[0]->Map());
- m_OutputEncoder->Reset(m_Data.m_Outputs[0]->Map());
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+ std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+ std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
TransposeConvolution2dImpl(m_Data.m_Parameters,
- m_InputShape,
- *m_InputDecoder,
- m_OutputShape,
- *m_OutputEncoder,
+ inputInfo.GetShape(),
+ *inputDecoder,
+ outputInfo.GetShape(),
+ *outputEncoder,
m_WeightsShape,
*m_WeightsDecoder,
m_BiasesDecoder.get());
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
index 9ded8c971f..7c18f10293 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
@@ -21,22 +21,17 @@ public:
const WorkloadInfo& info);
~RefTransposeConvolution2dWorkload() = default;
- void PostAllocationConfigure() override;
-
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
std::unique_ptr<ScopedCpuTensorHandle> m_Weights;
std::unique_ptr<ScopedCpuTensorHandle> m_Biases;
- std::unique_ptr<Decoder<float>> m_InputDecoder;
- std::unique_ptr<Encoder<float>> m_OutputEncoder;
-
std::unique_ptr<Decoder<float>> m_WeightsDecoder;
std::unique_ptr<Decoder<float>> m_BiasesDecoder;
- TensorShape m_InputShape;
- TensorShape m_OutputShape;
TensorShape m_WeightsShape;
};
diff --git a/src/backends/reference/workloads/RefTransposeWorkload.cpp b/src/backends/reference/workloads/RefTransposeWorkload.cpp
index cc7a555c41..828badd042 100644
--- a/src/backends/reference/workloads/RefTransposeWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeWorkload.cpp
@@ -16,12 +16,25 @@ namespace armnn
template <armnn::DataType DataType>
void RefTransposeWorkload<DataType>::Execute() const
{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefTransposeWorkload<DataType>::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+{
+ Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+}
+
+template <armnn::DataType DataType>
+void RefTransposeWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
using T = ResolveType<DataType>;
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
- const ITensorHandle* src = m_Data.m_Inputs[0];
- ITensorHandle* dst = m_Data.m_Outputs[0];
+ const ITensorHandle* src = inputs[0];
+ ITensorHandle* dst = outputs[0];
const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
armnnUtils::Transpose(GetTensorInfo(src).GetShape(), mappings, src->Map(), dst->Map(), sizeof(T));
diff --git a/src/backends/reference/workloads/RefTransposeWorkload.hpp b/src/backends/reference/workloads/RefTransposeWorkload.hpp
index 1e03f2e694..08ba74facc 100644
--- a/src/backends/reference/workloads/RefTransposeWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeWorkload.hpp
@@ -25,6 +25,9 @@ public:
using TypedWorkload<TransposeQueueDescriptor, DataType>::m_Data;
using TypedWorkload<TransposeQueueDescriptor, DataType>::TypedWorkload;
void Execute() const override;
+ void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
};
using RefTransposeBFloat16Workload = RefTransposeWorkload<DataType::BFloat16>;
diff --git a/src/backends/reference/workloads/Splitter.cpp b/src/backends/reference/workloads/Splitter.cpp
index 09edc5e0f5..ed6d2b8fd8 100644
--- a/src/backends/reference/workloads/Splitter.cpp
+++ b/src/backends/reference/workloads/Splitter.cpp
@@ -18,12 +18,14 @@
namespace armnn
{
-void Split(const SplitterQueueDescriptor& data)
+void Split(const SplitterQueueDescriptor& data,
+ std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs)
{
- const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
std::unique_ptr<Decoder<float>> decoderPtr =
- MakeDecoder<float>(inputInfo, data.m_Inputs[0]->Map());
+ MakeDecoder<float>(inputInfo, inputs[0]->Map());
Decoder<float>& decoder = *decoderPtr;
for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
@@ -45,7 +47,7 @@ void Split(const SplitterQueueDescriptor& data)
SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
//Split view extents are defined by the size of (the corresponding) input tensor.
- const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
// Check all dimensions to see if this element is inside the given input view.
@@ -65,7 +67,7 @@ void Split(const SplitterQueueDescriptor& data)
if (insideView)
{
std::unique_ptr<Encoder<float>> encoderPtr =
- MakeEncoder<float>(outputInfo, data.m_Outputs[viewIdx]->Map());
+ MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
Encoder<float>& encoder = *encoderPtr;
unsigned int outIndex = 0;
diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp
index aff4bcad94..e38a054650 100644
--- a/src/backends/reference/workloads/Splitter.hpp
+++ b/src/backends/reference/workloads/Splitter.hpp
@@ -14,9 +14,11 @@ namespace armnn
{
template <typename DataType>
-void Splitter(const SplitterQueueDescriptor& data)
+void Splitter(const SplitterQueueDescriptor& data,
+ std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs)
{
- const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
{
@@ -37,7 +39,7 @@ void Splitter(const SplitterQueueDescriptor& data)
SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
//Split view extents are defined by the size of (the corresponding) input tensor.
- const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
// Check all dimensions to see if this element is inside the given input view.
@@ -78,5 +80,7 @@ void Splitter(const SplitterQueueDescriptor& data)
}
}
-void Split(const SplitterQueueDescriptor& data);
+void Split(const SplitterQueueDescriptor& data,
+ std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs);
} //namespace armnn
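
Split() and the templated Splitter() now take the input and output handle vectors explicitly instead of reading data.m_Inputs/data.m_Outputs, so RefSplitterWorkload can forward whichever handles its Execute overload received. The loop structure itself is unchanged: walk every element of the input and copy it into any output view whose origin and extent contain it. An illustrative 1-D reduction of that idea (plain C++, not the N-dimensional code above):

    #include <cstdio>
    #include <vector>

    // 1-D version of the view-origin idea used by Split(): each output view
    // starts at an origin inside the input and receives the elements that
    // fall inside its extent.
    int main()
    {
        std::vector<float> input = {1.f, 2.f, 3.f, 4.f};

        struct View { unsigned origin; std::vector<float> data; };
        std::vector<View> views = { {0, std::vector<float>(2)},
                                    {2, std::vector<float>(2)} };

        for (unsigned index = 0; index < input.size(); ++index)
        {
            for (auto& view : views)
            {
                // Is this element inside the view's extent?
                if (index >= view.origin && index < view.origin + view.data.size())
                {
                    view.data[index - view.origin] = input[index];
                }
            }
        }

        std::printf("view0: %g %g  view1: %g %g\n",
                    views[0].data[0], views[0].data[1],
                    views[1].data[0], views[1].data[1]);
        return 0;
    }
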