15 TEST_CASE(
"FallbackImportToCpuAcc")
17 using namespace armnn;
22 CHECK((backendObjPtr !=
nullptr));
25 if (backendIds.find(
"MockRef") == backendIds.end())
27 std::string message =
"Cannot load MockRef";
86 std::string ignoredErrorMessage;
88 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
91 std::vector<float> inputData0
93 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
95 std::vector<float> inputData1
97 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
99 std::vector<float> inputData2
101 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
104 std::vector<float> outputData(12);
106 std::vector<float> expectedOutput
108 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
126 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
129 runtime->GetProfiler(netId)->EnableProfiling(
true);
132 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
136 std::stringstream ss;
138 std::string dump = ss.str();
141 std::size_t found = dump.find(
"ImportMemGeneric");
142 CHECK(found != std::string::npos);
145 found = dump.find(
"SyncMemGeneric");
146 CHECK(found != std::string::npos);
149 found = dump.find(
"CopyMemGeneric");
150 CHECK(found == std::string::npos);
156 CHECK(outputData == expectedOutput);
159 TEST_CASE(
"FallbackPaddingCopyToCpuAcc")
161 using namespace armnn;
166 CHECK((backendObjPtr !=
nullptr));
169 if (backendIds.find(
"MockRef") == backendIds.end())
171 std::string message =
"Cannot load MockRef";
228 std::string ignoredErrorMessage;
231 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
234 std::vector<float> inputData0
236 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
238 std::vector<float> inputData1
240 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
243 std::vector<float> outputData(2);
245 std::vector<float> expectedOutput
262 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
265 runtime->GetProfiler(netId)->EnableProfiling(
true);
268 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
272 std::stringstream ss;
274 std::string dump = ss.str();
277 std::size_t found = dump.find(
"CopyMemGeneric");
278 CHECK(found != std::string::npos);
281 found = dump.find(
"SyncMemGeneric");
282 CHECK(found != std::string::npos);
285 found = dump.find(
"ImportMemGeneric");
286 CHECK(found == std::string::npos);
292 CHECK(outputData == expectedOutput);
295 TEST_CASE(
"FallbackImportFromCpuAcc")
297 using namespace armnn;
302 CHECK((backendObjPtr !=
nullptr));
305 if (backendIds.find(
"MockRef") == backendIds.end())
307 std::string message =
"Cannot load MockRef";
366 std::string ignoredErrorMessage;
369 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
372 std::vector<float> inputData0
374 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
376 std::vector<float> inputData1
378 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
380 std::vector<float> inputData2
382 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
385 std::vector<float> outputData(12);
387 std::vector<float> expectedOutput
389 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
407 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
410 runtime->GetProfiler(netId)->EnableProfiling(
true);
413 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
417 std::stringstream ss;
419 std::string dump = ss.str();
422 std::size_t found = dump.find(
"ImportMemGeneric");
423 CHECK(found != std::string::npos);
426 found = dump.find(
"SyncMemGeneric");
427 CHECK(found != std::string::npos);
430 found = dump.find(
"CopyMemGeneric");
431 CHECK(found == std::string::npos);
437 CHECK(outputData == expectedOutput);
440 TEST_CASE(
"FallbackPaddingCopyFromCpuAcc")
442 using namespace armnn;
447 CHECK((backendObjPtr !=
nullptr));
450 if (backendIds.find(
"MockRef") == backendIds.end())
452 std::string message =
"Cannot load MockRef";
509 std::string ignoredErrorMessage;
512 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
515 std::vector<float> inputData0
517 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
519 std::vector<float> inputData1
524 std::vector<float> outputData(2);
526 std::vector<float> expectedOutput
543 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
546 runtime->GetProfiler(netId)->EnableProfiling(
true);
549 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
553 std::stringstream ss;
555 std::string dump = ss.str();
558 std::size_t found = dump.find(
"CopyMemGeneric");
559 CHECK(found != std::string::npos);
562 found = dump.find(
"SyncMemGeneric");
563 CHECK(found != std::string::npos);
566 found = dump.find(
"ImportMemGeneric");
567 CHECK(found == std::string::npos);
573 CHECK(outputData == expectedOutput);
576 TEST_CASE(
"FallbackDisableImportFromCpuAcc")
578 using namespace armnn;
583 CHECK((backendObjPtr !=
nullptr));
586 if (backendIds.find(
"MockRef") == backendIds.end())
588 std::string message =
"Cannot load MockRef";
644 std::string ignoredErrorMessage;
647 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
650 std::vector<float> inputData0
652 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
654 std::vector<float> inputData1
656 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
658 std::vector<float> inputData2
660 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
663 std::vector<float> outputData(12);
665 std::vector<float> expectedOutput
667 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
685 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
688 runtime->GetProfiler(netId)->EnableProfiling(
true);
691 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
695 std::stringstream ss;
697 std::string dump = ss.str();
700 std::size_t found = dump.find(
"CopyMemGeneric");
701 CHECK(found != std::string::npos);
704 found = dump.find(
"ImportMemGeneric");
705 CHECK(found == std::string::npos);
711 CHECK(outputData == expectedOutput);
714 #if defined(ARMCOMPUTECL_ENABLED) 715 TEST_CASE(
"NeonImportEnabledFallbackToCl")
717 using namespace armnn;
782 std::string ignoredErrorMessage;
786 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
789 std::vector<float> inputData0
791 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
793 std::vector<float> inputData1
795 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
797 std::vector<float> inputData2
799 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
802 std::vector<float> outputData(16);
804 std::vector<float> expectedOutput
806 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
811 size_t totalBytes = numElements *
sizeof(float);
814 const size_t alignment = 64;
815 size_t space = totalBytes + alignment + alignment;
816 auto inputData = std::make_unique<uint8_t[]>(space);
817 void* alignedInputPtr = inputData.get();
818 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
820 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
821 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
838 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
841 runtime->GetProfiler(netId)->EnableProfiling(
true);
844 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
848 std::stringstream ss;
850 std::string dump = ss.str();
853 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
854 CHECK(found != std::string::npos);
857 found = dump.find(
"CopyMemGeneric");
858 CHECK(found != std::string::npos);
861 for(
unsigned int i = 0; i < numElements; ++i)
863 CHECK(outputData[i] == expectedOutput[i]);
865 runtime->UnloadNetwork(netId);
868 TEST_CASE(
"NeonImportDisabledFallbackToCl")
870 using namespace armnn;
899 std::vector<BackendId> backends = {
Compute::CpuAcc, Compute::GpuAcc };
933 runtime->LoadNetwork(netId, std::move(optNet));
936 std::vector<float> inputData0
938 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
940 std::vector<float> inputData1
942 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
944 std::vector<float> inputData2
946 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
949 std::vector<float> outputData(12);
951 std::vector<float> expectedOutput
953 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
971 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
974 runtime->GetProfiler(netId)->EnableProfiling(
true);
977 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
981 std::stringstream ss;
983 std::string dump = ss.str();
986 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
987 CHECK(found != std::string::npos);
990 found = dump.find(
"CopyMemGeneric");
991 CHECK(found != std::string::npos);
994 CHECK(outputData == expectedOutput);
997 TEST_CASE(
"NeonImportEnabledFallbackSubgraphToCl")
999 using namespace armnn;
1038 std::vector<BackendId> backends = {
Compute::CpuAcc, Compute::GpuAcc };
1079 std::string ignoredErrorMessage;
1083 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1086 std::vector<float> inputData0
1088 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
1090 std::vector<float> inputData1
1092 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
1094 std::vector<float> inputData2
1096 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
1099 std::vector<float> outputData(4);
1101 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
1105 size_t totalBytes = numElements *
sizeof(float);
1106 const size_t alignment = 64;
1107 size_t space = totalBytes + alignment + alignment;
1108 auto inputData = std::make_unique<uint8_t[]>(space);
1109 void* alignedInputPtr = inputData.get();
1110 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
1112 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
1113 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
1130 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1133 runtime->GetProfiler(netId)->EnableProfiling(
true);
1136 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1140 std::stringstream ss;
1142 std::string dump = ss.str();
1145 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1146 CHECK(found != std::string::npos);
1149 found = dump.find(
"NeonPooling2dWorkload_Execute");
1150 CHECK(found != std::string::npos);
1153 found = dump.find(
"CopyMemGeneric");
1154 CHECK(found != std::string::npos);
1157 found = dump.find(
"SyncMemGeneric");
1158 CHECK(found != std::string::npos);
1161 CHECK(outputData == expectedOutput);
1162 runtime->UnloadNetwork(netId);
1165 TEST_CASE(
"NeonImportDisableFallbackSubgraphToCl")
1167 using namespace armnn;
1202 std::vector<BackendId> backends = {
Compute::CpuAcc, Compute::GpuAcc };
1241 runtime->LoadNetwork(netId, std::move(optNet));
1244 std::vector<float> inputData0
1246 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1248 std::vector<float> inputData1
1250 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1252 std::vector<float> inputData2
1254 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1257 std::vector<float> outputData(2);
1259 std::vector<float> expectedOutput{ 11.0f, -1.0f };
1276 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1279 runtime->GetProfiler(netId)->EnableProfiling(
true);
1282 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1286 std::stringstream ss;
1288 std::string dump = ss.str();
1291 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1292 CHECK(found != std::string::npos);
1295 found = dump.find(
"NeonPooling2dWorkload_Execute");
1296 CHECK(found != std::string::npos);
1299 found = dump.find(
"CopyMemGeneric");
1300 CHECK(found != std::string::npos);
1303 CHECK(outputData == expectedOutput);
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
BackendIdSet GetBackendIds() const
std::unordered_set< BackendId > BackendIdSet
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
static ProfilerManager & GetInstance()
bool CheckOrder(const armnn::Graph &graph, const armnn::Layer *first, const armnn::Layer *second)
Checks that first comes before second in the order.
virtual void BackendSelectionHint(Optional< BackendId > backend)=0
Provide a hint for the optimizer as to which backend to prefer for this layer.
uint32_t m_PoolWidth
Pooling width value.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
constexpr const char * MockImportBackendId()
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PoolHeight
Pooling height value.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
const BackendId & GetBackendId() const
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
CPU Execution: NEON: ArmCompute.
armnn::IBackendInternalUniquePtr CreateBackendObject(const armnn::BackendId &backendId)
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
A Pooling2dDescriptor for the Pooling2dLayer.
static INetworkPtr Create(NetworkOptions networkOptions={})
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
unsigned int GetNumElements() const