17 #include <doctest/doctest.h> 24 using namespace armnn;
// Runs a small network that combines `inputData` with a constant tensor of
// `constantData` on the given backends and returns true iff the produced
// output equals `expectedOutputData` exactly (vector operator==).
// NOTE(review): the extraction dropped the template declaration and the
// network construction / optimisation code (original lines ~28..58); T is
// presumably the element type -- confirm against the full file.
27 bool ConstantUsageTest(
const std::vector<BackendId>& computeDevice,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
// Load the optimised network (netId / optNet come from the dropped lines).
59 runtime->LoadNetwork(netId, std::move(optNet));
62 std::vector<T> outputData(inputData.size());
// Bind input and output tensors at layer-binding id 0.
66 {0,
ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
70 {0,
Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
// Exact element-wise comparison -- no tolerance is applied here.
77 return outputData == expectedOutputData;
// Float32 variant of ConstantUsageTest: input {1..6} combined with constant
// {6..1} is expected to produce all 7s (consistent with element-wise add).
80 inline bool ConstantUsageFloat32Test(
const std::vector<BackendId>& backends)
85 return ConstantUsageTest(backends,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f },
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f },
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }
// Uint8 (quantized) variant: the same 1..6 / 6..1 / 7s data quantized with a
// shared scale/offset so the arithmetic matches the float test.
// NOTE(review): offset is declared int8_t (-43) while the vectors quantize to
// uint8_t -- presumably intentional (signed zero-point), confirm against
// armnnUtils::QuantizedVector's parameter type.
93 inline bool ConstantUsageUint8Test(
const std::vector<BackendId>& backends)
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
104 return ConstantUsageTest(backends,
106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset),
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset),
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)
// Counts non-overlapping occurrences of `substring` in `string` by repeatedly
// calling find() and advancing past each match.
// NOTE(review): the counter increment and return statement fall on lines
// dropped by the extraction; `string` as a parameter name is legal but easily
// confused with std::string.
113 int SubStringCounter(std::string&
string, std::string&& substring)
115 std::size_t found = 0;
// Resume searching after the previous hit so matches never overlap.
118 while((found =
string.find(substring, found)) != std::string::npos)
122 found += substring.length();
// Generic end-to-end test driver: binds the supplied per-binding-id input
// data, runs the network on `backends`, and CHECKs every output element
// against `expectedOutputData` within `tolerance`.
// NOTE(review): the template header and the function-name line (original
// ~123..129) were dropped by the extraction; TInput/TOutput/ArmnnOType are
// declared there -- confirm the signature against the full file.
130 const std::map<
int, std::vector<TInput>>& inputTensorData,
131 const std::map<
int, std::vector<TOutput>>& expectedOutputData,
132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
144 runtime->LoadNetwork(netId, std::move(optNet));
// One ConstTensor per input binding id.
147 inputTensors.reserve(inputTensorData.size());
148 for (
auto&& it : inputTensorData)
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
// Allocate backing storage for each expected output and bind a Tensor to it.
154 outputTensors.reserve(expectedOutputData.size());
155 std::map<int, std::vector<TOutput>> outputStorage;
156 for (
auto&& it : expectedOutputData)
158 std::vector<TOutput> out(it.second.size());
159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
// Tolerant element-wise comparison of actual vs expected outputs.
169 for (
auto&& it : expectedOutputData)
171 std::vector<TOutput> out = outputStorage.at(it.first);
172 for (
unsigned int i = 0; i < out.size(); ++i)
174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) ==
true,
175 "Actual output: " << out[i] <<
". Expected output:" << it.second[i]);
// Verifies that importing a misaligned *input* pointer is rejected:
// EnqueueWorkload must throw MemoryImportException.
181 inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
183 using namespace armnn;
// (Network creation / optimisation on original lines ~184..211 dropped by
// the extraction.)
212 std::string ignoredErrorMessage;
215 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
218 std::vector<float> inputData
220 1.0f, 2.0f, 3.0f, 4.0f
// Offset the input pointer by one byte so it is no longer float-aligned.
224 float* misalignedInputData =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(inputData.data()) + 1);
226 std::vector<float> outputData(4);
229 float* alignedOutputData = outputData.data();
237 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
240 runtime->GetProfiler(netId)->EnableProfiling(
true);
// The misaligned import must fail loudly rather than silently falling back.
243 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors),
MemoryImportException);
// Verifies that exporting to a misaligned *output* pointer is rejected.
// Two EnqueueWorkload calls are asserted to throw -- first a
// MemoryImportException, then a MemoryExportException; presumably the network
// properties differ between the two attempts (set on dropped lines), confirm
// against the full file.
246 inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
248 using namespace armnn;
277 std::string ignoredErrorMessage;
280 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
283 std::vector<float> inputData
285 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
289 float* alignedInputData = inputData.data();
291 std::vector<float> outputData(5);
// Offset the output pointer by one byte so it is no longer float-aligned.
294 float* misalignedOutputData =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(outputData.data()) + 1);
302 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
309 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors),
MemoryImportException);
313 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors),
MemoryExportException);
// Happy-path import test: with properly aligned buffers the run must use the
// zero-copy path -- the profiler dump must contain "ActivationWorkload" and
// "SyncMemGeneric" but no "CopyMemGeneric" -- and produce x^2 of the input.
317 inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
319 using namespace armnn;
348 std::string ignoredErrorMessage;
351 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
354 std::vector<float> inputData
356 1.0f, 2.0f, 3.0f, 4.0f
359 std::vector<float> outputData(4);
361 std::vector<float> expectedOutput
363 1.0f, 4.0f, 9.0f, 16.0f
372 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
375 runtime->GetProfiler(netId)->EnableProfiling(
true);
378 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
// Inspect the profiler dump to prove which memory strategy was used.
382 std::stringstream ss;
384 std::string dump = ss.str();
387 std::size_t found = dump.find(
"ActivationWorkload");
388 CHECK(found != std::string::npos);
// SyncMemGeneric present => import path was taken.
391 found = dump.find(
"SyncMemGeneric");
392 CHECK(found != std::string::npos);
// CopyMemGeneric absent => no fallback copy occurred.
395 found = dump.find(
"CopyMemGeneric");
396 CHECK(found == std::string::npos);
399 CHECK(outputData == expectedOutput);
// Import-only scenario: loads the network with import-enabled properties and
// counts "SyncMemGeneric" / "CopyMemGeneric" occurrences in the profiler dump
// (the expected counts are asserted on lines dropped by the extraction).
402 inline void ImportOnlyWorkload(std::vector<BackendId> backends)
404 using namespace armnn;
429 INFO(
"Load Network");
432 std::string ignoredErrorMessage;
436 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
439 INFO(
"Generate Data");
441 std::vector<float> inputData
443 1.0f, 2.0f, 3.0f, 4.0f
446 std::vector<float> outputData(4);
// Expected result is the element-wise square of the input.
448 std::vector<float> expectedOutput
450 1.0f, 4.0f, 9.0f, 16.0f
453 INFO(
"Create Inference");
461 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
464 INFO(
"Get Profiler");
465 runtime->GetProfiler(netId)->EnableProfiling(
true);
467 INFO(
"Run Inference");
469 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
471 INFO(
"Print Profiler");
474 std::stringstream ss;
476 std::string dump = ss.str();
479 INFO(
"Find SyncMemGeneric");
480 int count = SubStringCounter(dump,
"SyncMemGeneric");
484 INFO(
"Find CopyMemGeneric");
485 count = SubStringCounter(dump,
"CopyMemGeneric");
489 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// Export-only scenario: mirror of ImportOnlyWorkload with export-enabled
// network properties; the expected SyncMemGeneric/CopyMemGeneric counts are
// asserted on lines dropped by the extraction.
492 inline void ExportOnlyWorkload(std::vector<BackendId> backends)
494 using namespace armnn;
519 INFO(
"Load Network");
522 std::string ignoredErrorMessage;
524 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
527 INFO(
"Generate Data");
529 std::vector<float> inputData
531 1.0f, 2.0f, 3.0f, 4.0f
534 std::vector<float> outputData(4);
// Expected result is the element-wise square of the input.
536 std::vector<float> expectedOutput
538 1.0f, 4.0f, 9.0f, 16.0f
541 INFO(
"Create Inference");
549 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
552 INFO(
"Get Profiler");
553 runtime->GetProfiler(netId)->EnableProfiling(
true);
555 INFO(
"Run Inference");
557 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
559 INFO(
"Print Profiler");
562 std::stringstream ss;
564 std::string dump = ss.str();
567 INFO(
"Find SyncMemGeneric");
568 int count = SubStringCounter(dump,
"SyncMemGeneric");
572 INFO(
"Find CopyMemGeneric");
573 count = SubStringCounter(dump,
"CopyMemGeneric");
577 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// Combined import + export scenario; same structure as the import-only /
// export-only tests, with the expected profiler counts asserted on lines
// dropped by the extraction.
580 inline void ImportAndExportWorkload(std::vector<BackendId> backends)
582 using namespace armnn;
606 INFO(
"Load Network");
609 std::string ignoredErrorMessage;
613 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
616 INFO(
"Generate Data");
618 std::vector<float> inputData
620 1.0f, 2.0f, 3.0f, 4.0f
623 std::vector<float> outputData(4);
// Expected result is the element-wise square of the input.
625 std::vector<float> expectedOutput
627 1.0f, 4.0f, 9.0f, 16.0f
630 INFO(
"Create inference");
638 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
641 INFO(
"Get Profiler");
642 runtime->GetProfiler(netId)->EnableProfiling(
true);
644 INFO(
"Run Inference");
646 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
648 INFO(
"Print Profiler");
651 std::stringstream ss;
653 std::string dump = ss.str();
656 INFO(
"Find SyncMemGeneric");
657 int count = SubStringCounter(dump,
"SyncMemGeneric");
661 INFO(
"Find CopyMemGeneric");
662 count = SubStringCounter(dump,
"CopyMemGeneric");
666 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// When one output slot feeds several connections, the zero-copy export path
// cannot be used: the profiler dump must show a backend activation workload
// and "CopyMemGeneric", and must NOT show "SyncMemGeneric". Both outputs must
// still contain the squared input.
669 inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
671 using namespace armnn;
701 std::string ignoredErrorMessage;
704 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
707 std::vector<float> inputData
709 1.0f, 2.0f, 3.0f, 4.0f
712 std::vector<float> outputData0(4);
713 std::vector<float> outputData1(4);
715 std::vector<float> expectedOutput
717 1.0f, 4.0f, 9.0f, 16.0f
// Two output bindings, one per connection of the shared output slot.
726 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
727 {1,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
732 runtime->GetProfiler(netId)->EnableProfiling(
true);
735 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
739 std::stringstream ss;
741 std::string dump = ss.str();
743 std::size_t found = std::string::npos;
// Backend-specific workload names; the surrounding per-backend branches were
// dropped by the extraction.
747 found = dump.find(
"RefActivationWorkload");
751 found = dump.find(
"NeonActivationWorkload");
755 found = dump.find(
"ClActivationWorkload");
758 CHECK(found != std::string::npos);
760 found = dump.find(
"SyncMemGeneric");
761 CHECK(found == std::string::npos);
763 found = dump.find(
"CopyMemGeneric");
764 CHECK(found != std::string::npos);
767 CHECK(std::equal(outputData0.begin(), outputData0.end(),
768 expectedOutput.begin(), expectedOutput.end()));
769 CHECK(std::equal(outputData1.begin(), outputData1.end(),
770 expectedOutput.begin(), expectedOutput.end()));
// Exercises a StridedSlice whose end values exceed the sliceable range; only
// the descriptor set-up survives the extraction (original lines ~776..808,
// including the expected-failure assertions, were dropped).
773 inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
775 using namespace armnn;
790 descriptor.
m_End = {2, 3};
// Force-import path with correctly aligned buffers: explicitly imports the
// input/output tensors (ImportInputs/ImportOutputs on dropped lines) and runs
// EnqueueWorkload with the imported ids; the two profiler-count branches
// (presumably per-backend) assert the SyncMemGeneric/CopyMemGeneric counts on
// dropped lines.
809 inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
817 using namespace armnn;
833 INFO(
"Load Network");
837 std::string ignoredErrorMessage;
839 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
841 INFO(
"Generate Data");
844 std::vector<float> inputData
846 1.0f, 2.0f, 3.0f, 4.0f
848 std::vector<float> outputData(4);
849 std::vector<float> expectedOutput
851 1.0f, 4.0f, 9.0f, 16.0f
// Preconditions: both buffers really are aligned (alignment is defined on a
// dropped line).
856 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
857 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
859 INFO(
"Create Inference");
866 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
869 runtime->GetProfiler(netId)->EnableProfiling(
true);
870 std::vector<ImportedInputId> importedInputIds =
872 std::vector<ImportedOutputId> importedOutputIds =
875 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
879 std::stringstream ss;
881 std::string dump = ss.str();
887 int count = SubStringCounter(dump,
"SyncMemGeneric");
890 count = SubStringCounter(dump,
"CopyMemGeneric");
896 int count = SubStringCounter(dump,
"SyncMemGeneric");
899 count = SubStringCounter(dump,
"CopyMemGeneric");
903 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// Force-import with a deliberately misaligned *input* buffer: import should
// fall back to copying for the input while the aligned output can still be
// exported; the exact profiler counts are asserted on dropped lines.
// NOTE(review): no std::free(memPtr) is visible in this extraction --
// presumably it sits on a dropped line; confirm against the full file
// (otherwise this leaks the malloc'd buffer).
906 inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
914 using namespace armnn;
935 INFO(
"Load Network");
938 std::string ignoredErrorMessage;
940 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
942 INFO(
"Generate Data");
// Over-allocate by one byte so a pointer offset by 1 stays in bounds.
946 auto memPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
948 float* misalignedMemPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(memPtr) + 1);
// Precondition: the pointer really is misaligned.
952 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
954 std::vector<float> inputData
956 1.0f, 2.0f, 3.0f, 4.0f
959 std::memcpy(misalignedMemPtr, inputData.data(), 4*
sizeof(float));
961 std::vector<float> outputData(4);
963 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
965 std::vector<float> expectedOutput
967 1.0f, 4.0f, 9.0f, 16.0f
970 INFO(
"Create Inference");
977 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
979 runtime->GetProfiler(netId)->EnableProfiling(
true);
980 std::vector<ImportedInputId> importedInputIds =
982 std::vector<ImportedOutputId> importedOutputIds =
986 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
990 std::stringstream ss;
992 std::string dump = ss.str();
1004 int count = SubStringCounter(dump,
"SyncMemGeneric");
1007 count = SubStringCounter(dump,
"CopyMemGeneric");
1013 int count = SubStringCounter(dump,
"SyncMemGeneric");
1016 count = SubStringCounter(dump,
"CopyMemGeneric");
1021 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// Force-import with a deliberately misaligned *output* buffer; the result is
// memcpy'd back into an aligned vector before comparison because the
// misaligned pointer cannot be read as float[] directly.
// NOTE(review): no std::free(memPtr) and no increment of `index` are visible
// in this extraction -- presumably on dropped lines; confirm against the
// full file.
1025 inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1033 using namespace armnn;
1054 INFO(
"Load Network");
1057 std::string ignoredErrorMessage;
1059 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1061 INFO(
"Generate Data");
// Over-allocate by one byte so a pointer offset by 1 stays in bounds.
1065 auto memPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1067 float* misalignedMemPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(memPtr) + 1);
1071 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1074 std::vector<float> inputData
1076 1.0f, 2.0f, 3.0f, 4.0f
1080 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1081 std::vector<float> expectedOutput
1083 1.0f, 4.0f, 9.0f, 16.0f
1086 INFO(
"Create Inference");
1093 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1095 runtime->GetProfiler(netId)->EnableProfiling(
true);
1096 std::vector<ImportedInputId> importedInputIds =
1098 std::vector<ImportedOutputId> importedOutputIds =
1102 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1106 std::stringstream ss;
1108 std::string dump = ss.str();
1116 int count = SubStringCounter(dump,
"SyncMemGeneric");
1119 count = SubStringCounter(dump,
"CopyMemGeneric");
// Copy out of the misaligned buffer before comparing element by element.
1132 unsigned int index = 0;
1133 std::vector<float> outputData(expectedOutput.size(), 0);
1134 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() *
sizeof(float));
1135 for (
auto outputValue : expectedOutput)
1137 CHECK(outputValue == outputData[index]);
// Force-import with BOTH buffers misaligned: both input and output are
// malloc'd with a one-byte offset, the workload is run with imported ids, and
// the result is memcpy'd to an aligned vector for comparison. Both buffers
// are freed at the end.
1143 inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1151 using namespace armnn;
1172 INFO(
"Load Network");
1175 std::string ignoredErrorMessage;
1177 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1179 INFO(
"Generate Data");
// Over-allocate by one byte so a pointer offset by 1 stays in bounds.
1183 auto inputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1184 float* misalignedInputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(inputMemPtr) + 1);
1188 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1189 std::vector<float> inputData
1191 1.0f, 2.0f, 3.0f, 4.0f
1193 std::memcpy(misalignedInputPtr, inputData.data(), 4*
sizeof(float));
1195 auto outputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1196 float* misalignedOutputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(outputMemPtr) + 1);
1199 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1201 std::vector<float> expectedOutput
1203 1.0f, 4.0f, 9.0f, 16.0f
1206 INFO(
"Create Inference");
1213 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1215 runtime->GetProfiler(netId)->EnableProfiling(
true);
1216 std::vector<ImportedInputId> importedInputIds =
1218 std::vector<ImportedOutputId> importedOutputIds =
1222 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1226 std::stringstream ss;
1228 std::string dump = ss.str();
1236 int count = SubStringCounter(dump,
"SyncMemGeneric");
1239 count = SubStringCounter(dump,
"CopyMemGeneric");
// Copy out of the misaligned buffer before comparing element by element.
// NOTE(review): the increment of `index` falls on a dropped line.
1243 unsigned int index = 0;
1244 std::vector<float> outputData(expectedOutput.size(), 0);
1245 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() *
sizeof(float));
1246 for (
auto expectedValue : expectedOutput)
1248 CHECK(expectedValue == outputData[index]);
// Release the manually malloc'd buffers.
1251 std::free(inputMemPtr);
1252 std::free(outputMemPtr);
// Repeated-inference scenario: first inference with aligned buffers (import
// path), then a second inference on the SAME loaded network with misaligned
// buffers (re-imported via ImportOutputs), verifying the runtime switches
// strategies correctly between runs. Network is unloaded and buffers freed at
// the end. Expected profiler counts are asserted on dropped lines.
1255 inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1265 using namespace armnn;
1286 INFO(
"Load Network");
1289 std::string ignoredErrorMessage;
1291 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1293 INFO(
"Generate Data");
// --- First inference: aligned buffers -------------------------------------
1296 std::vector<float> inputData
1298 1.0f, 2.0f, 3.0f, 4.0f
1300 std::vector<float> outputData(4);
1301 std::vector<float> expectedOutput
1303 1.0f, 4.0f, 9.0f, 16.0f
1308 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1309 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1311 INFO(
"Create Inference");
1318 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1321 runtime->GetProfiler(netId)->EnableProfiling(
true);
1322 std::vector<ImportedInputId> importedInputIds =
1324 std::vector<ImportedOutputId> importedOutputIds =
1327 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1331 std::stringstream ss;
1333 std::string dump = ss.str();
1339 int count = SubStringCounter(dump,
"SyncMemGeneric");
1342 count = SubStringCounter(dump,
"CopyMemGeneric");
1348 int count = SubStringCounter(dump,
"SyncMemGeneric");
1351 count = SubStringCounter(dump,
"CopyMemGeneric");
1355 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
// --- Second inference: misaligned buffers ---------------------------------
// Over-allocate by one byte so a pointer offset by 1 stays in bounds.
1359 auto inputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1360 float* misalignedInputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(inputMemPtr) + 1);
1363 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1365 std::vector<float> inputValues
1367 2.0f, 3.0f, 4.0f, 5.0f
1370 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*
sizeof(float));
1372 auto outputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1373 float* misalignedOutputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(outputMemPtr) + 1);
1376 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1378 std::vector<float> expectedMisalignedOutput
1380 4.0f, 9.0f, 16.0f, 25.0f
1383 INFO(
"Create Second Inference");
1390 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
// Re-import the (now misaligned) outputs before the second run.
1393 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned,
MemorySource::Malloc);
1396 runtime->EnqueueWorkload(netId,
1397 inputTensorsMisaligned,
1398 outputTensorsMisaligned,
1413 int count = SubStringCounter(dump,
"SyncMemGeneric");
1416 count = SubStringCounter(dump,
"CopyMemGeneric");
// Copy out of the misaligned buffer before comparing element by element.
// NOTE(review): the increment of `index` falls on a dropped line.
1420 unsigned int index = 0;
1421 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
1422 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() *
sizeof(float));
1423 for (
auto outputValue : expectedMisalignedOutput)
1425 CHECK(outputValue == alignedOutputData[index]);
// Clean up: unload the network and release the malloc'd buffers.
1429 runtime->UnloadNetwork(netId);
1430 std::free(inputMemPtr);
1431 std::free(outputMemPtr);
// Inverse of ForceImportRepeatedInferencesEndToEndTest: the FIRST inference
// uses misaligned malloc'd buffers and the SECOND uses aligned vectors on the
// same loaded network, verifying the runtime can switch from the copy
// fallback back to the import path. Expected profiler counts are asserted on
// dropped lines; network is unloaded at the end.
1435 inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1445 using namespace armnn;
1466 INFO(
"Load Network");
1469 std::string ignoredErrorMessage;
1471 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1473 INFO(
"Generate Data");
// --- First inference: misaligned buffers ----------------------------------
// Over-allocate by one byte so a pointer offset by 1 stays in bounds.
1477 auto inputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1478 float* misalignedInputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(inputMemPtr) + 1);
1482 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1483 std::vector<float> inputValues
1485 2.0f, 3.0f, 4.0f, 5.0f
1487 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() *
sizeof(float));
1489 auto outputMemPtr = std::malloc(4 *
sizeof(
float) +
sizeof(
char));
1490 float* misalignedOutputPtr =
reinterpret_cast<float*
>(
reinterpret_cast<char*
>(outputMemPtr) + 1);
1493 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1495 std::vector<float> expectedMisalignedOutput
1497 4.0f, 9.0f, 16.0f, 25.0f
1500 INFO(
"Create Second Inference");
1507 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1509 runtime->GetProfiler(netId)->EnableProfiling(
true);
1510 std::vector<ImportedInputId> importedInputIds =
1512 std::vector<ImportedOutputId> importedOutputIds =
1516 runtime->EnqueueWorkload(netId,
1517 inputTensorsMisaligned,
1518 outputTensorsMisaligned,
1524 std::stringstream ss;
1526 std::string dump = ss.str();
1534 int count = SubStringCounter(dump,
"SyncMemGeneric");
1537 count = SubStringCounter(dump,
"CopyMemGeneric");
// Copy out of the misaligned buffer before comparing element by element.
// NOTE(review): the increment of `index` falls on a dropped line.
1541 unsigned int index = 0;
1542 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1543 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*
sizeof(float));
1544 for (
auto outputValue : expectedMisalignedOutput)
1546 CHECK(outputValue == alignedOutput[index]);
1549 std::free(inputMemPtr);
1550 std::free(outputMemPtr);
// --- Second inference: aligned buffers ------------------------------------
1553 std::vector<float> inputData
1555 1.0f, 2.0f, 3.0f, 4.0f
1557 std::vector<float> outputData(4);
1558 std::vector<float> expectedOutput
1560 1.0f, 4.0f, 9.0f, 16.0f
1564 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1565 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1567 INFO(
"Create Inference");
1574 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1580 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1591 int count = SubStringCounter(dump,
"SyncMemGeneric");
1594 count = SubStringCounter(dump,
"CopyMemGeneric");
1601 int count = SubStringCounter(dump,
"SyncMemGeneric");
1604 count = SubStringCounter(dump,
"CopyMemGeneric");
1608 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1610 runtime->UnloadNetwork(netId);
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
CPU Execution: Reference C++ kernels.
int32_t m_ShrinkAxisMask
Shrink axis mask value. If set, the nth specification shrinks the dimensionality by 1...
static ProfilerManager & GetInstance()
std::vector< int > m_Begin
Begin values for the input that will be sliced.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
typename ResolveTypeImpl< DT >::Type ResolveType
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const
Analyzes the tracked events and writes the results to the given output stream.
Copyright (c) 2021 ARM Limited and Contributors.
int32_t m_BeginMask
Begin mask value.
int32_t m_EndMask
End mask value.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
void SetQuantizationScale(float scale)
GPU Execution: OpenCL: ArmCompute.
std::vector< int > m_Stride
Stride values for the input that will be sliced.
An ActivationDescriptor for the ActivationLayer.
std::vector< int > m_End
End values for the input that will be sliced.
CPU Execution: NEON: ArmCompute.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
A StridedSliceDescriptor for the StridedSliceLayer.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
void SetQuantizationOffset(int32_t offset)
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
static INetworkPtr Create(NetworkOptions networkOptions={})
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
constexpr unsigned int GetDataTypeSize(DataType dataType)