21 #include <doctest/doctest.h>
25 using namespace armnn;
// Creates a workload for the given layer via the supplied factory and checks that
// the returned IWorkload really is of the requested derived Workload type before
// handing ownership back as a typed unique_ptr.
// NOTE(review): this region appears truncated — interior lines (factory parameter,
// function braces) are missing from this view; comments describe only visible code.
33 template<
typename Workload>
34 std::unique_ptr<Workload> MakeAndCheckWorkload(
Layer& layer,
38 std::unique_ptr<IWorkload> workload = layer.
CreateWorkload(factory);
// PolymorphicDowncast returns the same pointer when the dynamic type matches,
// so equality here verifies the workload is (at least) of type Workload.
39 CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
40 "Cannot convert to derived class");
41 std::string reasonIfUnsupported;
// Transfer ownership out of the IWorkload unique_ptr into a typed one.
44 return std::unique_ptr<Workload>(
static_cast<Workload*
>(workload.release()));
// Test helpers that build a small graph (input -> layer -> output), create tensor
// handles, make the workload, and verify the queue descriptor's input/output counts.
// NOTE(review): truncated view — layer construction and graph setup lines are missing.
65 template <
typename ActivationWorkload, armnn::DataType DataType>
// m_B is part of the ActivationDescriptor being configured for the layer under test.
73 layerDesc.
m_B = -10.0f;
84 Connect(input, layer, tensorInfo);
85 Connect(layer, output, tensorInfo);
87 CreateTensorHandles(graph, factory);
90 auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);
// Activation is unary: exactly one input and one output expected.
93 CHECK(queueDescriptor.
m_Inputs.size() == 1);
94 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Binary elementwise variant: two inputs connected to slots 0 and 1 of the layer.
103 template <
typename WorkloadType,
104 typename DescriptorType,
120 Connect(input1, layer, tensorInfo, 0, 0);
121 Connect(input2, layer, tensorInfo, 0, 1);
122 Connect(layer, output, tensorInfo);
123 CreateTensorHandles(graph, factory);
126 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
128 DescriptorType queueDescriptor = workload->GetData();
129 CHECK(queueDescriptor.m_Inputs.size() == 2);
130 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Three closely-related binary-elementwise test variants that additionally attach an
// ActivationDescriptor "blob" (additional information) to the layer, then verify it
// survives through to the workload's queue descriptor.
// NOTE(review): truncated view — layer creation and the lines attaching the
// activationDesc to the layer are missing; comments describe only visible code.
136 template<
typename WorkloadType,
137 typename DescriptorType,
145 auto activationDesc = std::make_shared<ActivationDescriptor>();
146 activationDesc->m_A = 10.0f;
147 activationDesc->m_B = 5.0f;
159 Connect(input1, layer, tensorInfo, 0, 0);
160 Connect(input2, layer, tensorInfo, 0, 1);
161 Connect(layer, output, tensorInfo);
162 CreateTensorHandles(graph, factory);
165 std::shared_ptr<ActivationDescriptor>
// Exact float comparison is safe here: the values were assigned verbatim above.
168 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
169 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
175 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
177 DescriptorType queueDescriptor = workload->GetData();
// 'template' keyword required: GetAdditionalInformation is a dependent member template.
180 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
188 CHECK(queueDescriptor.m_Inputs.size() == 2);
189 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Second variant — same setup, checks sizes before reading the additional information.
194 template<
typename WorkloadType,
195 typename DescriptorType,
203 auto activationDesc = std::make_shared<ActivationDescriptor>();
204 activationDesc->m_A = 10.0f;
205 activationDesc->m_B = 5.0f;
217 Connect(input1, layer, tensorInfo, 0, 0);
218 Connect(input2, layer, tensorInfo, 0, 1);
219 Connect(layer, output, tensorInfo);
220 CreateTensorHandles(graph, factory);
223 std::shared_ptr<ActivationDescriptor>
226 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
227 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
233 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
235 DescriptorType queueDescriptor = workload->GetData();
236 CHECK(queueDescriptor.m_Inputs.size() == 2);
237 CHECK(queueDescriptor.m_Outputs.size() == 1);
239 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
// Third variant — retrieves the blob back from the layer itself for verification.
250 template<
typename WorkloadType,
251 typename DescriptorType,
259 auto activationDesc = std::make_shared<ActivationDescriptor>();
260 activationDesc->m_A = 10.0f;
261 activationDesc->m_B = 5.0f;
273 Connect(input1, layer, tensorInfo, 0, 0);
274 Connect(input2, layer, tensorInfo, 0, 1);
275 Connect(layer, output, tensorInfo);
276 CreateTensorHandles(graph, factory);
279 std::shared_ptr<ActivationDescriptor>
280 activationDescPtr = layer->template GetAdditionalInformation<ActivationDescriptor>();
282 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
283 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
289 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
291 DescriptorType queueDescriptor = workload->GetData();
293 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
295 CHECK(queueDescriptor.m_Inputs.size() == 2);
296 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Unary (one-input, one-output) workload test fragment.
// NOTE(review): truncated view — layer/graph construction lines are missing.
306 template <
typename WorkloadType,
307 typename DescriptorType,
320 Connect(input, layer, tensorInfo, 0, 0);
321 Connect(layer, output, tensorInfo, 0, 0);
322 CreateTensorHandles(graph, factory);
324 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
325 DescriptorType queueDescriptor = workload->GetData();
327 CHECK(queueDescriptor.m_Inputs.size() == 1);
328 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Batch-normalization workload tests: allocate the four parameter tensors
// (mean/variance/beta/gamma), wire up a 1-in/1-out graph and verify the descriptor.
// Tensor shape depends on data layout: NHWC -> {2,4,4,3}, NCHW -> {2,3,4,4}
// (presumably — the switch/if selecting between them is not visible; confirm).
333 template <
typename BatchNormalizationWorkloadType, armnn::DataType DataType>
334 std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
341 tensorShape = { 2, 4, 4, 3 };
345 tensorShape = { 2, 3, 4, 4 };
350 layerDesc.
m_Eps = 0.05f;
// The layer owns its parameter tensors via ScopedTensorHandle.
356 layer->
m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
357 layer->
m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
358 layer->
m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
359 layer->
m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
360 layer->
m_Mean->Allocate();
362 layer->
m_Beta->Allocate();
371 Connect(input, layer, tensorInfo);
372 Connect(layer, output, tensorInfo);
373 CreateTensorHandles(graph, factory);
376 auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
379 CHECK(queueDescriptor.
m_Inputs.size() == 1);
380 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Variant with an ActivationDescriptor blob attached (fused-activation path).
391 template <
typename BatchNormalizationWorkloadType, armnn::DataType DataType>
392 std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
399 tensorShape = { 2, 4, 4, 3 };
403 tensorShape = { 2, 3, 4, 4 };
408 layerDesc.
m_Eps = 0.05f;
414 layer->
m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
415 layer->
m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
416 layer->
m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
417 layer->
m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
418 layer->
m_Mean->Allocate();
420 layer->
m_Beta->Allocate();
423 auto activationDesc = std::make_shared<ActivationDescriptor>();
424 activationDesc->m_A = 10.0f;
425 activationDesc->m_B = 5.0f;
// Exact float compare is safe: values were assigned verbatim above.
432 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
433 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
444 Connect(input, layer, tensorInfo);
445 Connect(layer, output, tensorInfo);
446 CreateTensorHandles(graph, factory);
449 auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
460 CHECK(queueDescriptor.
m_Inputs.size() == 1);
461 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Convolution2d workload tests. Weights (and, in later variants, bias) are provided
// as constant-tensor inputs connected to layer slots 1 and 2, so input counts are
// 2 (no bias) or 3 (with bias).
// NOTE(review): truncated view — descriptor setup and layer creation are missing.
472 template <
typename Convolution2dWorkload, armnn::DataType DataType>
499 weightsTensorInfo.SetConstant();
506 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
507 weights->m_LayerOutput->Allocate();
// Weights feed the convolution layer's input slot 1.
511 Connect(weights, layer, weightsTensorInfo, 0, 1);
513 CreateTensorHandles(graph, factory);
516 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
528 CHECK(queueDescriptor.
m_Inputs.size() == 2);
529 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Fused-activation variant: attaches an ActivationDescriptor blob and also supplies bias.
535 template<
typename Convolution2dWorkload, armnn::DataType DataType>
536 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
563 weightsTensorInfo.SetConstant();
567 auto activationDesc = std::make_shared<ActivationDescriptor>();
568 activationDesc->m_A = 10.0f;
569 activationDesc->m_B = 5.0f;
577 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
578 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
589 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
590 weights->m_LayerOutput->Allocate();
591 bias->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
596 Connect(weights, layer, weightsTensorInfo, 0, 1);
597 Connect(bias, layer, biasTensorInfo, 0, 2);
599 CreateTensorHandles(graph, factory);
602 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
// Three inputs here: data, weights, bias.
622 CHECK(queueDescriptor.
m_Outputs.size() == 1);
623 CHECK(queueDescriptor.
m_Inputs.size() == 3);
// Fast-math variant — modelOptions presumably carries the FastMathEnabled flag; confirm.
629 template <
typename Convolution2dWorkload, armnn::DataType DataType>
630 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(
armnn::IWorkloadFactory& factory,
657 weightsTensorInfo.SetConstant();
659 biasTensorInfo.SetConstant();
669 Connect(weights, layer, weightsTensorInfo, 0, 1);
670 Connect(bias, layer, biasTensorInfo, 0, 2);
672 CreateTensorHandles(graph, factory);
675 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
686 CHECK(queueDescriptor.
m_Inputs.size() == 3);
687 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// LSTM-family workload tests (Lstm, QuantizedLstm, QLstm). Each builds the layer's
// parameter tensors, connects the multi-slot inputs/outputs and checks descriptor
// input/output counts.
// NOTE(review): heavily truncated view — descriptor setup, layer creation and many
// parameter-tensor lines are missing; comments describe only visible code.
693 template <
typename LstmWorkload>
706 unsigned int batchSize = 2;
707 unsigned int inputSize = 2;
708 unsigned int numUnits = 4;
709 unsigned int outputSize = 4;
741 if (layerDesc.m_PeepholeEnabled)
// Scratch buffer width depends on CIFG: 3 gates when CIFG is enabled, else 4.
764 armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
766 Connect(input, layer, lstmTensorInfo1, 0, 0);
767 Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
768 Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
769 Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
770 Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
771 Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
772 Connect(layer, output, lstmTensorInfo3, 3, 0);
774 CreateTensorHandles(graph, factory);
777 auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
// LSTM: 3 inputs (input, cellStateIn, outputStateIn), 4 outputs (incl. scratch).
782 CHECK(queueDescriptor.
m_Inputs.size() == 3);
783 CHECK(queueDescriptor.
m_Outputs.size() == 4);
// QuantizedLstm variant with explicit quantization parameters.
793 template <
typename QuantizedLstmWorkload>
798 unsigned int numBatches = 2;
799 unsigned int inputSize = 2;
800 unsigned int outputSize = 4;
803 float inputOutputScale = 0.0078125f;
804 int32_t inputOutputOffset = 128;
806 float cellStateScale = 0.00048828125f;
807 int32_t cellStateOffset = 0;
809 float weightsScale = 0.00408021f;
810 int32_t weightsOffset = 100;
812 float biasScale = 3.1876640625e-05f;
813 int32_t biasOffset = 0;
// Allocate all 12 parameter tensors owned by the layer: 4 input weights,
// 4 recurrent weights, 4 gate biases.
832 layer->m_QuantizedLstmParameters.m_InputToInputWeights =
833 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
834 layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
835 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
836 layer->m_QuantizedLstmParameters.m_InputToCellWeights =
837 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
838 layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
839 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
841 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
842 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
843 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
844 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
845 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
846 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
847 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
848 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
850 layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
851 layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
852 layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
853 layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
856 layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
857 layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
858 layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
859 layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();
861 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
862 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
863 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
864 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();
866 layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
867 layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
868 layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
869 layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();
896 Connect(input, layer, inputInfo, 0, 0);
897 Connect(cellStateIn, layer, cellStateInfo, 0, 1);
898 Connect(outputStateIn, layer, outputStateInfo, 0, 2);
900 Connect(layer, cellStateOut, cellStateInfo, 0, 0);
901 Connect(layer, outputStateOut, outputStateInfo, 1, 0);
903 CreateTensorHandles(graph, factory);
906 auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
// QuantizedLstm: 3 inputs, 2 outputs (cell state, output state).
910 CHECK(queueDescriptor.
m_Inputs.size() == 3);
911 CHECK(queueDescriptor.
m_Outputs.size() == 2);
// QLstm variant — hidden-state quantization comes from the layer descriptor.
932 template <
typename QLstmWorkload>
955 unsigned int numBatches = 2;
956 unsigned int inputSize = 4;
957 unsigned int numUnits = 4;
958 unsigned int outputSize = 4;
961 float inputScale = 0.0078125f;
962 int32_t inputOffset = 0;
965 float outputScale = layerDesc.m_HiddenStateScale;
966 int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;
968 float cellStateScale = 3.05176e-05f;
969 int32_t cellStateOffset = 0;
971 float weightsScale = 0.00784314f;
972 int32_t weightsOffset = 0;
974 float layerNormScale = 3.05182e-05f;
975 int32_t layerNormOffset = 0;
977 float biasScale = layerNormScale / 1024;
978 int32_t biasOffset = 0;
1001 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1003 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1005 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1012 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1014 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1016 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1060 Connect(input, layer, inputInfo, 0, 0);
1061 Connect(outputStateIn, layer, outputStateInfo, 0, 1);
1062 Connect(cellStateIn, layer, cellStateInfo, 0, 2);
1064 Connect(layer, outputStateOut, outputStateInfo, 0, 0);
1065 Connect(layer, cellStateOut, cellStateInfo, 1, 0);
1066 Connect(layer, output, outputStateInfo, 2, 0);
1068 CreateTensorHandles(graph, factory);
1071 auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
// QLstm: 3 inputs, 3 outputs.
1075 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1076 CHECK(queueDescriptor.
m_Outputs.size() == 3);
// Direct Convolution2d test (weights + bias as constant inputs: 3 inputs total)
// and a DepthwiseConvolution2d test (weights input only: 2 inputs total).
// NOTE(review): truncated view — descriptor and layer creation lines are missing.
1093 template<
typename Convolution2dWorkload, armnn::DataType DataType>
1094 std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(
armnn::IWorkloadFactory& factory,
1115 weightsTensorInfo.SetConstant();
1117 biasTensorInfo.SetConstant();
1125 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1126 weights->m_LayerOutput->Allocate();
1127 bias->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
1132 Connect(weights, layer, weightsTensorInfo, 0, 1);
1133 Connect(bias, layer, biasTensorInfo, 0, 2);
1135 CreateTensorHandles(graph, factory);
1138 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
1149 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1150 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1156 template <
typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
1157 std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
1192 CreateTensorHandles(graph, factory);
1195 auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);
// Two inputs: data + weights (no bias in this variant).
1207 CHECK(queueDescriptor.
m_Inputs.size() == 2);
1208 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// FullyConnected workload tests: plain (weights input only), with an activation
// blob + bias, and with weights/biases supplied purely as graph inputs.
// NOTE(review): truncated view — layer creation and tensor-info setup are missing.
1214 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1236 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1237 weights->m_LayerOutput->Allocate();
1241 Connect(weights, layer, weightsTensorInfo, 0, 1);
1243 CreateTensorHandles(graph, factory);
1246 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
// Two inputs: data + weights.
1251 CHECK(queueDescriptor.
m_Inputs.size() == 2);
1252 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Fused-activation-blob variant with bias (3 inputs).
1258 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1259 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
1276 biasesTensorInfo.SetConstant();
1278 auto activationDesc = std::make_shared<ActivationDescriptor>();
1279 activationDesc->m_A = 10.0f;
1280 activationDesc->m_B = 5.0f;
// Exact float compare is safe: the values were assigned verbatim above.
1287 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
1288 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
1298 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1299 weights->m_LayerOutput->Allocate();
1300 biases->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
1305 Connect(weights, layer, weightsTensorInfo, 0, 1);
1306 Connect(biases, layer, biasesTensorInfo, 0, 2);
1308 CreateTensorHandles(graph, factory);
1311 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1326 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1327 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Weights/biases-as-inputs variant (3 inputs, no constant layer tensors).
1333 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1334 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1360 CreateTensorHandles(graph, factory);
1363 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1370 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1371 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Normalization, Pooling2d and Softmax workload test fragments (all 1-in/1-out).
// NOTE(review): truncated view — descriptor/layer creation lines are missing.
1378 template <
typename NormalizationWorkload, armnn::DataType DataType>
1389 layerDesc.
m_Beta = -1.0f;
1390 layerDesc.
m_K = 0.2f;
1407 Connect(input, layer, inputTensorInfo);
1408 Connect(layer, output, outputTensorInfo);
1409 CreateTensorHandles(graph, factory);
1412 auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1423 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1424 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1430 template <
typename Pooling2dWorkload, armnn::DataType DataType>
1461 CreateTensorHandles(graph, factory);
1464 auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1479 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1480 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Softmax: quantized data types get a fixed 1/256 scale (and -128 offset for the
// signed case) — presumably to match the softmax output range [0,1); confirm.
1486 template <
typename SoftmaxWorkload, armnn::DataType DataType>
1495 softmaxDescriptor.
m_Axis = -1;
1508 tensorInfo.SetQuantizationScale(1.f / 256);
1512 tensorInfo.SetQuantizationOffset(-128);
1513 tensorInfo.SetQuantizationScale(1.f / 256);
1516 Connect(input, layer, tensorInfo);
1517 Connect(layer, output, tensorInfo);
1518 CreateTensorHandles(graph, factory);
1521 auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1524 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1525 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Splitter tests: single splitter (1 input, 3 output views), splitter feeding a
// concat (round-trip), and splitter feeding multiple activation layers. Workloads
// are created for every layer and returned/moved out to the caller.
// NOTE(review): truncated view — view-sizes setup and layer creation are missing.
1531 template<
typename SplitterWorkload, armnn::DataType DataType>
1532 std::unique_ptr<SplitterWorkload>
// Three views with origins at offsets 0, 1 and 3 along dimension 0.
1543 layerDesc.SetViewOriginCoord(0, 0, 0);
1544 layerDesc.SetViewOriginCoord(1, 0, 1);
1545 layerDesc.SetViewOriginCoord(2, 0, 3);
1557 Connect(input, layer, tensorInfo);
1563 Connect(layer, output0, output0Info, 0, 0);
1564 Connect(layer, output1, output1Info, 1, 0);
1565 Connect(layer, output2, output2Info, 2, 0);
1567 CreateTensorHandles(graph, factory);
1570 auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1573 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1574 CHECK(queueDescriptor.
m_Outputs.size() == 3);
// Splitter -> Concat round trip; note the concat deliberately swaps the two views
// (splitter output 0 goes to concat input 1 and vice versa).
1592 template<
typename SplitterWorkload,
typename ConcatWorkload, armnn::DataType DataType>
1593 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1605 splitterViews.SetViewOriginCoord(0, 0, 0);
1606 splitterViews.SetViewOriginCoord(0, 1, 0);
1607 splitterViews.SetViewOriginCoord(0, 2, 0);
1608 splitterViews.SetViewOriginCoord(0, 3, 0);
1610 splitterViews.SetViewOriginCoord(1, 0, 0);
1611 splitterViews.SetViewOriginCoord(1, 1, 1);
1612 splitterViews.SetViewOriginCoord(1, 2, 0);
1613 splitterViews.SetViewOriginCoord(1, 3, 0);
1620 concatViews.SetViewOriginCoord(0, 0, 0);
1621 concatViews.SetViewOriginCoord(0, 1, 1);
1622 concatViews.SetViewOriginCoord(0, 2, 0);
1623 concatViews.SetViewOriginCoord(0, 3, 0);
1625 concatViews.SetViewOriginCoord(1, 0, 0);
1626 concatViews.SetViewOriginCoord(1, 1, 0);
1627 concatViews.SetViewOriginCoord(1, 2, 0);
1628 concatViews.SetViewOriginCoord(1, 3, 0);
1638 Connect(input, splitter, inputTensorInfo, 0, 0);
1640 Connect(splitter, concat, splitTensorInfo1, 0, 1);
1642 Connect(splitter, concat, splitTensorInfo2, 1, 0);
1644 Connect(concat, output, inputTensorInfo, 0, 0);
1647 CreateTensorHandles(graph, factory);
1650 auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1651 CHECK(workloadSplitter);
1653 auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1654 CHECK(workloadConcat);
1656 return {std::move(workloadSplitter), std::move(workloadConcat)};
// Splitter feeding four activation layers; results are returned via out-parameters.
1662 template<
typename SplitterWorkload,
typename ActivationWorkload, armnn::DataType DataType>
1664 std::unique_ptr<SplitterWorkload>& wlSplitter,
1665 std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1666 std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1667 std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1668 std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1679 splitterViews.SetViewOriginCoord(0, 0, 0);
1680 splitterViews.SetViewOriginCoord(0, 1, 0);
1681 splitterViews.SetViewOriginCoord(0, 2, 0);
1682 splitterViews.SetViewOriginCoord(0, 3, 0);
1684 splitterViews.SetViewOriginCoord(1, 0, 0);
1685 splitterViews.SetViewOriginCoord(1, 1, 1);
1686 splitterViews.SetViewOriginCoord(1, 2, 0);
1687 splitterViews.SetViewOriginCoord(1, 3, 0);
// Each splitter output fans out to two activation layers.
1704 Connect(input, splitter, inputTensorInfo, 0, 0);
1705 Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1706 Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1708 Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1709 Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1711 Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1712 Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1713 Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1714 Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1716 CreateTensorHandles(graph, factory);
1718 auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1719 auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1720 auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1721 auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1722 auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1724 wlSplitter = std::move(workloadSplitter);
1725 wlActiv0_0 = std::move(workloadActiv0_0);
1726 wlActiv0_1 = std::move(workloadActiv0_1);
1727 wlActiv1_0 = std::move(workloadActiv1_0);
1728 wlActiv1_1 = std::move(workloadActiv1_1);
// Simple 1-in/1-out workload tests: Resize (layout-dependent shapes), BatchToSpaceNd,
// LogSoftmax, L2Normalization, Reshape, ConvertFp16<->Fp32 and Mean.
// NOTE(review): truncated view — layer creation and tensor-info setup are missing.
1731 template <
typename ResizeWorkload, armnn::DataType DataType>
// NHWC halves H/W {4,4}->{2,2}; NCHW case mirrors it with channels-first shapes.
1739 switch (dataLayout) {
1741 inputShape = { 2, 4, 4, 3 };
1742 outputShape = { 2, 2, 2, 3 };
1746 inputShape = { 2, 3, 4, 4 };
1747 outputShape = { 2, 3, 2, 2 };
1766 Connect(input, layer, inputTensorInfo);
1767 Connect(layer, output, outputTensorInfo);
1768 CreateTensorHandles(graph, factory);
1771 auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1773 auto queueDescriptor = workload->GetData();
1774 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1775 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Also verifies the data layout parameter reached the queue descriptor intact.
1776 CHECK(queueDescriptor.
m_Parameters.m_DataLayout == dataLayout);
1782 template <
typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1796 Connect(input, layer, tensorInfo);
1797 Connect(layer, output, tensorInfo);
1799 CreateTensorHandles(graph, factory);
1802 auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1805 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1806 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1811 template <
typename LogSoftmaxWorkload, armnn::DataType DataType>
// Axis -1 selects the innermost dimension.
1820 logSoftmaxDescriptor.
m_Axis = -1;
1831 Connect(input, layer, tensorInfo);
1832 Connect(layer, output, tensorInfo);
1833 CreateTensorHandles(graph, factory);
1836 auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1839 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1840 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1846 template <
typename L2NormalizationWorkload, armnn::DataType DataType>
1868 Connect(input, layer, inputTensorInfo);
1869 Connect(layer, output, outputTensorInfo);
1870 CreateTensorHandles(graph, factory);
1873 auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1877 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1878 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1884 template <
typename ReshapeWorkload, armnn::DataType DataType>
1901 Connect(input, layer, inputTensorInfo);
1902 Connect(layer, output, outputTensorInfo);
1903 CreateTensorHandles(graph, factory);
1906 auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1909 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1910 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1916 template <
typename ConvertFp16ToFp32Float32Workload>
1917 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1930 Connect(input, layer, inputTensorInfo);
1931 Connect(layer, output, outputTensorInfo);
1932 CreateTensorHandles(graph, factory);
1935 auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1938 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1939 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1945 template <
typename ConvertFp32ToFp16Float16Workload>
1946 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1959 Connect(input, layer, inputTensorInfo);
1960 Connect(layer, output, outputTensorInfo);
1961 CreateTensorHandles(graph, factory);
1964 auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
1967 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1968 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1974 template <
typename MeanWorkload, armnn::DataType DataType>
1990 Connect(input, layer, inputTensorInfo);
1991 Connect(layer, output, outputTensorInfo);
1992 CreateTensorHandles(graph, factory);
1995 auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
2000 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2001 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Concat test (two equal-shape inputs) and the PreCompiled test, which builds and
// optimizes a small quantized convolution network, locates the resulting
// PreCompiledLayer in the optimized graph and creates its workload.
// NOTE(review): truncated view — network construction and several loop bodies are
// missing; comments describe only visible code.
2007 template<
typename ConcatWorkload, armnn::DataType DataType>
2011 unsigned int concatAxis)
2021 std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2035 Connect(input0, concat, inputTensorInfo, 0, 0);
2037 Connect(input1, concat, inputTensorInfo, 0, 1);
2039 Connect(concat, output, outputTensorInfo, 0, 0);
2042 CreateTensorHandles(graph, factory);
2045 auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2046 CHECK(workloadConcat);
2048 return workloadConcat;
2051 template <
typename PreCompiledWorkload, armnn::DataType dataType>
2052 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2055 bool biasEnabled =
false)
2070 unsigned int weightsLength = weightsTensorInfo.GetNumElements();
// Weights are filled with the ascending sequence 0..N-1 cast to the weight type.
2073 std::vector<WeightType> convWeightsData(weightsLength);
2074 for (
unsigned int i = 0; i < weightsLength; ++i)
2076 convWeightsData[i] =
static_cast<WeightType
>(i);
2089 const std::string convLayerName(
"conv layer");
2103 unsigned int biasLength = biasTensorInfo.GetNumElements();
// Bias buffer is zero-filled.
2106 std::vector<BiasType> biasData(biasLength);
2107 std::fill(biasData.begin(), biasData.end(),
static_cast<BiasType
>(0));
2127 inputTensorInfo.SetQuantizationOffset(0);
2128 inputTensorInfo.SetQuantizationScale(0.9f);
2134 outputTensorInfo.SetQuantizationOffset(0);
2135 outputTensorInfo.SetQuantizationScale(0.9f);
// Optimize against the factory's own backend only.
2146 std::vector<armnn::BackendId> backends = {factory.
GetBackendId()};
2152 CHECK(optimizedNet !=
nullptr);
// Find the pre-compiled layer produced by optimization (loop body not visible).
2156 Layer* preCompiledLayer =
nullptr;
2157 for (
auto& layer : optimisedGraph)
2161 preCompiledLayer = layer;
2164 CHECK(preCompiledLayer !=
nullptr);
2167 CreateTensorHandles(optimisedGraph, factory);
2170 auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2173 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2174 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Return the optimized network too, so it outlives the workload that references it.
2179 return std::make_pair(std::move(optimizedNet), std::move(workload));
// Remaining test fragments: Constant (0-in/1-out source layer), Prelu (input +
// alpha), SpaceToDepth (1-in/1-out) and Stack (numInputs-in/1-out).
// NOTE(review): truncated view — layer creation lines are missing for each.
2182 template<
typename ConstantWorkload, armnn::DataType DataType>
// The constant layer owns its output tensor.
2192 constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2198 Connect(constant, output, outputTensorInfo, 0, 0);
2201 CreateTensorHandles(graph, factory);
2204 auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2205 CHECK(workloadConstant);
2207 return workloadConstant;
2210 template <
typename PreluWorkload>
2220 CHECK(layer !=
nullptr);
2226 CHECK(input !=
nullptr);
2227 CHECK(alpha !=
nullptr);
2228 CHECK(output !=
nullptr);
// Prelu takes the data on slot 0 and the alpha tensor on slot 1.
2234 Connect(input, layer, inputTensorInfo, 0, 0);
2235 Connect(alpha, layer, alphaTensorInfo, 0, 1);
2236 Connect(layer, output, outputTensorInfo, 0, 0);
2237 CreateTensorHandles(graph, factory);
2240 auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2243 CHECK(queueDescriptor.
m_Inputs.size() == 2);
2244 CHECK(queueDescriptor.
m_Outputs.size() == 1);
2250 template <
typename SpaceToDepthWorkload, armnn::DataType DataType>
2266 Connect(input, layer, inputTensorInfo);
2267 Connect(layer, output, outputTensorInfo);
2269 CreateTensorHandles(graph, factory);
2272 auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);
2275 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2276 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Stack: creates numInputs input layers named "input0", "input1", ... and
// connects each to a distinct input slot of the stack layer.
2281 template <
typename StackWorkload, armnn::DataType DataType>
2287 unsigned int numInputs)
2295 CHECK(stackLayer !=
nullptr);
2298 std::vector<Layer*> inputs;
2299 for (
unsigned int i=0; i<numInputs; ++i)
2302 static_cast<int>(i),
2303 (
"input" + std::to_string(i)).c_str()
2305 CHECK(inputs[i] !=
nullptr);
2308 CHECK(output !=
nullptr);
2311 for (
unsigned int i=0; i<numInputs; ++i)
2313 Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
2315 Connect(stackLayer, output, outputTensorInfo, 0, 0);
2317 CreateTensorHandles(graph, factory);
2319 auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
2321 CHECK(queueDescriptor.
m_Inputs.size() == numInputs);
2322 CHECK(queueDescriptor.
m_Outputs.size() == 1);
2324 return stackWorkload;