11 #include <GraphUtils.hpp> 12 #include <TestUtils.hpp> 14 #include <doctest/doctest.h> 19 using namespace armnn;
25 std::vector<T> GetVector(
unsigned int size,
float initial,
float increment)
27 std::vector<float> typeVector(size, initial);
28 std::vector<T> vector(size);
32 for (
unsigned int i = 0; i < size; ++i)
34 vector[i] = T(initial + (increment * static_cast<float>(i)));
40 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
41 struct Convolution2dTest
44 static const bool isElementWise =
false;
45 static const bool isConstTensorAsInputSupported =
true;
51 constexpr
static const unsigned int inputSize = 48;
52 constexpr
static const unsigned int outputSize = 36;
70 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
75 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
76 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
77 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
78 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
79 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
80 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
86 std::vector<IConnectableLayer*> layers = { weightsLayer };
91 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
92 struct DWConvolution2dTest
96 static const bool isElementWise =
false;
97 static const bool isConstTensorAsInputSupported =
true;
103 constexpr
static const unsigned int inputSize = 48;
104 constexpr
static const unsigned int outputSize = 108;
123 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
127 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
128 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
129 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
130 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
131 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
132 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
138 std::vector<IConnectableLayer*> layers = { weightsLayer };
143 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
148 static const bool isElementWise =
false;
149 static const bool isConstTensorAsInputSupported =
true;
155 constexpr
static const unsigned int inputSize = 10;
156 constexpr
static const unsigned int outputSize = 6;
172 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
176 std::vector<float> weightsData = { 1, 2, 3, 4, 5,
179 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
180 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
186 std::vector<IConnectableLayer*> layers = { weightsLayer };
191 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
196 static const bool isElementWise =
false;
197 static const bool isConstTensorAsInputSupported =
false;
202 constexpr
static const unsigned int inputSize = 48;
203 constexpr
static const unsigned int outputSize = 48;
216 std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
217 std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
218 std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
219 std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
221 const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
230 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
241 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
245 static const bool isElementWise =
true;
246 static const bool isConstTensorAsInputSupported =
false;
251 constexpr
static const unsigned int inputSize = 48;
252 constexpr
static const unsigned int outputSize = 48;
265 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
276 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
280 static const bool isElementWise =
true;
281 static const bool isConstTensorAsInputSupported =
false;
286 constexpr
static const unsigned int inputSize = 48;
287 constexpr
static const unsigned int outputSize = 48;
300 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
311 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
315 static const bool isElementWise =
true;
316 static const bool isConstTensorAsInputSupported =
false;
321 constexpr
static const unsigned int inputSize = 48;
322 constexpr
static const unsigned int outputSize = 48;
335 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
346 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
350 static const bool isElementWise =
true;
351 static const bool isConstTensorAsInputSupported =
false;
356 constexpr
static const unsigned int inputSize = 48;
357 constexpr
static const unsigned int outputSize = 48;
370 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
381 template<
typename LayerTest,
384 float scale, int32_t offset)
396 IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
400 IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) :
nullptr;
403 if (LayerTest::isConstTensorAsInputSupported)
405 std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
410 for (
unsigned int i = 0; i < constantLayers.size(); ++i)
417 TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
418 TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
423 activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
428 activationLayer->GetOutputSlot(0).Connect(outputLayer->
GetInputSlot(0));
430 if (LayerTest::isElementWise)
442 template<
typename LayerTest,
447 float scale = 1.f, int32_t offset=0)
451 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
461 auto checkFusedConv2d = [](
const Layer*
const layer)->
bool {
462 return IsLayerOfType<LayerType>(layer) &&
463 (layer->GetNameStr() ==
"fused-activation-into-receiverLayer");
467 if(LayerTest::isConstTensorAsInputSupported)
469 CHECK(4 == graphFused.GetNumLayers());
472 &IsLayerOfType<InputLayer>,
473 &IsLayerOfType<ConstantLayer>,
475 &IsLayerOfType<OutputLayer>));
479 CHECK(fusedReceiverLayer);
484 CHECK(3 == graphFused.GetNumLayers());
487 &IsLayerOfType<InputLayer>,
489 &IsLayerOfType<OutputLayer>));
494 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
497 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
498 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
499 std::vector<T> outputDataFused(LayerTest::outputSize);
501 armnn::TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
505 {0,
ConstTensor(inputTensorInfo, inputDataFused.data())}};
507 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
510 CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) ==
Status::Success);
514 INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
true, scale, offset);
525 if(LayerTest::isConstTensorAsInputSupported)
527 CHECK(6 == graphNotFused.GetNumLayers());
529 graphNotFused.cend(),
530 &IsLayerOfType<InputLayer>,
531 &IsLayerOfType<ConstantLayer>,
532 &IsLayerOfType<LayerType>,
533 &IsLayerOfType<ActivationLayer>,
534 &IsLayerOfType<OutputLayer>,
535 &IsLayerOfType<OutputLayer>));
539 CHECK(5 == graphNotFused.GetNumLayers());
541 graphNotFused.cend(),
542 &IsLayerOfType<InputLayer>,
543 &IsLayerOfType<LayerType>,
544 &IsLayerOfType<ActivationLayer>,
545 &IsLayerOfType<OutputLayer>,
546 &IsLayerOfType<OutputLayer>));
551 CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) ==
Status::Success);
554 std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
555 std::vector<T> outputDataNotFused(LayerTest::outputSize);
556 std::vector<T> outputData2NotFused(LayerTest::outputSize);
558 TensorInfo inputTensorInfoNotFused = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
562 {0,
ConstTensor(inputTensorInfoNotFused, inputDataNotFused.data())}};
564 {0,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
565 {1,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
568 CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
572 for (
unsigned int n = 0; n < outputDataFused.size(); ++n)
574 auto outputNotFused =
static_cast<float>(outputDataNotFused[n]);
575 CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
579 template<
typename LayerTest,
584 float scale = 1.f, int32_t offset = 0)
590 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
600 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
603 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
604 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
605 std::vector<T> outputDataFused(LayerTest::outputSize);
607 TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
611 {0,
ConstTensor(inputTensorInfo, inputDataFused.data())}};
613 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
616 run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
620 catch (
const std::exception& e)
622 std::cerr << e.what() << std::endl;
631 #if defined(ARMCOMPUTENEON_ENABLED) 635 TEST_CASE(
"FuseReLUIntoConvFloat32CpuAccTest")
640 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
643 TEST_CASE(
"FuseReLUIntoDWConvFloat32CpuAccTest")
648 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
651 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32CpuAccTest")
656 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
659 TEST_CASE(
"FuseReLUIntoBatchNormFloat32CpuAccTest")
664 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
669 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32CpuAccTest")
673 activationDescriptor.
m_A = 1.0f;
674 activationDescriptor.
m_B = -1.0f;
676 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
679 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
683 activationDescriptor.
m_A = 1.0f;
684 activationDescriptor.
m_B = -1.0f;
686 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > ,
DataType::Float32 >
689 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
693 activationDescriptor.
m_A = 1.0f;
694 activationDescriptor.
m_B = -1.0f;
696 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
699 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
703 activationDescriptor.
m_A = 1.0f;
704 activationDescriptor.
m_B = -1.0f;
706 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
711 TEST_CASE(
"FuseReLUIntoConvQAsymmU8CpuAccTest")
716 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
719 TEST_CASE(
"FuseReLUIntoDWConvQAsymmU8CpuAccTest")
724 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
727 TEST_CASE(
"FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
732 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
737 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
741 activationDescriptor.
m_A = 6.0f;
742 activationDescriptor.
m_B = 0.0f;
744 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
747 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
751 activationDescriptor.
m_A = 6.0f;
752 activationDescriptor.
m_B = 0.0f;
754 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
757 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
761 activationDescriptor.
m_A = 6.0f;
762 activationDescriptor.
m_B = 0.0f;
764 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
769 TEST_CASE(
"FuseTanHIntoConvFloat32CpuAccTest")
774 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
779 TEST_CASE(
"FuseHardSwishIntoConvFloat32CpuAccTest")
784 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
789 TEST_CASE(
"LayerFollowedByActivationFloat32CpuAccTest")
792 for (
int i = 0; i != 12; ++i)
795 activationDescriptor.
m_A = 1.0f;
796 activationDescriptor.
m_B = -1.0f;
797 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
798 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
799 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
800 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
801 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
802 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
803 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
804 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
807 TEST_CASE(
"LayerFollowedByActivationFloat16CpuAccTest")
810 for (
int i = 0; i != 12; ++i)
813 activationDescriptor.
m_A = 1.0f;
814 activationDescriptor.
m_B = -1.0f;
815 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
816 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
817 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
818 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
819 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
820 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
821 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
822 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
825 TEST_CASE(
"LayerFollowedByActivationQAsymmU8CpuAccTest")
830 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
831 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
832 static_cast<int>(activationDescriptor.
m_Function));
833 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
834 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
835 static_cast<int>(activationDescriptor.
m_Function));
838 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
839 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
840 static_cast<int>(activationDescriptor.
m_Function));
841 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
842 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
843 static_cast<int>(activationDescriptor.
m_Function));
846 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
847 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
848 static_cast<int>(activationDescriptor.
m_Function));
849 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
850 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
851 static_cast<int>(activationDescriptor.
m_Function));
854 activationDescriptor.
m_A = 1.0f;
855 activationDescriptor.
m_B = -1.0f;
856 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
857 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
858 static_cast<int>(activationDescriptor.
m_Function));
859 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
860 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
861 static_cast<int>(activationDescriptor.
m_Function));
864 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
865 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
866 static_cast<int>(activationDescriptor.
m_Function));
867 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
868 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
869 static_cast<int>(activationDescriptor.
m_Function));
874 #if defined(ARMCOMPUTECL_ENABLED) 878 TEST_CASE(
"FuseReLUIntoConvFloat32GpuAccTest")
883 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
886 TEST_CASE(
"FuseReLUIntoDWConvFloat32GpuAccTest")
891 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
894 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32GpuAccTest")
899 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
902 TEST_CASE(
"FuseReLUIntoBatchNormFloat32GpuAccTest")
907 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
910 TEST_CASE(
"FuseReLUIntoMulFloat32GpuAccTest")
915 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
918 TEST_CASE(
"FuseReLUIntoAddFloat32GpuAccTest")
923 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
926 TEST_CASE(
"FuseReLUIntoSubFloat32GpuAccTest")
931 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
934 TEST_CASE(
"FuseReLUIntoDivFloat32GpuAccTest")
939 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
944 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32GpuAccTest")
948 activationDescriptor.
m_A = 1.0f;
949 activationDescriptor.
m_B = -1.0f;
951 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
954 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
958 activationDescriptor.
m_A = 1.0f;
959 activationDescriptor.
m_B = -1.0f;
961 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
964 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
968 activationDescriptor.
m_A = 1.0f;
969 activationDescriptor.
m_B = -1.0f;
971 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
974 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
978 activationDescriptor.
m_A = 1.0f;
979 activationDescriptor.
m_B = -1.0f;
981 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
984 TEST_CASE(
"FuseBoundedReLUIntoMulFloat32GpuAccTest")
988 activationDescriptor.
m_A = 1.0f;
989 activationDescriptor.
m_B = -1.0f;
991 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
994 TEST_CASE(
"FuseBoundedReLUIntoAddFloat32GpuAccTest")
998 activationDescriptor.
m_A = 1.0f;
999 activationDescriptor.
m_B = -1.0f;
1001 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1004 TEST_CASE(
"FuseBoundedReLUIntoSubFloat32GpuAccTest")
1008 activationDescriptor.
m_A = 1.0f;
1009 activationDescriptor.
m_B = -1.0f;
1011 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1014 TEST_CASE(
"FuseBoundedReLUIntoDivFloat32GpuAccTest")
1018 activationDescriptor.
m_A = 1.0f;
1019 activationDescriptor.
m_B = -1.0f;
1021 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1026 TEST_CASE(
"FuseReLUIntoConvFloat16GpuAccTest")
1031 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1034 TEST_CASE(
"FuseReLUIntoDWConvFloat16GpuAccTest")
1039 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1042 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1047 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1050 TEST_CASE(
"FuseReLUIntoBatchNormFloat16GpuAccTest")
1055 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1058 TEST_CASE(
"FuseReLUIntoMulFloat16GpuAccTest")
1063 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1066 TEST_CASE(
"FuseReLUIntoAddFloat16GpuAccTest")
1071 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1074 TEST_CASE(
"FuseReLUIntoSubFloat16GpuAccTest")
1079 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1082 TEST_CASE(
"FuseReLUIntoDivFloat16GpuAccTest")
1087 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1092 TEST_CASE(
"FuseReLUQIntoConvAsymmU8GpuAccTest")
1097 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1100 TEST_CASE(
"FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1105 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1108 TEST_CASE(
"FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1113 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1118 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1122 activationDescriptor.
m_A = 6.0f;
1123 activationDescriptor.
m_B = 0.0f;
1125 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1128 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1132 activationDescriptor.
m_A = 6.0f;
1133 activationDescriptor.
m_B = 0.0f;
1135 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
1138 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1142 activationDescriptor.
m_A = 6.0f;
1143 activationDescriptor.
m_B = 0.0f;
1145 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1150 TEST_CASE(
"FuseTanHIntoConvFloat32GpuAccTest")
1155 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1158 TEST_CASE(
"FuseTanHIntoMulFloat32GpuAccTest")
1163 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1166 TEST_CASE(
"FuseTanHIntoAddFloat32GpuAccTest")
1171 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1174 TEST_CASE(
"FuseTanHIntoSubFloat32GpuAccTest")
1179 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1182 TEST_CASE(
"FuseTanHIntoDivFloat32GpuAccTest")
1187 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1192 TEST_CASE(
"FuseHardSwishIntoConvFloat32GpuAccTest")
1197 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1200 TEST_CASE(
"FuseHardSwishIntoMulFloat32GpuAccTest")
1205 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1208 TEST_CASE(
"FuseHardSwishIntoAddFloat32GpuAccTest")
1213 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1216 TEST_CASE(
"FuseHardSwishIntoSubFloat32GpuAccTest")
1221 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1224 TEST_CASE(
"FuseHardSwishIntoDivFloat32GpuAccTest")
1229 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1234 TEST_CASE(
"LayerFollowedByActivationFloat32GpuAccTest")
1237 for (
int i = 0; i != 12; ++i)
1240 activationDescriptor.
m_A = 1.0f;
1241 activationDescriptor.
m_B = -1.0f;
1244 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1245 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1246 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
1247 (activationDescriptor,
Compute::GpuAcc)),
"DepthwiseConvolution + Activation function " << i);
1248 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
1249 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1250 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
1251 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1252 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1253 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1254 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1255 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1256 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1257 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1258 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1259 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1263 TEST_CASE(
"LayerFollowedByActivationFloat16GpuAccTest")
1266 for (
int i = 0; i != 12; ++i)
1269 activationDescriptor.
m_A = 1.0f;
1270 activationDescriptor.
m_B = -1.0f;
1273 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1274 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1275 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1276 (activationDescriptor,
Compute::GpuAcc)),
"Depthwise + Activation function " << i);
1277 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1278 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1279 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1280 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1281 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1282 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1283 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1284 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1285 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1286 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1287 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1288 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1292 TEST_CASE(
"LayerFollowedByActivationQAsymmU8GpuAccTest")
1297 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1298 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
1299 static_cast<int>(activationDescriptor.
m_Function));
1300 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1301 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
1302 static_cast<int>(activationDescriptor.
m_Function));
1305 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1306 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
1307 static_cast<int>(activationDescriptor.
m_Function));
1308 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1309 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
1310 static_cast<int>(activationDescriptor.
m_Function));
1313 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1314 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1315 static_cast<int>(activationDescriptor.
m_Function));
1316 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1317 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1318 static_cast<int>(activationDescriptor.
m_Function));
1321 activationDescriptor.
m_A = 1.0f;
1322 activationDescriptor.
m_B = -1.0f;
1323 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1324 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1325 static_cast<int>(activationDescriptor.
m_Function));
1326 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1327 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1328 static_cast<int>(activationDescriptor.
m_Function));
1331 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1332 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1333 static_cast<int>(activationDescriptor.
m_Function));
1334 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1335 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1336 static_cast<int>(activationDescriptor.
m_Function));
TEST_SUITE("TestConstTensorLayerVisitor")
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
bool m_BiasEnabled
Enable/disable bias.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Main network class which provides the interface for building up a neural network. ...
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, bool constantWeights)
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
min(a, max(b, input)) ReLu1 & ReLu6.
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D convolution layer to the network.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.