diff options
Diffstat (limited to 'src')
29 files changed, 1409 insertions, 13 deletions
diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp index fc7a2fab83..56938d021e 100644 --- a/src/armnn/BackendHelper.cpp +++ b/src/armnn/BackendHelper.cpp @@ -246,6 +246,22 @@ bool LayerSupportHandle::IsBatchToSpaceNdSupported(const TensorInfo& input, reasonIfUnsupported); } + +bool LayerSupportHandle::IsBroadcastToSupported(const TensorInfo& input, + const TensorInfo& output, + const armnn::BroadcastToDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) +{ + TensorInfos infos{input, output}; + + return m_LayerSupport->IsLayerSupported(LayerType::BroadcastTo, + infos, + descriptor, + EmptyOptional(), + EmptyOptional(), + reasonIfUnsupported.value()); +} + bool LayerSupportHandle::IsCastSupported(const TensorInfo& input, const TensorInfo& output, Optional<std::string&> reasonIfUnsupported) diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp index f83b710134..325bfc3875 100644 --- a/src/armnn/LayersFwd.hpp +++ b/src/armnn/LayersFwd.hpp @@ -12,6 +12,7 @@ #include "layers/BatchMatMulLayer.hpp" #include "layers/BatchNormalizationLayer.hpp" #include "layers/BatchToSpaceNdLayer.hpp" +#include "layers/BroadcastToLayer.hpp" #include "layers/CastLayer.hpp" #include "layers/ChannelShuffleLayer.hpp" #include "layers/ComparisonLayer.hpp" @@ -116,6 +117,7 @@ DECLARE_LAYER(ArgMinMax) DECLARE_LAYER(BatchMatMul) DECLARE_LAYER(BatchNormalization) DECLARE_LAYER(BatchToSpaceNd) +DECLARE_LAYER(BroadcastTo) DECLARE_LAYER(Cast) DECLARE_LAYER(ChannelShuffle) DECLARE_LAYER(Comparison) diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 7f4ef6b1b6..d2b14cd045 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -657,6 +657,12 @@ IConnectableLayer* INetwork::AddTileLayer(const TileDescriptor &descriptor, return pNetworkImpl->AddTileLayer(descriptor, name); } +IConnectableLayer* INetwork::AddBroadcastToLayer(const BroadcastToDescriptor& descriptor, + const char* name) +{ + return 
pNetworkImpl->AddBroadcastToLayer(descriptor, name); +} + void INetwork::ExecuteStrategy(IStrategy& strategy) const { return pNetworkImpl->ExecuteStrategy(strategy); @@ -1929,8 +1935,10 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.InferTensorInfos(); } - // Perform AddBroadcastReshapeLayer optimisation + // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation using namespace optimizations; + Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer())); + Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer())); if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::ValidateOnly) @@ -1961,6 +1969,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, FoldPadIntoConvolution2d(), FoldPadIntoDepthwiseConvolution2d(), FoldPadIntoPooling2d(), + BroadcastToOptimizationLayer(), PermuteAndBatchToSpaceAsDepthToSpace(), TransposeAndBatchToSpaceAsDepthToSpace(), FuseBatchNormIntoConvolution2DFloat32(), @@ -3045,6 +3054,11 @@ IConnectableLayer* NetworkImpl::AddPrecompiledLayer(const PreCompiledDescriptor& return layer; } +IConnectableLayer* NetworkImpl::AddBroadcastToLayer(const BroadcastToDescriptor &desc, const char *name) +{ + return m_Graph->AddLayer<BroadcastToLayer>(desc, name); +} + void NetworkImpl::ExecuteStrategy(IStrategy& strategy) const { for (auto layer : GetGraph()) diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index 5a3570d825..6ffdfb37a8 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -63,6 +63,9 @@ public: IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, const char* name = nullptr); + IConnectableLayer* AddBroadcastToLayer(const BroadcastToDescriptor& descriptor, + const char* name = nullptr); + IConnectableLayer* AddCastLayer(const char* name = nullptr); IConnectableLayer* AddChannelShuffleLayer(const ChannelShuffleDescriptor& channelShuffleDescriptor, diff --git 
a/src/armnn/layers/BroadcastToLayer.cpp b/src/armnn/layers/BroadcastToLayer.cpp new file mode 100644 index 0000000000..252aa46de0 --- /dev/null +++ b/src/armnn/layers/BroadcastToLayer.cpp @@ -0,0 +1,57 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "BroadcastToLayer.hpp" + +#include "LayerCloneBase.hpp" + +#include <armnn/TypesUtils.hpp> +#include <armnn/backends/WorkloadData.hpp> +#include <armnn/backends/WorkloadFactory.hpp> + +namespace armnn +{ + +BroadcastToLayer::BroadcastToLayer(const BroadcastToDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::BroadcastTo, param, name) +{} + +std::unique_ptr<IWorkload> BroadcastToLayer::CreateWorkload(const IWorkloadFactory& factory) const +{ + BroadcastToQueueDescriptor descriptor; + SetAdditionalInfo(descriptor); + + return factory.CreateWorkload(LayerType::BroadcastTo, descriptor, PrepInfoAndDesc(descriptor)); +} + +BroadcastToLayer* BroadcastToLayer::Clone(armnn::Graph& graph) const +{ + return CloneBase<BroadcastToLayer>(graph, m_Param, GetName()); +} + +std::vector<TensorShape> BroadcastToLayer::InferOutputShapes(const std::vector<TensorShape>&) const +{ + return std::vector<TensorShape>({ m_Param.m_BroadcastToShape }); +} + +void BroadcastToLayer::ValidateTensorShapesFromInputs() +{ + VerifyLayerConnections(1, CHECK_LOCATION()); + + const TensorShape &outputShape = GetOutputSlot(0).GetTensorInfo().GetShape(); + + VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod); + + auto inferredShapes = outputShape; + + ValidateAndCopyShape(outputShape, inferredShapes, m_ShapeInferenceMethod, "BroadcastToLayer"); +} + +void BroadcastToLayer::ExecuteStrategy(IStrategy& strategy) const +{ + strategy.ExecuteStrategy(this, GetParameters(), {}, GetName()); +} + +} //namespace armnn diff --git a/src/armnn/layers/BroadcastToLayer.hpp b/src/armnn/layers/BroadcastToLayer.hpp new file mode 100644 index 0000000000..5da27a7a66 
--- /dev/null +++ b/src/armnn/layers/BroadcastToLayer.hpp @@ -0,0 +1,48 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "LayerWithParameters.hpp" + +namespace armnn +{ + + class BroadcastToLayer : public LayerWithParameters<BroadcastToDescriptor> + { + public: + /// Makes a workload for the BroadcastTo type. + /// @param [in] factory The workload factory which will create the workload. + /// @return A pointer to the created workload, or nullptr if not created. + virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override; + + /// Creates a dynamically-allocated copy of this layer. + /// @param [in] graph The graph into which this layer is being cloned. + BroadcastToLayer* Clone(Graph& graph) const override; + + /// Infers the output shapes from given input shapes and layer properties. + /// @param [in] inputShapes The input shapes layer has. + /// @return A vector to the inferred output shape. + std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override; + + /// Check if the input tensor BroadcastTo(s) + /// will lead to a valid configuration of @ref BroadcastToLayer. + void ValidateTensorShapesFromInputs() override; + + /// Execute Strategy on BroadcastTo layer + /// @param [in] strategy The input strategy for the layer + void ExecuteStrategy(IStrategy& strategy) const override; + + protected: + /// Constructor to create a BroadcastToLayer. + /// @param [in] param Parameters for the layer. + /// @param [in] name Optional name for the layer. + BroadcastToLayer(const BroadcastToDescriptor& param, const char* name); + + /// Default destructor. + ~BroadcastToLayer() = default; + }; + +} // namespace armnn
\ No newline at end of file diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 0e67516193..abf4cde442 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -10,6 +10,7 @@ #include "ConvertConstDequantisationLayersToConstLayers.hpp" #include "ConvertConstPermuteLayersToConstLayers.hpp" #include "ConvertFp32NetworkToFp16.hpp" +#include "DeleteBroadcastTo.hpp" #include "FoldPadIntoLayer2d.hpp" #include "FuseBatchNorm.hpp" #include "MovePermuteUp.hpp" @@ -21,4 +22,4 @@ #include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" #include "PermuteDepthwiseConv2dWeights.hpp" #include "SquashEqualSiblings.hpp" -#include "TransposeAsReshape.hpp"
\ No newline at end of file +#include "TransposeAsReshape.hpp" diff --git a/src/armnn/optimizations/DeleteBroadcastTo.hpp b/src/armnn/optimizations/DeleteBroadcastTo.hpp new file mode 100644 index 0000000000..9ea20907df --- /dev/null +++ b/src/armnn/optimizations/DeleteBroadcastTo.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ +class DeleteBroadcastToImpl +{ +public: + /// Run for every BroadcastToLayer. Remove it if it is before an ElementWiseLayer. + /// Since ElementWiseBinary uses a broadcast loop, using a broadcastTo layer is + /// not necessary so it will be deleted. + void Run(Graph&, BroadcastToLayer& layer) const + { + if(layer.GetType() == LayerType::BroadcastTo) + { + Layer& next = layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer(); + if (next.GetType() == LayerType::ElementwiseBinary) + { + Layer& connectedLayer = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer(); + layer.GetOutputSlot().MoveAllConnections(connectedLayer.GetOutputSlot()); + } + } + } +protected: + DeleteBroadcastToImpl() = default; + ~DeleteBroadcastToImpl() = default; +}; +using BroadcastToOptimizationLayer = OptimizeForType<BroadcastToLayer, DeleteBroadcastToImpl>; +} +}
\ No newline at end of file diff --git a/src/armnn/test/optimizations/BroadcastToTests.cpp b/src/armnn/test/optimizations/BroadcastToTests.cpp new file mode 100644 index 0000000000..69f2bb8860 --- /dev/null +++ b/src/armnn/test/optimizations/BroadcastToTests.cpp @@ -0,0 +1,151 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LayersFwd.hpp" + +#include <Network.hpp> +#include <ResolveType.hpp> +#include <armnn/INetwork.hpp> +#include <TestUtils.hpp> +#include <Optimizer.hpp> + +#include <doctest/doctest.h> + +TEST_SUITE("Optimizer") +{ + using namespace armnn; + using namespace armnn::optimizations; + + TEST_CASE("DeleteBroadcastToAfterMulLayer") + { + Graph graph; + const unsigned int inputShape[] = {1, 3}; + const unsigned int outputShape[] = {4, 3}; + + //rank of input is 1 and of output is 2 + TensorInfo inputInfo(1, inputShape, DataType::Float32); + TensorInfo floorInfo(1, inputShape, DataType::Float32); + TensorInfo outputInfo(2, outputShape, DataType::Float32); + + Layer* input = graph.AddLayer<InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(inputInfo); + + FloorLayer* floorLayer = graph.AddLayer<FloorLayer>("floor"); + floorLayer->GetOutputSlot().SetTensorInfo(inputInfo); + + BroadcastToDescriptor broadcastToDescriptor({4, 3}); + BroadcastToLayer* broadcastToLayer = graph.AddLayer<BroadcastToLayer>(broadcastToDescriptor, "broadcast_to"); + broadcastToLayer->GetOutputSlot().SetTensorInfo(floorInfo); + + ElementwiseBinaryDescriptor elementwiseBinaryDescriptor(BinaryOperation::Mul); + ElementwiseBinaryLayer* elementwiseBinaryLayer = + graph.AddLayer<ElementwiseBinaryLayer>(elementwiseBinaryDescriptor, "multiplication"); + elementwiseBinaryLayer->GetOutputSlot().SetTensorInfo(outputInfo); + + Layer* output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connect up layers - input -> floor -> broadcast_to -> multiplication -> output + 
input->GetOutputSlot().Connect(floorLayer->GetInputSlot(0)); + floorLayer->GetOutputSlot().Connect(broadcastToLayer->GetInputSlot(0)); + broadcastToLayer->GetOutputSlot().Connect(elementwiseBinaryLayer->GetInputSlot(0)); + elementwiseBinaryLayer->GetOutputSlot().Connect(output->GetInputSlot(0)); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<FloorLayer>, + &IsLayerOfType<BroadcastToLayer>, + &IsLayerOfType<ElementwiseBinaryLayer>, + &IsLayerOfType<OutputLayer>)); + + Optimizer::Pass(graph, MakeOptimizations(BroadcastToOptimizationLayer())); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<FloorLayer>, + &IsLayerOfType<ElementwiseBinaryLayer>, + &IsLayerOfType<OutputLayer>)); + } + + TEST_CASE("DeleteBroadcastToNullptr") + { + Graph graph; + const unsigned int inputShape[] = {1, 3}; + const unsigned int outputShape[] = {4, 3}; + + //rank of input is 1 and of output is 2 + TensorInfo inputInfo(1, inputShape, DataType::Float32); + TensorInfo outputInfo(2, outputShape, DataType::Float32); + + Layer* input = graph.AddLayer<InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(inputInfo); + + ElementwiseBinaryDescriptor elementwiseBinaryDescriptor(BinaryOperation::Mul); + ElementwiseBinaryLayer* elementwiseBinaryLayer = + graph.AddLayer<ElementwiseBinaryLayer>(elementwiseBinaryDescriptor, "multiplication"); + elementwiseBinaryLayer->GetOutputSlot().SetTensorInfo(outputInfo); + + Layer* output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connect up layers - input -> broadcast_to -> multiplication -> output + input->GetOutputSlot().Connect(elementwiseBinaryLayer->GetInputSlot(0)); + elementwiseBinaryLayer->GetOutputSlot().Connect(output->GetInputSlot(0)); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<ElementwiseBinaryLayer>, + &IsLayerOfType<OutputLayer>)); + + Optimizer::Pass(graph, 
MakeOptimizations(BroadcastToOptimizationLayer())); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<ElementwiseBinaryLayer>, + &IsLayerOfType<OutputLayer>)); + } + + TEST_CASE("DeleteBroadcastToNotElementWise") + { + Graph graph; + const unsigned int inputShape[] = {1, 3}; + const unsigned int outputShape[] = {4, 3}; + + //rank of input is 1 and of output is 2 + TensorInfo inputInfo(1, inputShape, DataType::Float32); + TensorInfo broadcastToInfo(2, outputShape, DataType::Float32); + TensorInfo outputInfo(2, outputShape, DataType::Float32); + + Layer* input = graph.AddLayer<InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(inputInfo); + + BroadcastToDescriptor broadcastToDescriptor({4, 3}); + BroadcastToLayer* broadcastToLayer = graph.AddLayer<BroadcastToLayer>(broadcastToDescriptor, "broadcast_to"); + broadcastToLayer->GetOutputSlot().SetTensorInfo(broadcastToInfo); + + TileDescriptor tileDescriptor({2, 3}); + TileLayer* tileLayer = graph.AddLayer<TileLayer>(tileDescriptor, "tile"); + tileLayer->GetOutputSlot().SetTensorInfo(outputInfo); + + Layer* output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connect up layers - input -> broadcast_to -> tile -> output + input->GetOutputSlot().Connect(broadcastToLayer->GetInputSlot(0)); + broadcastToLayer->GetOutputSlot().Connect(tileLayer->GetInputSlot(0)); + tileLayer->GetOutputSlot().Connect(output->GetInputSlot(0)); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<BroadcastToLayer>, + &IsLayerOfType<TileLayer>, + &IsLayerOfType<OutputLayer>)); + + Optimizer::Pass(graph, MakeOptimizations(BroadcastToOptimizationLayer())); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<BroadcastToLayer>, + &IsLayerOfType<TileLayer>, + &IsLayerOfType<OutputLayer>)); + } +} diff --git a/src/backends/backendsCommon/WorkloadData.cpp 
b/src/backends/backendsCommon/WorkloadData.cpp index d0f6eea3d4..021435ea40 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -4417,4 +4417,30 @@ void TileQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName); } +void BroadcastToQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + const std::string& descriptorName{"BroadcastToQueueDescriptor"}; + + ValidateNumInputs(workloadInfo, descriptorName, 1); + ValidateNumOutputs(workloadInfo, descriptorName, 1); + + const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; + + std::vector<DataType> supportedTypes = + { + DataType::Float32, + DataType::Float16, + DataType::QAsymmS8, + DataType::QAsymmU8, + DataType::QSymmS8, + DataType::QSymmS16, + DataType::Signed32, + DataType::Signed64 + }; + + ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); + ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName); +} + } // namespace armnn
\ No newline at end of file diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 6ff237bc12..2538211a41 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -183,6 +183,18 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, reason); break; } + case LayerType::BroadcastTo: + { + auto cLayer = PolymorphicDowncast<const BroadcastToLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + + result = layerSupportObject.IsBroadcastToSupported(OverrideDataType(input, dataType), + OverrideDataType(output, dataType), + cLayer->GetParameters(), + reason); + break; + } case LayerType::Cast: { const TensorInfo& input = layer.GetInputSlot(0).GetTensorInfo(); diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 303f8aca9c..4876f02ce0 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -48,6 +48,7 @@ COMMON_TEST_SOURCES := \ test/layerTests/ArgMinMaxTestImpl.cpp \ test/layerTests/BatchMatMulTestImpl.cpp \ test/layerTests/BatchNormalizationTestImpl.cpp \ + test/layerTests/BroadcastToTestImpl.cpp \ test/layerTests/CastTestImpl.cpp \ test/layerTests/ChannelShuffleTestImpl.cpp \ test/layerTests/ComparisonTestImpl.cpp \ diff --git a/src/backends/backendsCommon/test/BroadcastToEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/BroadcastToEndToEndTestImpl.hpp new file mode 100644 index 0000000000..3b2c47fb94 --- /dev/null +++ b/src/backends/backendsCommon/test/BroadcastToEndToEndTestImpl.hpp @@ -0,0 +1,149 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once +#include "armnn/INetwork.hpp" +#include "armnnUtils/QuantizeHelper.hpp" +#include "ElementwiseBinaryEndToEndTestImpl.hpp" +#include "Optimizer.hpp" +#include <CommonTestUtils.hpp> +#include <ResolveType.hpp> +#include <doctest/doctest.h> + +namespace +{ + using namespace armnn; + armnn::INetworkPtr CreateBroadcastToNetwork(BroadcastToDescriptor& descriptor, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& outputInfo) + { + INetworkPtr network(INetwork::Create()); + IConnectableLayer* inputLayer = network->AddInputLayer(0, "input"); + IConnectableLayer* broadcastLayer = network->AddBroadcastToLayer(descriptor, "broadcast_to"); + IConnectableLayer* outputLayer = network->AddOutputLayer(0, "output"); + Connect(inputLayer, broadcastLayer, inputInfo, 0, 0); + Connect(broadcastLayer, outputLayer, outputInfo, 0, 0); + return network; + } + + armnn::INetworkPtr CreateBroadcastToNetworkWithElementWiseBinary(BroadcastToDescriptor& descriptor, + const ElementwiseBinaryDescriptor& + elementWiseDescriptor, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& inputInfoElementWise, + const armnn::TensorInfo& outputInfo) + { + INetworkPtr network(INetwork::Create()); + IConnectableLayer* inputLayer = network->AddInputLayer(0, "input"); + IConnectableLayer* inputLayerElementWise = network->AddInputLayer(1, "inputElementWiseBinary"); + IConnectableLayer* broadcastLayer = network->AddBroadcastToLayer(descriptor, "broadcast_to"); + IConnectableLayer* multiplicationLayer = + network->AddElementwiseBinaryLayer(elementWiseDescriptor, + "multiplication"); + IConnectableLayer* outputLayer = network->AddOutputLayer(0, "output"); + Connect(inputLayer, broadcastLayer, inputInfo, 0, 0); + Connect(inputLayerElementWise, multiplicationLayer, + inputInfoElementWise, 0, 1); + Connect(broadcastLayer, multiplicationLayer, inputInfo, 0, 0); + Connect(multiplicationLayer, outputLayer, outputInfo, 0, 0); + return network; + 
} + + template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> + void BroadcastToEndToEnd(const std::vector<BackendId>& backends) + { + float qScale = 1.0f; + int32_t qOffset = 0; + bool qConst = true; + + const TensorShape inputTensorShape = { {1, 4} }; + const TensorShape outputTensorShape = { {4, 4} }; + + TensorInfo inputInfo (inputTensorShape, ArmnnType, qScale, + qOffset, qConst); + TensorInfo outputInfo (outputTensorShape, ArmnnType,qScale, + qOffset); + + std::vector<T> inputData = armnnUtils::QuantizedVector<T>({ + 65, 144, 91, 161 + }, qScale, qOffset); + + std::vector<T> expectedOutputData = armnnUtils::QuantizedVector<T>({ + 65, 144, 91, 161, + 65, 144, 91, 161, + 65, 144, 91, 161, + 65, 144, 91, 161 + }, qScale, qOffset); + + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 4, 4 })); + CHECK(descriptor.m_BroadcastToShape == outputTensorShape); + INetworkPtr network = CreateBroadcastToNetwork(descriptor, inputInfo, outputInfo); + + std::map<int, std::vector<T>> inputTensor = { { 0, inputData } }; + std::map<int, std::vector<T>> expectedOutputTensor = { { 0, expectedOutputData } }; + EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),inputTensor, + expectedOutputTensor, backends); + } + + template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> + void BroadcastToEndToEndElementWiseBinary(const std::vector<BackendId>& backends) + { + float qScale = 1.0f; + int32_t qOffset = 0; + bool qConst = true; + + const TensorShape inputTensorShape = { {1, 4} }; + const TensorShape outputTensorShape = { {4, 4} }; + + const TensorInfo inputInfo (inputTensorShape, ArmnnType, qScale, + qOffset, qConst); + const TensorInfo inputInfoElementWise (outputTensorShape, ArmnnType, qScale, + qOffset, qConst); + const TensorInfo outputInfo (outputTensorShape, ArmnnType,qScale, + qOffset); + + std::vector<T> inputData = armnnUtils::QuantizedVector<T>({ + 65, 144, 91, 161 + }, qScale, qOffset); + + 
std::vector<T> inputDataElementWise = armnnUtils::QuantizedVector<T>({ + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1 + }, qScale, qOffset); + + std::vector<T> expectedOutputData = armnnUtils::QuantizedVector<T>({ + 65, 144, 91, 161, + 65, 144, 91, 161, + 65, 144, 91, 161, + 65, 144, 91, 161 + }, qScale, qOffset); + + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 4, 4 })); + CHECK(descriptor.m_BroadcastToShape == outputTensorShape); + INetworkPtr network = CreateBroadcastToNetworkWithElementWiseBinary(descriptor, + BinaryOperation::Mul, + inputInfo, + inputInfoElementWise, + outputInfo); + // Create ArmNN runtime + IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); + + // Optimise ArmNN network + IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, + run->GetDeviceSpec()); + + Graph& graph = GetGraphForTesting(optNet.get()); + + Optimizer::Pass(graph, + armnn::MakeOptimizations(armnn::optimizations::BroadcastToOptimizationLayer())); + + std::map<int, std::vector<T>> inputTensor = { { 0, inputData }, {1, inputDataElementWise} }; + std::map<int, std::vector<T>> expectedOutputTensor = { { 0, expectedOutputData } }; + EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),inputTensor, + expectedOutputTensor, backends); + } + +} // anonymous namespace
\ No newline at end of file diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index 8f3a22d53b..ed95bcf399 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -10,6 +10,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources BackendIdTests.cpp BackendProfilingTests.cpp BackendRegistryTests.cpp + BroadcastToEndToEndTestImpl.hpp ChannelShuffleEndToEndTestImpl.hpp ComparisonEndToEndTestImpl.hpp CompatibilityTests.cpp @@ -79,6 +80,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources layerTests/BatchNormalizationTestImpl.cpp layerTests/BatchNormalizationTestImpl.hpp layerTests/BatchToSpaceNdTestImpl.hpp + layerTests/BroadcastToTestImpl.cpp + layerTests/BroadcastToTestImpl.hpp layerTests/CastTestImpl.cpp layerTests/CastTestImpl.hpp layerTests/ChannelShuffleTestImpl.cpp diff --git a/src/backends/backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp index 9586417407..9d05a64ce8 100644 --- a/src/backends/backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp @@ -94,7 +94,7 @@ void ElementwiseUnarySimpleEndToEnd(const std::vector<BackendId>& backends, std::map<int, std::vector<TInput>> inputTensorData = {{ 0, qInputData }}; std::map<int, std::vector<TInput>> expectedOutputData = {{ 0, qExpectedOutput }}; - EndToEndLayerTestImpl<ArmnnInType, ArmnnInType>(move(net), inputTensorData, expectedOutputData, backends); + EndToEndLayerTestImpl<ArmnnInType, ArmnnInType>(std::move(net), inputTensorData, expectedOutputData, backends); } } // anonymous namespace diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp index e8a2ec6931..9f472e9f28 100644 --- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp +++ 
b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp @@ -634,6 +634,8 @@ DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) DECLARE_LAYER_POLICY_2_PARAM(BatchToSpaceNd) +DECLARE_LAYER_POLICY_2_PARAM(BroadcastTo) + DECLARE_LAYER_POLICY_1_PARAM(Cast) DECLARE_LAYER_POLICY_2_PARAM(ChannelShuffle) diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index 3f8d045c06..015d25ef3e 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -13,6 +13,7 @@ #include <backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp> #include <backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp> #include <backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp> +#include <backendsCommon/test/layerTests/BroadcastToTestImpl.hpp> #include <backendsCommon/test/layerTests/CastTestImpl.hpp> #include <backendsCommon/test/layerTests/ChannelShuffleTestImpl.hpp> #include <backendsCommon/test/layerTests/ComparisonTestImpl.hpp> @@ -79,3 +80,4 @@ #include <backendsCommon/test/layerTests/TransposeConvolution2dTestImpl.hpp> #include <backendsCommon/test/layerTests/TransposeTestImpl.hpp> #include <backendsCommon/test/layerTests/UnidirectionalSequenceLstmTestImpl.hpp> + diff --git a/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.cpp new file mode 100644 index 0000000000..b4e8a4c85d --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.cpp @@ -0,0 +1,636 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "BroadcastToTestImpl.hpp" +#include <vector> + +#include <armnn/backends/IBackendInternal.hpp> +#include <armnn/backends/Workload.hpp> +#include <armnn/backends/WorkloadData.hpp> +#include <armnn/backends/WorkloadFactory.hpp> + +#include <armnnTestUtils/WorkloadTestUtils.hpp> +#include <armnnTestUtils/TensorCopyUtils.hpp> + +#include <armnn/BackendHelper.hpp> + +#include <armnnUtils/QuantizeHelper.hpp> +#include <doctest/doctest.h> + +namespace +{ +template<typename T, std::size_t NumDims> +LayerTestResult<T, NumDims> BroadcastToTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory, + armnn::BroadcastToDescriptor descriptor, + armnn::TensorInfo& inputInfo, + armnn::TensorInfo& outputInfo, + std::vector<T>& inputData, + std::vector<T>& expectedOutputData) +{ + + CHECK(descriptor.m_BroadcastToShape == outputInfo.GetShape()); + + LayerTestResult<T, NumDims> result(outputInfo); + std::vector<T> outputActual(outputInfo.GetNumElements()); + + armnn::BroadcastToQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = std::move(descriptor); + armnn::WorkloadInfo workloadInfo; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); + + AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get()); + + const armnn::BackendId& backend = workloadFactory.GetBackendId(); + armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(backend); + + auto workload = workloadFactory.CreateWorkload(armnn::LayerType::BroadcastTo, queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + 
CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + + workload->PostAllocationConfigure(); + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(outputActual.data(), outputHandle.get()); + return LayerTestResult<T, NumDims>(outputActual, + expectedOutputData, + outputHandle->GetShape(), + outputInfo.GetShape()); +} +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 1> BroadcastTo1dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape( {1, 4} )); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 1 }; + armnn::TensorShape outputShape = { 1, 4 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 1.f + }, qScale, qOffset); + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 1.f, 1.f, + 1.f, 1.f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 1>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 2> BroadcastTo2dAxis0Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 4, 3 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 3 }; + armnn::TensorShape outputShape = { 4, 3 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 
0.f, 1.f, 2.f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 2>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 2> BroadcastTo2dAxis1Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 3, 4 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 3, 1 }; + armnn::TensorShape outputShape = { 3, 4 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 0.f, 1.f, 2.f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 0.f, 0.f, 0.f, 0.f, + 1.f, 1.f, 1.f, 1.f, + 2.f, 2.f, 2.f, 2.f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 2>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 3> BroadcastTo3dAxis0Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 2, 1, 3 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 1, 3 }; + armnn::TensorShape outputShape = { 2, 1, 3 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo 
outputInfo(outputShape, ArmnnType); + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 1.1f, 2.12f, 3.3f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 1.1f, 2.12f, 3.3f, + 1.1f, 2.12f, 3.3f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 3>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 3> BroadcastTo3dAxis1Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 1, 3, 3 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 1, 3 }; + armnn::TensorShape outputShape = { 1, 3, 3 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 1.1f, 2.12f, 3.3f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 1.1f, 2.12f, 3.3f, + 1.1f, 2.12f, 3.3f, + 1.1f, 2.12f, 3.3f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 3>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 3> BroadcastTo3dAxis2Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 1, 3, 3 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 3, 1 }; + armnn::TensorShape outputShape = { 1, 3, 3 }; + + 
armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 1.1f, 2.12f, 3.3f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 1.1f, 1.1f, 1.1f, + 2.12f, 2.12f, 2.12f, + 3.3f, 3.3f, 3.3f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 3>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<T, 4> BroadcastTo4dTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + auto descriptor = armnn::BroadcastToDescriptor(armnn::TensorShape({ 3, 1, 2, 3 })); + + float qScale = 1.0f; + int32_t qOffset = 0; + + armnn::TensorShape inputShape = { 1, 1, 1, 3 }; + armnn::TensorShape outputShape = { 3, 1, 2, 3 }; + + armnn::TensorInfo inputInfo(inputShape, ArmnnType); + armnn::TensorInfo outputInfo(outputShape, ArmnnType); + + std::vector<T> input = armnnUtils::QuantizedVector<T>( + { + 0.f, 1.f, 2.f + }, qScale, qOffset); + + + std::vector<T> expectedOutput = armnnUtils::QuantizedVector<T>( + { + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f, + 0.f, 1.f, 2.f + }, qScale, qOffset); + + return BroadcastToTestImpl<T, 4>(workloadFactory, + memoryManager, + tensorHandleFactory, + descriptor, + inputInfo, + outputInfo, + input, + expectedOutput); +} + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 1> +BroadcastTo1dTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2> 
+BroadcastTo2dAxis0Test<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +BroadcastTo4dTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 1> +BroadcastTo1dTest<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const 
armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4> +BroadcastTo4dTest<armnn::DataType::Float16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 1> +BroadcastTo1dTest<armnn::DataType::QAsymmS8>( + 
armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4> +BroadcastTo4dTest<armnn::DataType::QAsymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& 
tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 1> +BroadcastTo1dTest<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::QAsymmU8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4> +BroadcastTo4dTest<armnn::DataType::QAsymmU8>( + 
armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 1> +BroadcastTo1dTest<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + 
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS8>, 4> +BroadcastTo4dTest<armnn::DataType::QSymmS8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 1> +BroadcastTo1dTest<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + 
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4> +BroadcastTo4dTest<armnn::DataType::QSymmS16>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 1> +BroadcastTo1dTest<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 2> +BroadcastTo2dAxis0Test<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 2> +BroadcastTo2dAxis1Test<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 3> +BroadcastTo3dAxis0Test<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 3> +BroadcastTo3dAxis1Test<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template 
LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 3> +BroadcastTo3dAxis2Test<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Signed32>, 4> +BroadcastTo4dTest<armnn::DataType::Signed32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); diff --git a/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.hpp new file mode 100644 index 0000000000..d8d0df447b --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/BroadcastToTestImpl.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnnTestUtils/LayerTestResult.hpp> +#include <armnn/backends/IBackendInternal.hpp> +#include <armnn/backends/WorkloadFactory.hpp> +#include "ResolveType.hpp" + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> BroadcastTo4dTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 3> BroadcastTo3dAxis0Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 3> BroadcastTo3dAxis1Test(armnn::IWorkloadFactory& workloadFactory, + const 
armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 3> BroadcastTo3dAxis2Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 2> BroadcastTo2dAxis0Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 2> BroadcastTo2dAxis1Test(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 1> BroadcastTo1dTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory);
\ No newline at end of file diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp index 0b1b9c7824..defdf0d807 100644 --- a/src/backends/reference/RefLayerSupport.cpp +++ b/src/backends/reference/RefLayerSupport.cpp @@ -100,6 +100,11 @@ bool RefLayerSupport::IsLayerSupported(const LayerType& type, infos[1], *(PolymorphicDowncast<const BatchToSpaceNdDescriptor*>(&descriptor)), reasonIfUnsupported); + case LayerType::BroadcastTo: + return IsBroadcastToSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const BroadcastToDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Comparison: return IsComparisonSupported(infos[0], infos[1], @@ -807,20 +812,50 @@ bool RefLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input, return supported; } +bool RefLayerSupport::IsBroadcastToSupported(const TensorInfo& input, + const TensorInfo& output, + const BroadcastToDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + IgnoreUnused(descriptor); + + bool supported = true; + + std::array<DataType, 8> supportedTypes + { + DataType::Float32, + DataType::Float16, + DataType::QAsymmS8, + DataType::QAsymmU8, + DataType::QSymmS8, + DataType::QSymmS16, + DataType::Signed32, + DataType::Signed64 + }; + + supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported, + "BroadcastTo: input type not supported."); + + supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported, + "BroadcastTo: output type not supported."); + + return supported; +} + bool RefLayerSupport::IsCastSupported(const TensorInfo& input, const TensorInfo& output, Optional<std::string&> reasonIfUnsupported) const { std::array<DataType, 9> supportedInputTypes = - { - DataType::Float32, - DataType::Float16, - DataType::QSymmS8, - DataType::QAsymmS8, - DataType::QAsymmU8, - DataType::QSymmS16, - DataType::Signed32 - }; + { + DataType::Float32, + DataType::Float16, + 
DataType::QSymmS8, + DataType::QAsymmS8, + DataType::QAsymmU8, + DataType::QSymmS16, + DataType::Signed32 + }; bool supported = true; supported &= CheckSupportRule(TypeAnyOf(input, supportedInputTypes), reasonIfUnsupported, diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp index 377afac62f..9e7175389a 100644 --- a/src/backends/reference/RefLayerSupport.hpp +++ b/src/backends/reference/RefLayerSupport.hpp @@ -54,6 +54,11 @@ public: const BatchToSpaceNdDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsBroadcastToSupported(const TensorInfo& input, + const TensorInfo& output, + const BroadcastToDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsCastSupported(const TensorInfo& input, const TensorInfo& output, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp index fa2082d4f2..c4d9583a66 100644 --- a/src/backends/reference/RefWorkloadFactory.cpp +++ b/src/backends/reference/RefWorkloadFactory.cpp @@ -179,6 +179,11 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateWorkload(LayerType type, = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor); return std::make_unique<RefBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info); } + case LayerType::BroadcastTo: + { + auto broadcastToQueueDescriptor = PolymorphicDowncast<const BroadcastToQueueDescriptor*>(&descriptor); + return std::make_unique<RefBroadcastToWorkload>(*broadcastToQueueDescriptor, info); + } case LayerType::Cast : { auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor); diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk index 7f047af930..27ca8f607a 100644 --- a/src/backends/reference/backend.mk +++ b/src/backends/reference/backend.mk @@ 
-53,6 +53,7 @@ BACKEND_SOURCES := \ workloads/RefBatchMatMulWorkload.cpp \ workloads/RefBatchNormalizationWorkload.cpp \ workloads/RefBatchToSpaceNdWorkload.cpp \ + workloads/RefBroadcastToWorkload.cpp \ workloads/RefCastWorkload.cpp \ workloads/RefChannelShuffleWorkload.cpp \ workloads/RefComparisonWorkload.cpp \ diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp index 09d6ac5d20..e503d3fb7f 100644 --- a/src/backends/reference/test/RefEndToEndTests.cpp +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -10,6 +10,7 @@ #include <backendsCommon/test/ArgMinMaxEndToEndTestImpl.hpp> #include <backendsCommon/test/BatchToSpaceNdEndToEndTestImpl.hpp> #include <backendsCommon/test/BatchMatMulEndToEndTestImpl.hpp> +#include <backendsCommon/test/BroadcastToEndToEndTestImpl.hpp> #include <backendsCommon/test/ChannelShuffleEndToEndTestImpl.hpp> #include <backendsCommon/test/ComparisonEndToEndTestImpl.hpp> #include <backendsCommon/test/ConcatEndToEndTestImpl.hpp> @@ -1728,4 +1729,15 @@ TEST_CASE("RefReshapeRemovalNCHWSecondEndToEnd") { ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(defaultBackends, true, false); } + +// BroadcastTo +TEST_CASE("RefBroadcastToEndToEndFloat32") +{ + BroadcastToEndToEnd<armnn::DataType::Float32>(defaultBackends); +} + +TEST_CASE("RefBroadcastToEndToEndWithElementWiseBinaryFloat32") +{ + BroadcastToEndToEndElementWiseBinary<armnn::DataType::Float32>(defaultBackends); +} } diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index a079bb712a..af4ed966b2 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -2823,4 +2823,60 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(UnidirectionalSequenceLstmLayerInt8NoCifgWithPeeph ARMNN_AUTO_TEST_CASE_WITH_THF(UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjection, UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest) 
+// Broadcast to +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestFloat32, BroadcastTo1dTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestFloat32, BroadcastTo2dAxis0Test<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestFloat32, BroadcastTo2dAxis1Test<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestFloat32, BroadcastTo3dAxis0Test<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestFloat32, BroadcastTo3dAxis1Test<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestFloat32, BroadcastTo3dAxis2Test<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestFloat32, BroadcastTo4dTest<DataType::Float32>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestFloat16, BroadcastTo1dTest<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestFloat16, BroadcastTo2dAxis0Test<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestFloat16, BroadcastTo2dAxis1Test<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestFloat16, BroadcastTo3dAxis0Test<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestFloat16, BroadcastTo3dAxis1Test<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestFloat16, BroadcastTo3dAxis2Test<DataType::Float16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestFloat16, BroadcastTo4dTest<DataType::Float16>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestQAsymmS8, BroadcastTo1dTest<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestQAsymmS8, BroadcastTo2dAxis0Test<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestQAsymmS8, BroadcastTo2dAxis1Test<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestQAsymmS8, BroadcastTo3dAxis0Test<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestQAsymmS8, BroadcastTo3dAxis1Test<DataType::QAsymmS8>) 
+ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestQAsymmS8, BroadcastTo3dAxis2Test<DataType::QAsymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestQAsymmS8, BroadcastTo4dTest<DataType::QAsymmS8>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestQAsymmU8, BroadcastTo1dTest<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestQAsymmU8, BroadcastTo2dAxis0Test<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestQAsymmU8, BroadcastTo2dAxis1Test<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestQAsymmU8, BroadcastTo3dAxis0Test<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestQAsymmU8, BroadcastTo3dAxis1Test<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestQAsymmU8, BroadcastTo3dAxis2Test<DataType::QAsymmU8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestQAsymmU8, BroadcastTo4dTest<DataType::QAsymmU8>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestQSymmS8, BroadcastTo1dTest<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestQSymmS8, BroadcastTo2dAxis0Test<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestQSymmS8, BroadcastTo2dAxis1Test<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestQSymmS8, BroadcastTo3dAxis0Test<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestQSymmS8, BroadcastTo3dAxis1Test<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestQSymmS8, BroadcastTo3dAxis2Test<DataType::QSymmS8>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestQSymmS8, BroadcastTo4dTest<DataType::QSymmS8>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestQSymmS16, BroadcastTo1dTest<DataType::QSymmS16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestQSymmS16, BroadcastTo2dAxis0Test<DataType::QSymmS16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestQSymmS16, BroadcastTo2dAxis1Test<DataType::QSymmS16>) 
+ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestQSymmS16, BroadcastTo3dAxis0Test<DataType::QSymmS16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestQSymmS16, BroadcastTo3dAxis1Test<DataType::QSymmS16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestQSymmS16, BroadcastTo3dAxis2Test<DataType::QSymmS16>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestQSymmS16, BroadcastTo4dTest<DataType::QSymmS16>) + +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo1dTestSigned32, BroadcastTo1dTest<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis0TestSigned32, BroadcastTo2dAxis0Test<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo2dAxis1TestSigned32, BroadcastTo2dAxis1Test<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis0TestSigned32, BroadcastTo3dAxis0Test<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis1TestSigned32, BroadcastTo3dAxis1Test<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo3dAxis2TestSigned32, BroadcastTo3dAxis2Test<DataType::Signed32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(BroadcastTo4dTestSigned32, BroadcastTo4dTest<DataType::Signed32>) }
\ No newline at end of file diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index 9372568133..42f92aec1d 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -79,6 +79,8 @@ list(APPEND armnnRefBackendWorkloads_sources RefBatchNormalizationWorkload.hpp RefBatchToSpaceNdWorkload.cpp RefBatchToSpaceNdWorkload.hpp + RefBroadcastToWorkload.cpp + RefBroadcastToWorkload.hpp RefCastWorkload.cpp RefCastWorkload.hpp RefChannelShuffleWorkload.cpp diff --git a/src/backends/reference/workloads/RefBroadcastToWorkload.cpp b/src/backends/reference/workloads/RefBroadcastToWorkload.cpp new file mode 100644 index 0000000000..3a6184d22e --- /dev/null +++ b/src/backends/reference/workloads/RefBroadcastToWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefBroadcastToWorkload.hpp" +#include "RefWorkloadUtils.hpp" +#include "Profiling.hpp" +#include "Broadcast.hpp" + +#include "Decoders.hpp" +#include "Encoders.hpp" + +namespace armnn +{ + +RefBroadcastToWorkload::RefBroadcastToWorkload(const BroadcastToQueueDescriptor& descriptor, const WorkloadInfo& info) + : RefBaseWorkload(descriptor, info) +{} + +void RefBroadcastToWorkload::Execute() const +{ + Execute(m_Data.m_Inputs, m_Data.m_Outputs); +} + +void RefBroadcastToWorkload::ExecuteAsync(ExecutionData& executionData) +{ + WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data); + Execute(workingMemDescriptor->m_Inputs, workingMemDescriptor->m_Outputs); +} + +void RefBroadcastToWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const +{ + ARMNN_SCOPED_PROFILING_EVENT_REF_NAME_GUID("RefBroadcastToWorkload_Execute"); + const TensorInfo& inputInfo = GetTensorInfo(inputs[0]); + const TensorInfo& outputInfo = 
GetTensorInfo(outputs[0]); + + std::unique_ptr<Decoder<float>> input = MakeDecoder<float>(inputInfo, inputs[0]->Map()); + std::unique_ptr<Encoder<float>> output= MakeEncoder<float>(outputInfo, outputs[0]->Map()); + + auto broadcastTo = [](float x) + { + return x; + }; + BroadcastLoop(inputInfo.GetShape(), outputInfo.GetShape()).Unroll(broadcastTo, + 0, *input, *output); +} +} // namespace armnn diff --git a/src/backends/reference/workloads/RefBroadcastToWorkload.hpp b/src/backends/reference/workloads/RefBroadcastToWorkload.hpp new file mode 100644 index 0000000000..ac947ae787 --- /dev/null +++ b/src/backends/reference/workloads/RefBroadcastToWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefBaseWorkload.hpp" + +namespace armnn +{ +class RefBroadcastToWorkload : public RefBaseWorkload<BroadcastToQueueDescriptor> +{ + +public: + explicit RefBroadcastToWorkload(const BroadcastToQueueDescriptor& descriptor, + const WorkloadInfo& info); + + void Execute() const override; + void ExecuteAsync(ExecutionData& executionData) override; + +private: + void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; +}; +} // namespace armnn
\ No newline at end of file diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp index a36eae501c..98aa27b8a9 100644 --- a/src/backends/reference/workloads/RefWorkloads.hpp +++ b/src/backends/reference/workloads/RefWorkloads.hpp @@ -10,6 +10,7 @@ #include "RefBatchMatMulWorkload.hpp" #include "RefBatchNormalizationWorkload.hpp" #include "RefBatchToSpaceNdWorkload.hpp" +#include "RefBroadcastToWorkload.hpp" #include "RefCastWorkload.hpp" #include "RefChannelShuffleWorkload.hpp" #include "RefComparisonWorkload.hpp" |