From d6161b4f1bad0fc0095e73747159480066f56edb Mon Sep 17 00:00:00 2001
From: Cathal Corbett
Date: Fri, 13 Jan 2023 19:19:02 +0000
Subject: IVGCVSW-7382 Implementation of Conv2d within GpuFsa

Signed-off-by: Cathal Corbett
Change-Id: I6802687bb959c74afb1b9aebed133b2b17c036dc
---
 src/backends/gpuFsa/GpuFsaBackend.cpp              | 134 ++++++++++++++++++++-
 src/backends/gpuFsa/GpuFsaLayerSupport.cpp         |   1 +
 .../GpuFsaConvolution2dValidate.cpp                |  90 ++++++++++----
 .../GpuFsaConvolution2dValidate.hpp                |  12 +-
 .../gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp    | 128 +++++++++++++++++++-
 5 files changed, 334 insertions(+), 31 deletions(-)

diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index ae7ff0c243..35ad229239 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -13,15 +13,54 @@
 #include 
 #include 
-#include 
-
 #include 
+#include 
+#include 
+#include 
 #include 
+#include 
+#include 
+
+#include "layerValidators/GpuFsaConvolution2dValidate.hpp"
+
 namespace armnn
 {
 
+template <typename T>
+inline void DeleteAsType(const void* const blob)
+{
+    delete static_cast<const T*>(blob);
+}
+
+inline SubgraphView::InputSlots CreateInputsFrom(Layer* layer)
+{
+    SubgraphView::InputSlots result;
+    for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
+    {
+        result.push_back(&(*it));
+    }
+    return result;
+}
+
+inline SubgraphView::OutputSlots CreateOutputsFrom(Layer* layer)
+{
+    SubgraphView::OutputSlots result;
+    for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
+    {
+        result.push_back(&(*it));
+    }
+    return result;
+}
+
+inline SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots&& inputs,
+                                                            SubgraphView::OutputSlots&& outputs,
+                                                            SubgraphView::Layers&& layers)
+{
+    return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
+}
+
 const BackendId& GpuFsaBackend::GetIdStatic()
 {
     static const BackendId s_Id{GpuFsaBackendId()};
@@ -178,7 +217,96 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra
                                                       const ModelOptions& modelOptions) const
 {
     OptimizationViews optimizationViews(modelOptions);
-    optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+
+    using namespace arm_compute::experimental::dynamic_fusion;
+    // Create a new workload sketch, for validation purposes
+    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+    auto gpuCtx = GpuWorkloadContext(&compileCtx);
+
+    auto it = subgraph.endIConnectable();
+    std::map<LayerGuid, Layer*> untouched;
+    while (it != subgraph.beginIConnectable())
+    {
+        --it;
+        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+        untouched.insert({base.GetGuid(), &base});
+    }
+
+    GpuFsaLayerSupport supportChecker;
+    it = subgraph.endIConnectable();
+    while (it != subgraph.beginIConnectable())
+    {
+        --it;
+        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+
+        std::unique_ptr<GpuWorkloadSketch> sketch = std::make_unique<GpuWorkloadSketch>(&gpuCtx);
+        switch (base.GetType())
+        {
+            case (LayerType::Convolution2d):
+            {
+                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+                auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
+                //std::vector<TensorInfo> infos = {input, weights};
+
+                auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
+                if (desc->m_BiasEnabled)
+                {
+                    auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
+                    GpuFsaConvolution2dCreateOp(input,
+                                                *desc,
+                                                weights,
+                                                bias,
+                                                *(sketch.get()));
+                }
+                else
+                {
+                    GpuFsaConvolution2dCreateOp(input,
+                                                *desc,
+                                                weights,
+                                                EmptyOptional(),
+                                                *(sketch.get()));
+                }
+                break;
+            }
+            default:
+                // unsupported layer for GpuFsa backend
+                continue;
+        }
+
+        auto compiledBlob = std::make_unique<PreCompiledObjectPtr>(sketch.release(), DeleteAsType<GpuWorkloadSketch>);
+
+        IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
+                PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()),
+                std::move(*compiledBlob),
+                armnn::Optional<BackendId>(GetId()),
+                "GpuFsa_Pre_Compiled_Layer");
+
+        // Copy the output tensor infos from sub-graph
+        for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
+        {
+            preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
+        }
+
+        SubgraphView::SubgraphViewPtr substituteSubgraph =
+                CreateSubgraphViewFrom(CreateInputsFrom(&base),
+                                       CreateOutputsFrom(&base),
+                                       {&base});
+
+        optimizationViews.AddSubstitution({ *substituteSubgraph, SubgraphView(preCompiledLayer) });
+
+        untouched.erase(base.GetGuid());
+    }
+
+    if (optimizationViews.GetSubstitutions().empty())
+    {
+        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+    }
+    else
+    {
+        ReportUntouchedLayers(optimizationViews, untouched);
+    }
 
     return optimizationViews;
 }
diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
index 7faad2ba73..063af2732e 100644
--- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
+++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
@@ -98,6 +98,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type,
                                               infos[3]);
         }
     }
+    case LayerType::Constant:
    case LayerType::Input:
    case LayerType::Output:
        return IsGpuFsaBackendSupported(reasonIfUnsupported, infos[0]);
diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
index 2b6c2ee3dc..269442b60c 100644
--- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
+++ b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
@@ -19,8 +19,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 
 #include 
 #include 
@@ -30,29 +30,24 @@ namespace armnn
 
 using namespace armcomputetensorutils;
 
-arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
-                                                const Convolution2dDescriptor& descriptor,
-                                                const TensorInfo& weights,
-                                                const Optional<TensorInfo>& biases)
+inline arm_compute::Status ValidateAndCreateOp(const TensorInfo& input,
+                                               const Convolution2dDescriptor& descriptor,
+                                               const TensorInfo& weights,
+                                               const Optional<TensorInfo>& biases,
+                                               GpuWorkloadSketch& sketch,
+                                               const bool createOp = false)
 {
-    using namespace arm_compute::experimental::dynamic_fusion;
-
-    // Create a new workload sketch, for validation purposes
-    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
-    auto gpuCtx = GpuWorkloadContext(&compileCtx);
-    GpuWorkloadSketch sketch{ &gpuCtx };
-
     // Build and create tensor infos using the sketch
-    const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
-    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo     = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     auto inputInfo  = sketch.create_tensor_info(aclInputInfo);
     auto weightInfo = sketch.create_tensor_info(aclWeightsInfo);
 
     // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
-    arm_compute::TensorInfo aclBiasInfo;
-    arm_compute::TensorInfo biasSketchInfo;
+    arm_compute::TensorInfo  aclBiasInfo;
+    arm_compute::TensorInfo  biasSketchInfo;
     arm_compute::TensorInfo* biasSketchInfoPtr = nullptr;
 
     if (descriptor.m_BiasEnabled)
@@ -61,28 +56,71 @@ arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
         aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
 
-        biasSketchInfo = sketch.create_tensor_info(aclBiasInfo);
+        biasSketchInfo    = sketch.create_tensor_info(aclBiasInfo);
         biasSketchInfoPtr = &biasSketchInfo;
     }
 
     // Set Conv2d attributes using descriptor
-    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, descriptor.m_DilationY);
-    const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor);
-    const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
+    const arm_compute::Size2D    aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
+                                                                         descriptor.m_DilationY);
+    const arm_compute::Padding2D aclPadInfo      = BuildArmComputePaddingInfo(descriptor);
+    const arm_compute::Size2D    aclStrideInfo   = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
 
     Conv2dAttributes conv2DAttributes{};
     conv2DAttributes.dilation(aclDilationInfo);
     conv2DAttributes.pad(aclPadInfo);
     conv2DAttributes.stride(aclStrideInfo);
 
+    // Validate operator, check status and update reasonIfUnsupported
+    arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch,
+                                                           &inputInfo,
+                                                           &weightInfo,
+                                                           biasSketchInfoPtr,
+                                                           conv2DAttributes);
+
+    if (createOp)
     {
-        // Validate operator, check status and update reasonIfUnsupported
-        return GpuConv2d::validate_op(sketch,
-                                      &inputInfo,
-                                      &weightInfo,
-                                      biasSketchInfoPtr,
-                                      conv2DAttributes);
+        const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+        if (!supported)
+        {
+            throw BackendCapabilityException("\"GpuFsa\" backend failed during operation validation when attempting "
+                                             "to fuse a GpuConv2d operator into the existing workload sketch.");
+        }
+
+        arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(sketch,
+                                                                     &inputInfo,
+                                                                     &weightInfo,
+                                                                     biasSketchInfoPtr,
+                                                                     conv2DAttributes);
+
+        // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
+        auto outputInfo = sketch.create_tensor_info();
+        GpuOutput::create_op(sketch, convOutInfo, &outputInfo);
     }
+
+    return aclStatus;
+}
+
+arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
+                                                const Convolution2dDescriptor& descriptor,
+                                                const TensorInfo& weights,
+                                                const Optional<TensorInfo>& biases)
+{
+    // Create a new workload sketch, for validation purposes
+    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+    auto gpuCtx = GpuWorkloadContext(&compileCtx);
+    GpuWorkloadSketch sketch{ &gpuCtx };
+
+    return ValidateAndCreateOp(input, descriptor, weights, biases, sketch);
+}
+
+void GpuFsaConvolution2dCreateOp(const TensorInfo& input,
+                                 const Convolution2dDescriptor& descriptor,
+                                 const TensorInfo& weights,
+                                 const Optional<TensorInfo>& biases,
+                                 GpuWorkloadSketch& sketch)
+{
+    ValidateAndCreateOp(input, descriptor, weights, biases, sketch, true);
 }
 
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
index ecdb3cf597..79a2fec400 100644
--- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
+++ b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
@@ -8,12 +8,22 @@
 
 #include 
 #include 
+#include 
 
-namespace armnn {
+namespace armnn
+{
+
+using namespace arm_compute::experimental::dynamic_fusion;
 
 arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
                                                 const Convolution2dDescriptor& descriptor,
                                                 const TensorInfo& weights,
                                                 const Optional<TensorInfo>& biases);
 
+void GpuFsaConvolution2dCreateOp(const TensorInfo& input,
+                                 const Convolution2dDescriptor& descriptor,
+                                 const TensorInfo& weights,
+                                 const Optional<TensorInfo>& biases,
+                                 GpuWorkloadSketch& sketch);
+
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
index fa97b135df..92521a0376 100644
--- a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
@@ -8,4 +8,130 @@
 #include 
 #include 
-#include 
\ No newline at end of file
+#include 
+
+using namespace armnn;
+
+TEST_SUITE("GpuFsaOptimizedNetwork")
+{
+
+TEST_CASE("SingleConv2dSupportedOptimizedNetwork")
+{
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+    INetworkPtr network(INetwork::Create());
+
+    TensorInfo inputInfo({ 1, 5, 5, 1 }, DataType::Float32);
+    TensorInfo outputInfo({ 1, 3, 3, 1 }, DataType::Float32);
+    TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);
+    TensorInfo biasesInfo({ 1 }, DataType::Float32, 0.0f, 0, true);
+
+    Convolution2dDescriptor desc;
+    desc.m_BiasEnabled = true;
+    desc.m_DataLayout = DataLayout::NHWC;
+
+    auto inputLayer = network->AddInputLayer(0, "input");
+    auto weightLayer = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+    auto biasLayer = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+    auto convLayer = network->AddConvolution2dLayer(desc, "conv2d");
+    auto outputLayer = network->AddOutputLayer(1, "output");
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    weightLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
+    weightLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+    biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2));
+    biasLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    std::vector<BackendId> backends = { "GpuFsa" };
+
+    OptimizerOptions optimizedOptions;
+    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions);
+    CHECK(optNet);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<InputLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<PreCompiledLayer>,
+                        &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("TwoConv2dSupportedOptimizedNetwork")
+{
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+    INetworkPtr network(INetwork::Create());
+
+    TensorInfo inputInfo({ 1, 5, 5, 1 }, DataType::Float32);
+    TensorInfo intermediateInfo({ 1, 3, 3, 1 }, DataType::Float32);
+    TensorInfo outputInfo({ 1, 1, 1, 1 }, DataType::Float32);
+    TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);
+    TensorInfo biasesInfo({ 1 }, DataType::Float32, 0.0f, 0, true);
+
+    Convolution2dDescriptor desc;
+    desc.m_BiasEnabled = true;
+    desc.m_DataLayout = DataLayout::NHWC;
+
+    auto inputLayer = network->AddInputLayer(0, "input");
+
+    auto weightLayer1 = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+    auto biasLayer1 = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+    auto convLayer1 = network->AddConvolution2dLayer(desc, "conv2d");
+
+    auto weightLayer2 = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+    auto biasLayer2 = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+    auto convLayer2 = network->AddConvolution2dLayer(desc, "conv2d");
+
+    auto outputLayer = network->AddOutputLayer(0, "output");
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    weightLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1));
+    weightLayer1->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+    biasLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(2));
+    biasLayer1->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+    convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0));
+    convLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+
+    weightLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1));
+    weightLayer2->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+    biasLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(2));
+    biasLayer2->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+    convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    convLayer2->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    std::vector<BackendId> backends = { "GpuFsa" };
+
+    OptimizerOptions optimizedOptions;
+    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions);
+    CHECK(optNet);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<InputLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<PreCompiledLayer>,
+                        &IsLayerOfType<PreCompiledLayer>,
+                        &IsLayerOfType<OutputLayer>));
+}
+
+}
\ No newline at end of file
-- 
cgit v1.2.1