author     Cathal Corbett <cathal.corbett@arm.com>    2023-01-13 19:19:02 +0000
committer  Cathal Corbett <cathal.corbett@arm.com>    2023-01-17 16:53:29 +0000
commit     d6161b4f1bad0fc0095e73747159480066f56edb (patch)
tree       beee339a1532fc81391409ff9f74c5a3723a1749
parent     04059e5c4e23085f0a70456c2ff5fe1bc029eb06 (diff)
download   armnn-experimental/GpuFsa.tar.gz
IVGCVSW-7382 Implementation of Conv2d within GpuFsa (experimental/GpuFsa)

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: I6802687bb959c74afb1b9aebed133b2b17c036dc
-rw-r--r--  src/backends/gpuFsa/GpuFsaBackend.cpp                                | 134
-rw-r--r--  src/backends/gpuFsa/GpuFsaLayerSupport.cpp                           |   1
-rw-r--r--  src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp  |  90
-rw-r--r--  src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp  |  12
-rw-r--r--  src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp             | 128
5 files changed, 334 insertions(+), 31 deletions(-)
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index ae7ff0c243..35ad229239 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -13,15 +13,54 @@
#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
-#include <Optimizer.hpp>
-
#include <aclCommon/BaseMemoryManager.hpp>
+#include <backendsCommon/SubgraphUtils.hpp>
+#include <Optimizer.hpp>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
+
+#include "layerValidators/GpuFsaConvolution2dValidate.hpp"
+
namespace armnn
{
+template <typename T>
+inline void DeleteAsType(const void* const blob)
+{
+ delete static_cast<const T*>(blob);
+}
+
+inline SubgraphView::InputSlots CreateInputsFrom(Layer* layer)
+{
+ SubgraphView::InputSlots result;
+ for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
+ {
+ result.push_back(&(*it));
+ }
+ return result;
+}
+
+inline SubgraphView::OutputSlots CreateOutputsFrom(Layer* layer)
+{
+ SubgraphView::OutputSlots result;
+ for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
+ {
+ result.push_back(&(*it));
+ }
+ return result;
+}
+
+inline SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots&& inputs,
+ SubgraphView::OutputSlots&& outputs,
+ SubgraphView::Layers&& layers)
+{
+ return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
+}
+
const BackendId& GpuFsaBackend::GetIdStatic()
{
static const BackendId s_Id{GpuFsaBackendId()};
@@ -178,7 +217,96 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra
const ModelOptions& modelOptions) const
{
OptimizationViews optimizationViews(modelOptions);
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+
+ using namespace arm_compute::experimental::dynamic_fusion;
+ // Create a GPU workload context from the CL compile context; a fresh sketch is created per candidate layer below
+ auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+ auto gpuCtx = GpuWorkloadContext(&compileCtx);
+
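+ // Track every layer in the subgraph; entries are erased as layers are fused, so only genuinely untouched layers are reported afterwards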
+ auto it = subgraph.endIConnectable();
+ std::map<LayerGuid, Layer*> untouched;
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+ untouched.insert({base.GetGuid(), &base});
+ }
+
+ GpuFsaLayerSupport supportChecker;
+ it = subgraph.endIConnectable();
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+
+ std::unique_ptr<GpuWorkloadSketch> sketch = std::make_unique<GpuWorkloadSketch>(&gpuCtx);
+ switch (base.GetType())
+ {
+ case (LayerType::Convolution2d):
+ {
+ auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
+ //std::vector<TensorInfo> infos = {input, weights};
+
+ auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
+ if (desc->m_BiasEnabled)
+ {
+ auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
+ GpuFsaConvolution2dCreateOp(input,
+ *desc,
+ weights,
+ bias,
+ *(sketch.get()));
+ }
+ else
+ {
+ GpuFsaConvolution2dCreateOp(input,
+ *desc,
+ weights,
+ EmptyOptional(),
+ *(sketch.get()));
+ }
+ break;
+ }
+ default:
+ // Layer type not yet supported by the GpuFsa backend; leave it untouched
+ continue;
+ }
+
+ auto compiledBlob = std::make_unique<PreCompiledObjectPtr>(sketch.release(), DeleteAsType<GpuWorkloadSketch>);
+
+ IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
+ PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()),
+ std::move(*compiledBlob),
+ armnn::Optional<BackendId>(GetId()),
+ "GpuFsa_Pre_Compiled_Layer");
+
+ // Copy the output tensor infos from sub-graph
+ for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
+ {
+ preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
+ }
+
+ SubgraphView::SubgraphViewPtr substituteSubgraph =
+ CreateSubgraphViewFrom(CreateInputsFrom(&base),
+ CreateOutputsFrom(&base),
+ {&base});
+
+ optimizationViews.AddSubstitution({ *substituteSubgraph, SubgraphView(preCompiledLayer) });
+
+ untouched.erase(base.GetGuid());
+ }
+
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
+ else
+ {
+ ReportUntouchedLayers(optimizationViews, untouched);
+ }
+
+
return optimizationViews;
}
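
As an aside on the substitution above: the workload sketch is handed to AddPrecompiledLayer as a type-erased blob, with DeleteAsType<GpuWorkloadSketch> restoring the concrete type when the blob is destroyed. A minimal standalone sketch of that pattern (FakeSketch and OpaqueBlobPtr are illustrative stand-ins, not the Arm NN or arm_compute types):

// Hedged illustration only: mirrors the DeleteAsType + PreCompiledObjectPtr usage above,
// using a stand-in type so it compiles without arm_compute.
#include <iostream>
#include <memory>

struct FakeSketch                          // stand-in for arm_compute's GpuWorkloadSketch
{
    ~FakeSketch() { std::cout << "sketch destroyed\n"; }
};

template <typename T>
void DeleteAsType(const void* const blob)
{
    delete static_cast<const T*>(blob);    // recover the concrete type before deleting
}

// Assumed shape of the opaque blob pointer: void-typed with a custom deleter.
using OpaqueBlobPtr = std::unique_ptr<void, void (*)(const void*)>;

int main()
{
    auto sketch = std::make_unique<FakeSketch>();
    OpaqueBlobPtr blob(sketch.release(), &DeleteAsType<FakeSketch>);   // ownership transferred as void*
    // When blob goes out of scope the deleter runs and ~FakeSketch is still invoked.
    return 0;
}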
diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
index 7faad2ba73..063af2732e 100644
--- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
+++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
@@ -98,6 +98,7 @@ bool GpuFsaLayerSupport::IsLayerSupported(const LayerType& type,
infos[3]);
}
}
+ case LayerType::Constant:
case LayerType::Input:
case LayerType::Output:
return IsGpuFsaBackendSupported(reasonIfUnsupported, infos[0]);
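
For reference, a hedged sketch of how this support check might be queried for the newly allowed Constant layer. Only the first parameter of IsLayerSupported is confirmed by the hunk header above; the remaining parameters follow the usual ILayerSupport overload and are assumptions here:

// Sketch only, assuming the common ILayerSupport signature for the trailing parameters.
#include <string>
#include <vector>
#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include "GpuFsaLayerSupport.hpp"

bool IsConstantSupportedOnGpuFsa()
{
    armnn::GpuFsaLayerSupport layerSupport;        // default-constructed, as in GpuFsaBackend.cpp
    std::vector<armnn::TensorInfo> infos = { armnn::TensorInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32) };
    std::string reason;

    return layerSupport.IsLayerSupported(armnn::LayerType::Constant,
                                         infos,                                  // infos[0]: the constant's output
                                         armnn::BaseDescriptor(),
                                         armnn::EmptyOptional(),                 // no LSTM parameters
                                         armnn::EmptyOptional(),
                                         armnn::Optional<std::string&>(reason));
}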
diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
index 2b6c2ee3dc..269442b60c 100644
--- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
+++ b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.cpp
@@ -19,8 +19,8 @@
#include <arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h>
#include <arm_compute/dynamic_fusion/sketch/OperatorAttributes.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
-#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
#include <vector>
#include <iostream>
@@ -30,29 +30,24 @@ namespace armnn
using namespace armcomputetensorutils;
-arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
- const Convolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+inline arm_compute::Status ValidateAndCreateOp(const TensorInfo& input,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases,
+ GpuWorkloadSketch& sketch,
+ const bool createOp = false)
{
- using namespace arm_compute::experimental::dynamic_fusion;
-
- // Create a new workload sketch, for validation purposes
- auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
- auto gpuCtx = GpuWorkloadContext(&compileCtx);
- GpuWorkloadSketch sketch{ &gpuCtx };
-
// Build and create tensor infos using the sketch
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
- arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
aclWeightsInfo.set_are_values_constant(weights.IsConstant());
auto inputInfo = sketch.create_tensor_info(aclInputInfo);
auto weightInfo = sketch.create_tensor_info(aclWeightsInfo);
// Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
- arm_compute::TensorInfo aclBiasInfo;
- arm_compute::TensorInfo biasSketchInfo;
+ arm_compute::TensorInfo aclBiasInfo;
+ arm_compute::TensorInfo biasSketchInfo;
arm_compute::TensorInfo* biasSketchInfoPtr = nullptr;
if (descriptor.m_BiasEnabled)
@@ -61,28 +56,71 @@ arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
- biasSketchInfo = sketch.create_tensor_info(aclBiasInfo);
+ biasSketchInfo = sketch.create_tensor_info(aclBiasInfo);
biasSketchInfoPtr = &biasSketchInfo;
}
// Set Conv2d attributes using descriptor
- const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX, descriptor.m_DilationY);
- const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor);
- const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
+ const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
+ descriptor.m_DilationY);
+ const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor);
+ const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
Conv2dAttributes conv2DAttributes{};
conv2DAttributes.dilation(aclDilationInfo);
conv2DAttributes.pad(aclPadInfo);
conv2DAttributes.stride(aclStrideInfo);
+ // Validate the operator and keep the status to return to the caller
+ arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch,
+ &inputInfo,
+ &weightInfo,
+ biasSketchInfoPtr,
+ conv2DAttributes);
+
+ if (createOp)
{
- // Validate operator, check status and update reasonIfUnsupported
- return GpuConv2d::validate_op(sketch,
- &inputInfo,
- &weightInfo,
- biasSketchInfoPtr,
- conv2DAttributes);
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported)
+ {
+ throw BackendCapabilityException("\"GpuFsa\" backend failed during operation validation when attempting "
+ "to fuse a GpuConv2d operator into the existing workload sketch.");
+ }
+
+ arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(sketch,
+ &inputInfo,
+ &weightInfo,
+ biasSketchInfoPtr,
+ conv2DAttributes);
+
+ // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
+ auto outputInfo = sketch.create_tensor_info();
+ GpuOutput::create_op(sketch, convOutInfo, &outputInfo);
}
+
+ return aclStatus;
+}
+
+arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
+{
+ // Create a new workload sketch, for validation purposes
+ auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+ auto gpuCtx = GpuWorkloadContext(&compileCtx);
+ GpuWorkloadSketch sketch{ &gpuCtx };
+
+ return ValidateAndCreateOp(input, descriptor, weights, biases, sketch);
+}
+
+void GpuFsaConvolution2dCreateOp(const TensorInfo& input,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases,
+ GpuWorkloadSketch& sketch)
+{
+ ValidateAndCreateOp(input, descriptor, weights, biases, sketch, true);
}
} // namespace armnn \ No newline at end of file
diff --git a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
index ecdb3cf597..79a2fec400 100644
--- a/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
+++ b/src/backends/gpuFsa/layerValidators/GpuFsaConvolution2dValidate.hpp
@@ -8,12 +8,22 @@
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
-namespace armnn {
+namespace armnn
+{
+
+using namespace arm_compute::experimental::dynamic_fusion;
arm_compute::Status GpuFsaConvolution2dValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases);
+void GpuFsaConvolution2dCreateOp(const TensorInfo& input,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases,
+ GpuWorkloadSketch& sketch);
+
} // namespace armnn \ No newline at end of file
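
Taken together, the two entry points declared above split validation from fusion. A hedged sketch of the intended call sequence, with setup mirroring OptimizeSubgraphView (shapes illustrative, CL-capable build assumed):

// Sketch only: drive the validate-only path first, then fuse into a caller-owned sketch.
#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
#include "layerValidators/GpuFsaConvolution2dValidate.hpp"

using namespace armnn;
using namespace arm_compute::experimental::dynamic_fusion;

void FuseConv2dExample()
{
    TensorInfo input({ 1, 5, 5, 1 }, DataType::Float32);
    TensorInfo weights({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);

    Convolution2dDescriptor desc;
    desc.m_DataLayout = DataLayout::NHWC;

    // Validation-only path (as used by GpuFsaLayerSupport): builds a throwaway sketch internally.
    arm_compute::Status status = GpuFsaConvolution2dValidate(input, desc, weights, EmptyOptional());

    // Fusion path (as used by OptimizeSubgraphView): the caller owns the context and sketch.
    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
    auto gpuCtx     = GpuWorkloadContext(&compileCtx);
    GpuWorkloadSketch sketch{ &gpuCtx };
    if (status.error_code() == arm_compute::ErrorCode::OK)
    {
        GpuFsaConvolution2dCreateOp(input, desc, weights, EmptyOptional(), sketch);
    }
}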
diff --git a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
index fa97b135df..92521a0376 100644
--- a/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaOptimizedNetworkTests.cpp
@@ -8,4 +8,130 @@
#include <GraphUtils.hpp>
#include <TestUtils.hpp>
-#include <doctest/doctest.h> \ No newline at end of file
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+TEST_SUITE("GpuFsaOptimizedNetwork")
+{
+
+TEST_CASE("SingleConv2dSupportedOptimizedNetwork")
+{
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+ INetworkPtr network(INetwork::Create());
+
+ TensorInfo inputInfo({ 1, 5, 5, 1 }, DataType::Float32);
+ TensorInfo outputInfo({ 1, 3, 3, 1 }, DataType::Float32);
+ TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);
+ TensorInfo biasesInfo({ 1 }, DataType::Float32, 0.0f, 0, true);
+
+ Convolution2dDescriptor desc;
+ desc.m_BiasEnabled = true;
+ desc.m_DataLayout = DataLayout::NHWC;
+
+ auto inputLayer = network->AddInputLayer(0, "input");
+ auto weightLayer = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+ auto biasLayer = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+ auto convLayer = network->AddConvolution2dLayer(desc, "conv2d");
+ auto outputLayer = network->AddOutputLayer(1, "output");
+
+ inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+ inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+ weightLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
+ weightLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+ biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2));
+ biasLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+ convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ std::vector<BackendId> backends = { "GpuFsa" };
+
+ OptimizerOptions optimizedOptions;
+ IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions);
+ CHECK(optNet);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<PreCompiledLayer>,
+ &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("TwoConv2dSupportedOptimizedNetwork")
+{
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+ INetworkPtr network(INetwork::Create());
+
+ TensorInfo inputInfo({ 1, 5, 5, 1 }, DataType::Float32);
+ TensorInfo intermediateInfo({ 1, 3, 3, 1 }, DataType::Float32);
+ TensorInfo outputInfo({ 1, 1, 1, 1 }, DataType::Float32);
+ TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);
+ TensorInfo biasesInfo({ 1 }, DataType::Float32, 0.0f, 0, true);
+
+ Convolution2dDescriptor desc;
+ desc.m_BiasEnabled = true;
+ desc.m_DataLayout = DataLayout::NHWC;
+
+ auto inputLayer = network->AddInputLayer(0, "input");
+
+ auto weightLayer1 = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+ auto biasLayer1 = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+ auto convLayer1 = network->AddConvolution2dLayer(desc, "conv2d");
+
+ auto weightLayer2 = network->AddConstantLayer(ConstTensor(weightsInfo, nullptr), "weights");
+ auto biasLayer2 = network->AddConstantLayer(ConstTensor(biasesInfo, nullptr), "bias");
+ auto convLayer2 = network->AddConvolution2dLayer(desc, "conv2d");
+
+ auto outputLayer = network->AddOutputLayer(0, "output");
+
+ inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0));
+ inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+ weightLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(1));
+ weightLayer1->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+ biasLayer1->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(2));
+ biasLayer1->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+ convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0));
+ convLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+
+ weightLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(1));
+ weightLayer2->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+ biasLayer2->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(2));
+ biasLayer2->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+
+ convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ convLayer2->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ std::vector<BackendId> backends = { "GpuFsa" };
+
+ OptimizerOptions optimizedOptions;
+ IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizedOptions);
+ CHECK(optNet);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Check graph layer sequence to ensure that the network has been replaced with a PreCompiledLayer
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<PreCompiledLayer>,
+ &IsLayerOfType<PreCompiledLayer>,
+ &IsLayerOfType<OutputLayer>));
+}
+
+} \ No newline at end of file
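
A closing note on the tests: they pass nullptr as the constant data because only the optimized graph structure is checked. A hedged sketch of supplying real data instead (shapes and values illustrative):

// Sketch only: backing the constant layers with actual element data.
#include <vector>
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>

void AddConvConstantsWithData(armnn::INetwork& network)
{
    using namespace armnn;

    TensorInfo weightsInfo({ 1, 3, 3, 1 }, DataType::Float32, 0.0f, 0, true);
    TensorInfo biasesInfo({ 1 }, DataType::Float32, 0.0f, 0, true);

    std::vector<float> weightsData(weightsInfo.GetNumElements(), 1.0f);   // nine 1.0f values, illustrative
    std::vector<float> biasesData(biasesInfo.GetNumElements(), 0.0f);

    IConnectableLayer* weightLayer = network.AddConstantLayer(ConstTensor(weightsInfo, weightsData.data()), "weights");
    IConnectableLayer* biasLayer   = network.AddConstantLayer(ConstTensor(biasesInfo, biasesData.data()), "bias");

    // Connect the slots and set tensor infos exactly as in the tests above.
    weightLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    biasLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
}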