diff options
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackend.cpp')
-rw-r--r-- | src/backends/gpuFsa/GpuFsaBackend.cpp | 134 |
1 files changed, 131 insertions, 3 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index ae7ff0c243..35ad229239 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -13,15 +13,54 @@
 #include <armnn/backends/IBackendContext.hpp>
 #include <armnn/backends/IMemoryManager.hpp>
 
-#include <Optimizer.hpp>
-
 #include <aclCommon/BaseMemoryManager.hpp>
+#include <backendsCommon/SubgraphUtils.hpp>
+#include <Optimizer.hpp>
 
+#include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
 
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
+
+#include "layerValidators/GpuFsaConvolution2dValidate.hpp"
+
 namespace armnn
 {
 
+template <typename T>
+inline void DeleteAsType(const void* const blob)
+{
+    delete static_cast<const T*>(blob);
+}
+
+inline SubgraphView::InputSlots CreateInputsFrom(Layer* layer)
+{
+    SubgraphView::InputSlots result;
+    for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
+    {
+        result.push_back(&(*it));
+    }
+    return result;
+}
+
+inline SubgraphView::OutputSlots CreateOutputsFrom(Layer* layer)
+{
+    SubgraphView::OutputSlots result;
+    for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
+    {
+        result.push_back(&(*it));
+    }
+    return result;
+}
+
+inline SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots&& inputs,
+                                                            SubgraphView::OutputSlots&& outputs,
+                                                            SubgraphView::Layers&& layers)
+{
+    return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
+}
+
 const BackendId& GpuFsaBackend::GetIdStatic()
 {
     static const BackendId s_Id{GpuFsaBackendId()};
@@ -178,7 +217,96 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra
                                                       const ModelOptions& modelOptions) const
 {
     OptimizationViews optimizationViews(modelOptions);
-    optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+
+    using namespace arm_compute::experimental::dynamic_fusion;
+    // Create a new workload sketch, for validation purposes
+    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+    auto gpuCtx = GpuWorkloadContext(&compileCtx);
+
+    auto it = subgraph.endIConnectable();
+    std::map<LayerGuid, Layer*> untouched;
+    while (it != subgraph.beginIConnectable())
+    {
+        --it;
+        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+        untouched.insert({base.GetGuid(), &base});
+    }
+
+    GpuFsaLayerSupport supportChecker;
+    it = subgraph.endIConnectable();
+    while (it != subgraph.beginIConnectable())
+    {
+        --it;
+        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+
+        std::unique_ptr<GpuWorkloadSketch> sketch = std::make_unique<GpuWorkloadSketch>(&gpuCtx);
+        switch (base.GetType())
+        {
+            case (LayerType::Convolution2d):
+            {
+                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+                auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
+                //std::vector<TensorInfo> infos = {input, weights};
+
+                auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
+                if (desc->m_BiasEnabled)
+                {
+                    auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
+                    GpuFsaConvolution2dCreateOp(input,
+                                                *desc,
+                                                weights,
+                                                bias,
+                                                *(sketch.get()));
+                }
+                else
+                {
+                    GpuFsaConvolution2dCreateOp(input,
+                                                *desc,
+                                                weights,
+                                                EmptyOptional(),
+                                                *(sketch.get()));
+                }
+                break;
+            }
+            default:
+                // unsupported layer for GpuFsa backend
+                continue;
+        }
+
+        auto compiledBlob = std::make_unique<PreCompiledObjectPtr>(sketch.release(), DeleteAsType<GpuWorkloadSketch>);
+
+        IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
+                PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()),
+                std::move(*compiledBlob),
+                armnn::Optional<BackendId>(GetId()),
+                "GpuFsa_Pre_Compiled_Layer");
+
+        // Copy the output tensor infos from sub-graph
+        for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
+        {
+            preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
+        }
+
+        SubgraphView::SubgraphViewPtr substituteSubgraph =
+                CreateSubgraphViewFrom(CreateInputsFrom(&base),
+                                       CreateOutputsFrom(&base),
+                                       {&base});
+
+        optimizationViews.AddSubstitution({ *substituteSubgraph, SubgraphView(preCompiledLayer) });
+
+        untouched.erase(base.GetGuid());
+    }
+
+    if (optimizationViews.GetSubstitutions().empty())
+    {
+        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+    }
+    else
+    {
+        ReportUntouchedLayers(optimizationViews, untouched);
+    }
+
+
 
     return optimizationViews;
 }