Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackend.cpp')
-rw-r--r--  src/backends/gpuFsa/GpuFsaBackend.cpp | 134 +++++++++++++++++++++++++++++++---
 1 file changed, 131 insertions(+), 3 deletions(-)
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index ae7ff0c243..35ad229239 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -13,15 +13,54 @@
#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
-#include <Optimizer.hpp>
-
#include <aclCommon/BaseMemoryManager.hpp>
+#include <backendsCommon/SubgraphUtils.hpp>
+#include <Optimizer.hpp>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
+
+#include "layerValidators/GpuFsaConvolution2dValidate.hpp"
+
namespace armnn
{
+template <typename T>
+inline void DeleteAsType(const void* const blob)
+{
+ delete static_cast<const T*>(blob);
+}
+
+inline SubgraphView::InputSlots CreateInputsFrom(Layer* layer)
+{
+ SubgraphView::InputSlots result;
+ for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
+ {
+ result.push_back(&(*it));
+ }
+ return result;
+}
+
+inline SubgraphView::OutputSlots CreateOutputsFrom(Layer* layer)
+{
+ SubgraphView::OutputSlots result;
+ for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
+ {
+ result.push_back(&(*it));
+ }
+ return result;
+}
+
+inline SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots&& inputs,
+ SubgraphView::OutputSlots&& outputs,
+ SubgraphView::Layers&& layers)
+{
+ return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
+}
+
const BackendId& GpuFsaBackend::GetIdStatic()
{
static const BackendId s_Id{GpuFsaBackendId()};
@@ -178,7 +217,96 @@ OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgra
const ModelOptions& modelOptions) const
{
OptimizationViews optimizationViews(modelOptions);
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+
+ using namespace arm_compute::experimental::dynamic_fusion;
+    // Create a GpuWorkloadContext up front; a GpuWorkloadSketch is created per candidate layer below, for validation purposes
+ auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+ auto gpuCtx = GpuWorkloadContext(&compileCtx);
+
+ auto it = subgraph.endIConnectable();
+ std::map<LayerGuid, Layer*> untouched;
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+ untouched.insert({base.GetGuid(), &base});
+ }
+
+ GpuFsaLayerSupport supportChecker;
+ it = subgraph.endIConnectable();
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+
+ std::unique_ptr<GpuWorkloadSketch> sketch = std::make_unique<GpuWorkloadSketch>(&gpuCtx);
+ switch (base.GetType())
+ {
+ case (LayerType::Convolution2d):
+ {
+ auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
+ //std::vector<TensorInfo> infos = {input, weights};
+
+ auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
+ if (desc->m_BiasEnabled)
+ {
+ auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
+ GpuFsaConvolution2dCreateOp(input,
+ *desc,
+ weights,
+ bias,
+ *(sketch.get()));
+ }
+ else
+ {
+ GpuFsaConvolution2dCreateOp(input,
+ *desc,
+ weights,
+ EmptyOptional(),
+ *(sketch.get()));
+ }
+ break;
+ }
+ default:
+ // unsupported layer for GpuFsa backend
+ continue;
+ }
+
+ auto compiledBlob = std::make_unique<PreCompiledObjectPtr>(sketch.release(), DeleteAsType<GpuWorkloadSketch>);
+
+ IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
+ PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()),
+ std::move(*compiledBlob),
+ armnn::Optional<BackendId>(GetId()),
+ "GpuFsa_Pre_Compiled_Layer");
+
+ // Copy the output tensor infos from sub-graph
+ for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
+ {
+ preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
+ }
+
+ SubgraphView::SubgraphViewPtr substituteSubgraph =
+ CreateSubgraphViewFrom(CreateInputsFrom(&base),
+ CreateOutputsFrom(&base),
+ {&base});
+
+ optimizationViews.AddSubstitution({ *substituteSubgraph, SubgraphView(preCompiledLayer) });
+
+ untouched.erase(base.GetGuid());
+ }
+
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
+ else
+ {
+ ReportUntouchedLayers(optimizationViews, untouched);
+ }
+
+
return optimizationViews;
}
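
Taken together, the new OptimizeSubgraphView walks the subgraph in reverse, validates each Convolution2d layer by building an Arm Compute Library dynamic fusion GpuWorkloadSketch, stores the released sketch as the type-erased blob of a new pre-compiled layer (freed later via DeleteAsType<GpuWorkloadSketch>), and substitutes a one-layer SubgraphView with that layer; whatever remains in the untouched map is reported back through ReportUntouchedLayers, and the whole subgraph is marked untouched if nothing was substituted. Additional fused operators would slot into the same switch. A hedged sketch of what such a case could look like; the GpuFsaDepthwiseConvolution2dCreateOp helper is hypothetical and not provided by this patch:

    case (LayerType::DepthwiseConvolution2d):
    {
        // Hypothetical extension for illustration only; assumes a validator
        // mirroring GpuFsaConvolution2dCreateOp exists for depthwise convolution.
        auto input   = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
        auto desc    = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&base.GetParameters());
        GpuFsaDepthwiseConvolution2dCreateOp(input, *desc, weights, EmptyOptional(), *(sketch.get()));
        break;
    }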