From 38b600d8abb2c5f7a44511b5deddf441f975d51d Mon Sep 17 00:00:00 2001
From: Nikhil Raj
Date: Thu, 15 Feb 2024 15:02:19 +0000
Subject: IVGCVSW-7968 Update Doxygen docu for 24.02

Signed-off-by: Nikhil Raj
Change-Id: I8c1e45815c6cf78f80d6f2c0959a5bbba6cd11de
---
 latest/_gpu_fsa_backend_8cpp_source.html | 639 +++++++++++++++++++++++++++++++
 1 file changed, 639 insertions(+)
 create mode 100644 latest/_gpu_fsa_backend_8cpp_source.html

diff --git a/latest/_gpu_fsa_backend_8cpp_source.html b/latest/_gpu_fsa_backend_8cpp_source.html
new file mode 100644
index 0000000000..816cd10ee7
--- /dev/null
+++ b/latest/_gpu_fsa_backend_8cpp_source.html
@@ -0,0 +1,639 @@

Arm NN: src/backends/gpuFsa/GpuFsaBackend.cpp Source File
GpuFsaBackend.cpp
//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "GpuFsaBackend.hpp"
#include "GpuFsaBackendContext.hpp"
#include "GpuFsaBackendDefaultAllocator.hpp"
#include "GpuFsaBackendId.hpp"
#include "GpuFsaLayerSupport.hpp"
#include "GpuFsaTensorHandleFactory.hpp"
#include "GpuFsaWorkloadFactory.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>

#include "layers/GpuFsaActivation.hpp"
#include "layers/GpuFsaBatchMatMul.hpp"
#include "layers/GpuFsaCast.hpp"
#include "layers/GpuFsaConvolution2d.hpp"
#include "layers/GpuFsaDepthwiseConvolution2d.hpp"
#include "layers/GpuFsaElementwiseBinary.hpp"
#include "layers/GpuFsaPooling2d.hpp"
#include "layers/GpuFsaReshape.hpp"
#include "layers/GpuFsaResize.hpp"
#include "layers/GpuFsaSoftmax.hpp"

namespace armnn
{

template <typename T>
inline void DeleteAsType(const void* const blob)
{
    delete static_cast<const T*>(blob);
}

SubgraphView::InputSlots CreateInputsFrom(Layer* layer)
{
    SubgraphView::InputSlots result;
    for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
    {
        result.push_back(&(*it));
    }
    return result;
}

SubgraphView::OutputSlots CreateOutputsFrom(Layer* layer)
{
    SubgraphView::OutputSlots result;
    for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
    {
        result.push_back(&(*it));
    }
    return result;
}

SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots&& inputs,
                                                     SubgraphView::OutputSlots&& outputs,
                                                     SubgraphView::Layers&& layers)
{
    return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
}

const BackendId& GpuFsaBackend::GetId() const
{
    static const BackendId s_Id{GpuFsaBackendId()};
    return s_Id;
}

IBackendInternal::IMemoryManagerUniquePtr GpuFsaBackend::CreateMemoryManager() const
{
    if (m_UsingCustomAllocator)
    {
        return std::make_unique<GpuFsaMemoryManager>(m_CustomAllocator);
    }
    return std::make_unique<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}

IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
    std::shared_ptr<GpuFsaMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));

    return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry,
    const ModelOptions&,
    MemorySourceFlags inputFlags,
    MemorySourceFlags outputFlags) const
{
    // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }

    std::shared_ptr<GpuFsaMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));

    return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
}

std::vector<ITensorHandleFactory::FactoryId> GpuFsaBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId> { GpuFsaTensorHandleFactory::GetIdStatic() };
}

void GpuFsaBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    std::shared_ptr<GpuFsaMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
}

void GpuFsaBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                                  MemorySourceFlags inputFlags,
                                                  MemorySourceFlags outputFlags)
{
    // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }

    std::shared_ptr<GpuFsaMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
}

IBackendInternal::IBackendContextPtr GpuFsaBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new GpuFsaBackendContext{options}};
}

IBackendInternal::IBackendProfilingContextPtr GpuFsaBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::ILayerSupportSharedPtr GpuFsaBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport{new GpuFsaLayerSupport};
    return layerSupport;
}

std::unique_ptr<ICustomAllocator> GpuFsaBackend::GetDefaultAllocator() const
{
    return std::make_unique<GpuFsaBackendDefaultAllocator>();
}

OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                      const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    using namespace arm_compute::experimental::dynamic_fusion;

    auto it = subgraph.end();
    std::map<LayerGuid, Layer*> untouched;
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    GpuFsaLayerSupport supportChecker;
    it = subgraph.end();
    arm_compute::CLCompileContext* compileCtx = &(arm_compute::CLKernelLibrary::get().get_compile_context());

    // Setup the GpuWorkloadContext which will exist for the lifetime of the Graph. This contains the TensorInfos
    std::shared_ptr<GpuWorkloadContext> workloadContext = std::make_shared<GpuWorkloadContext>(compileCtx);
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        // Create a GpuFsaPreCompiledBlob, this contains all of the information needed to execute an operator
        GpuFsaPreCompiledBlob* preCompiledBlobPtr = new GpuFsaPreCompiledBlob();
        preCompiledBlobPtr->workloadContext = workloadContext;
        preCompiledBlobPtr->sketch = std::make_unique<GpuWorkloadSketch>(workloadContext.get());

        // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer
        switch (base.GetType())
        {
            case (LayerType::Activation):
            {
                auto desc = PolymorphicDowncast<const ActivationDescriptor*>(&base.GetParameters());
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                GpuFsaActivationCreateOp(preCompiledBlobPtr, input, *desc);
                break;
            }
            case (LayerType::Cast):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto output = base.GetOutputSlot(0).GetTensorInfo();
                GpuFsaCastCreateOp(preCompiledBlobPtr, input, output);
                break;
            }
            case (LayerType::Convolution2d):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();

                auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
                if (desc->m_BiasEnabled)
                {
                    auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                    GpuFsaConvolution2dCreateOp(preCompiledBlobPtr,
                                                input,
                                                *desc,
                                                weights,
                                                bias);
                }
                else
                {
                    GpuFsaConvolution2dCreateOp(preCompiledBlobPtr,
                                                input,
                                                *desc,
                                                weights,
                                                EmptyOptional());
                }
                break;
            }
            case (LayerType::BatchMatMul):
            {
                auto input0 = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto input1 = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
                auto desc = PolymorphicDowncast<const BatchMatMulDescriptor*>(&base.GetParameters());
                GpuFsaBatchMatMulCreateOp(preCompiledBlobPtr, input0, input1, *desc);
                break;
            }
            case (LayerType::DepthwiseConvolution2d):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();

                auto desc = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&base.GetParameters());
                if (desc->m_BiasEnabled)
                {
                    auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                    GpuFsaDepthwiseConvolution2dCreateOp(preCompiledBlobPtr,
                                                         input,
                                                         *desc,
                                                         weights,
                                                         bias);
                }
                else
                {
                    GpuFsaDepthwiseConvolution2dCreateOp(preCompiledBlobPtr,
                                                         input,
                                                         *desc,
                                                         weights,
                                                         EmptyOptional());
                }
                break;
            }
            case (LayerType::ElementwiseBinary):
            {
                auto desc = PolymorphicDowncast<const ElementwiseBinaryDescriptor*>(&base.GetParameters());
                auto input0 = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto input1 = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
                GpuFsaElementwiseBinaryCreateOp(preCompiledBlobPtr, input0, input1, *desc);
                break;
            }
            case (LayerType::Pooling2d):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto desc = PolymorphicDowncast<const Pooling2dDescriptor*>(&base.GetParameters());
                GpuFsaPooling2dCreateOp(preCompiledBlobPtr, input, *desc);
                break;
            }
            case LayerType::Reshape:
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto desc = PolymorphicDowncast<const ReshapeDescriptor*>(&base.GetParameters());
                GpuFsaReshapeCreateOp(preCompiledBlobPtr, input, *desc);
                break;
            }
            case (LayerType::Resize):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto desc = PolymorphicDowncast<const ResizeDescriptor*>(&base.GetParameters());
                GpuFsaResizeCreateOp(preCompiledBlobPtr, input, *desc);
                break;
            }
            case (LayerType::Softmax):
            {
                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
                auto output = base.GetOutputSlot(0).GetTensorInfo();

                auto desc = PolymorphicDowncast<const SoftmaxDescriptor*>(&base.GetParameters());
                GpuFsaSoftmaxCreateOp(preCompiledBlobPtr,
                                      input,
                                      output,
                                      *desc);
                break;
            }
            default:
                // unsupported layer for GpuFsa backend
                continue;
        }

        auto compiledBlob =
            std::make_unique<PreCompiledObjectPtr>(preCompiledBlobPtr, DeleteAsType<GpuFsaPreCompiledBlob>);

        IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
            PreCompiledDescriptor(base.GetNumInputSlots(), base.GetNumOutputSlots()),
            std::move(*compiledBlob),
            GpuFsaBackend::GetIdStatic(),
            "GpuFsa_Pre_Compiled_Layer");

        // Copy the output tensor infos from sub-graph
        for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
        {
            preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
        }

        SubgraphView::SubgraphViewPtr substituteSubgraph =
            CreateSubgraphViewFrom(CreateInputsFrom(&base),
                                   CreateOutputsFrom(&base),
                                   {&base});

        optimizationViews.AddSubstitution({ std::move(*substituteSubgraph), SubgraphView(preCompiledLayer) });

        untouched.erase(base.GetGuid());
    }

    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn
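The DeleteAsType helper and the blob wrapping in OptimizeSubgraphView rely on a small type-erasure idiom: the pre-compiled blob is handed to the generic PreCompiled layer behind a void pointer, and a stored deleter restores the concrete type at destruction time. A minimal standalone sketch of the same idiom follows; the PrecompiledBlob struct and ErasedPtr alias are stand-ins for illustration, not the real GpuFsaPreCompiledBlob or PreCompiledObjectPtr types.

#include <iostream>
#include <memory>

// Stand-in for a backend-specific blob type (hypothetical, for illustration only).
struct PrecompiledBlob
{
    int someCompiledState = 42;
};

// Type-erased deleter: callers only see a void*, the deleter restores the real type.
template <typename T>
void DeleteAsType(const void* const blob)
{
    delete static_cast<const T*>(blob);
}

// Owning pointer with the concrete type erased, similar in spirit to the wrapper used above.
using ErasedPtr = std::unique_ptr<void, void(*)(const void*)>;

int main()
{
    ErasedPtr blob(new PrecompiledBlob(), DeleteAsType<PrecompiledBlob>);
    // The consumer can store and pass 'blob' around without knowing its concrete type;
    // destruction still runs ~PrecompiledBlob through the stored deleter.
    std::cout << static_cast<const PrecompiledBlob*>(blob.get())->someCompiledState << "\n";
    return 0;
}

Erasing the type keeps the generic PreCompiled layer machinery independent of any one backend's blob layout; only the backend that created the blob needs the complete type.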