aboutsummaryrefslogtreecommitdiff
path: root/src/graph/backends/CL
diff options
context:
space:
mode:
Diffstat (limited to 'src/graph/backends/CL')
-rw-r--r--src/graph/backends/CL/CLDeviceBackend.cpp65
-rw-r--r--src/graph/backends/CL/CLFunctionsFactory.cpp183
-rw-r--r--src/graph/backends/CL/CLNodeValidator.cpp85
-rw-r--r--src/graph/backends/CL/CLSubTensorHandle.cpp11
-rw-r--r--src/graph/backends/CL/CLTensorHandle.cpp11
5 files changed, 228 insertions, 127 deletions
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 0159592af6..e27a4109d1 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,19 +23,17 @@
*/
#include "arm_compute/graph/backends/CL/CLDeviceBackend.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/graph/backends/BackendRegistrar.h"
#include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
#include "arm_compute/graph/backends/CL/CLNodeValidator.h"
#include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
#include "arm_compute/graph/backends/CL/CLTensorHandle.h"
-
-#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Tensor.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -65,17 +63,18 @@ bool file_exists(const std::string &filename)
static detail::BackendRegistrar<CLDeviceBackend> CLDeviceBackend_registrar(Target::CL);
CLDeviceBackend::CLDeviceBackend()
- : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file()
+ : _context_count(0),
+ _tuner(),
+ _gemm_heuristics(),
+ _allocator(nullptr),
+ _tuner_file(),
+ _backend_type(CLBackendType::Native)
{
}
CLDeviceBackend::~CLDeviceBackend()
{
- // TODO (geopin01) : Shouldn't call non exception safe stuff here
- if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty() && !_tuner_file.empty())
- {
- _tuner.save_to_file(_tuner_file);
- }
+ _tuner.save_to_file(_tuner_file);
}
void CLDeviceBackend::set_kernel_tuning(bool enable_tuning)
@@ -91,16 +90,16 @@ void CLDeviceBackend::set_kernel_tuning_mode(CLTunerMode tuning_mode)
void CLDeviceBackend::initialize_backend()
{
// Setup Scheduler
- CLScheduler::get().default_init(&_tuner);
+ CLScheduler::get().default_init(&_tuner, &_gemm_heuristics, _backend_type);
// Create allocator with new context
- _allocator = support::cpp14::make_unique<CLBufferAllocator>(nullptr /* legacy path for CLCoreRuntimeContext */);
+ _allocator = std::make_unique<CLBufferAllocator>();
}
void CLDeviceBackend::release_backend_context(GraphContext &ctx)
{
ARM_COMPUTE_UNUSED(ctx);
_context_count--;
- if(_context_count == 0) // No more context using the backend: free resources
+ if (_context_count == 0) // No more context using the backend: free resources
{
_allocator = nullptr;
}
@@ -110,15 +109,17 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
{
// Force backend initialization
_context_count++;
- if(_context_count == 1)
+ if (_context_count == 1)
{
+ _backend_type = ctx.config().backend_type;
initialize_backend();
}
// Setup tuner
_tuner_file = ctx.config().tuner_file;
+
// Load tuner data if available
- if(file_exists(_tuner_file))
+ if (file_exists(_tuner_file))
{
_tuner.load_from_file(_tuner_file);
}
@@ -126,8 +127,12 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
set_kernel_tuning(ctx.config().use_tuner);
set_kernel_tuning_mode(ctx.config().tuner_mode);
+ // Attempt to load mlgo heuristics
+ ARM_COMPUTE_ERROR_ON(CLScheduler::get().gemm_heuristics() == nullptr);
+ CLScheduler::get().gemm_heuristics()->reload_from_file(ctx.config().mlgo_file);
+
// Setup a management backend
- if(ctx.memory_management_ctx(Target::CL) == nullptr)
+ if (ctx.memory_management_ctx(Target::CL) == nullptr)
{
MemoryManagerContext mm_ctx;
mm_ctx.target = Target::CL;
@@ -140,7 +145,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
}
// Create function level weights manager
- if(ctx.weights_management_ctx(Target::CL) == nullptr)
+ if (ctx.weights_management_ctx(Target::CL) == nullptr)
{
WeightsManagerContext wm_ctx;
wm_ctx.target = Target::CL;
@@ -170,17 +175,18 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens
TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
info.set_data_layout(tensor_desc.layout);
- return support::cpp14::make_unique<CLTensorHandle>(info);
+ return std::make_unique<CLTensorHandle>(info);
}
-std::unique_ptr<ITensorHandle> CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
+std::unique_ptr<ITensorHandle>
+CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
{
- if(parent == nullptr)
+ if (parent == nullptr)
{
return nullptr;
}
- return support::cpp14::make_unique<CLSubTensorHandle>(parent, shape, coords, extend_parent);
+ return std::make_unique<CLSubTensorHandle>(parent, shape, coords, extend_parent);
}
std::unique_ptr<arm_compute::IFunction> CLDeviceBackend::configure_node(INode &node, GraphContext &ctx)
@@ -202,7 +208,7 @@ arm_compute::Status CLDeviceBackend::validate_node(INode &node)
std::shared_ptr<arm_compute::IMemoryManager> CLDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
{
- if(affinity == MemoryManagerAffinity::Offset)
+ if (affinity == MemoryManagerAffinity::Offset)
{
ARM_COMPUTE_LOG_GRAPH_WARNING("CL Backend does not support offset affinity memory management!");
return nullptr;
@@ -220,6 +226,11 @@ std::shared_ptr<arm_compute::IWeightsManager> CLDeviceBackend::create_weights_ma
auto weights_mgr = std::make_shared<IWeightsManager>();
return weights_mgr;
}
+
+void CLDeviceBackend::sync()
+{
+ CLScheduler::get().sync();
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 312e09a49a..d4e1aa880f 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,14 +22,15 @@
* SOFTWARE.
*/
#include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
-
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "src/core/CL/CLKernels.h"
+#include "support/Cast.h"
+
using namespace arm_compute::utils::cast;
namespace arm_compute
@@ -42,6 +43,7 @@ namespace backends
struct CLTargetInfo
{
using TensorType = arm_compute::ICLTensor;
+ using SrcTensorType = const arm_compute::ICLTensor;
using TensorConcreteType = CLTensor;
static Target TargetType;
};
@@ -63,6 +65,14 @@ struct CLEltwiseFunctions
using Addition = CLArithmeticAddition;
using Subtraction = CLArithmeticSubtraction;
using Multiplication = CLPixelWiseMultiplication;
+ using Maximum = CLElementwiseMax;
+ using Division = CLArithmeticDivision;
+};
+
+/** Collection of CL unary element-wise functions */
+struct CLUnaryEltwiseFunctions
+{
+ using Exp = CLExpLayer;
};
/** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */
@@ -71,28 +81,27 @@ struct CLFusedLayerTypes
using ConvolutionLayer = CLConvolutionLayer;
using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
using FuseBatchNormalization = CLFuseBatchNormalization;
+ using GEMMConvolutionLayer = CLGEMMConvolutionLayer;
};
-// TODO (isagot01): Remove once we support heterogeneous scheduling at function level
/** Wrapper for the CPP Function in the OpenCL backend **/
class CPPWrapperFunction : public IFunction
{
public:
/* Default constructor */
- CPPWrapperFunction()
- : _tensors(), _func(nullptr)
+ CPPWrapperFunction() : _tensors(), _func(nullptr)
{
}
void run() override
{
- for(auto &tensor : _tensors)
+ for (auto &tensor : _tensors)
{
tensor->map(CLScheduler::get().queue());
}
_func->run();
- for(auto &tensor : _tensors)
+ for (auto &tensor : _tensors)
{
tensor->unmap(CLScheduler::get().queue());
}
@@ -117,7 +126,8 @@ namespace detail
{
// Specialized functions
template <>
-std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
{
validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
@@ -134,23 +144,19 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
ARM_COMPUTE_ERROR_ON(output == nullptr);
// Create and configure function
- auto func = support::cpp14::make_unique<CPPDetectionOutputLayer>();
+ auto func = std::make_unique<CPPDetectionOutputLayer>();
func->configure(input0, input1, input2, output, detect_info);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Type: " << node.type()
- << " Target: " << CLTargetInfo::TargetType
- << " Data Type: " << input0->info()->data_type()
- << " Input0 shape: " << input0->info()->tensor_shape()
- << " Input1 shape: " << input1->info()->tensor_shape()
+ << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+ << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+ << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
<< " Input2 shape: " << input2->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
- << " DetectionOutputLayer info: " << detect_info
- << std::endl);
+ << " DetectionOutputLayer info: " << detect_info << std::endl);
- auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
+ auto wrap_function = std::make_unique<CPPWrapperFunction>();
wrap_function->register_function(std::move(func));
wrap_function->register_tensor(input0);
@@ -158,10 +164,11 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
wrap_function->register_tensor(input2);
wrap_function->register_tensor(output);
- return RETURN_UNIQUE_PTR(wrap_function);
+ return std::move(wrap_function);
}
template <>
-std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
{
validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
@@ -184,26 +191,22 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
ARM_COMPUTE_ERROR_ON(output3 == nullptr);
// Create and configure function
- auto func = support::cpp14::make_unique<CPPDetectionPostProcessLayer>();
+ auto func = std::make_unique<CPPDetectionPostProcessLayer>();
func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Type: " << node.type()
- << " Target: " << CLTargetInfo::TargetType
- << " Data Type: " << input0->info()->data_type()
- << " Input0 shape: " << input0->info()->tensor_shape()
- << " Input1 shape: " << input1->info()->tensor_shape()
+ << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+ << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+ << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
<< " Input2 shape: " << input2->info()->tensor_shape()
<< " Output0 shape: " << output0->info()->tensor_shape()
<< " Output1 shape: " << output1->info()->tensor_shape()
<< " Output2 shape: " << output2->info()->tensor_shape()
<< " Output3 shape: " << output3->info()->tensor_shape()
- << " DetectionPostProcessLayer info: " << detect_info
- << std::endl);
+ << " DetectionPostProcessLayer info: " << detect_info << std::endl);
- auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
+ auto wrap_function = std::make_unique<CPPWrapperFunction>();
wrap_function->register_function(std::move(func));
wrap_function->register_tensor(input0);
@@ -214,90 +217,134 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
wrap_function->register_tensor(output2);
wrap_function->register_tensor(output3);
- return RETURN_UNIQUE_PTR(wrap_function);
+ return std::move(wrap_function);
}
} // namespace detail
std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return nullptr;
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
case NodeType::ActivationLayer:
- return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(
+ *polymorphic_downcast<ActivationLayerNode *>(node));
+ case NodeType::ArgMinMaxLayer:
+ return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::BoundingBoxTransformLayer:
- return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+ return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(
+ *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
case NodeType::ChannelShuffleLayer:
- return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::DeconvolutionLayer:
- return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+ return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(
+ *polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
case NodeType::ConcatenateLayer:
- return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+ return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(
+ *polymorphic_downcast<ConcatenateLayerNode *>(node));
+ case NodeType::DepthToSpaceLayer:
+ return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::EltwiseLayer:
- return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
+ case NodeType::UnaryEltwiseLayer:
+ return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
- return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
+ return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(
+ *polymorphic_downcast<FlattenLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(
+ *polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::FusedConvolutionBatchNormalizationLayer:
- return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(
+ *polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
- return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes,
+ CLTargetInfo>(
+ *polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::GenerateProposalsLayer:
- return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
+ return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(
+ *polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
+ case NodeType::L2NormalizeLayer:
+ return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
case NodeType::NormalizePlanarYUVLayer:
- return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+ return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(
+ *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
case NodeType::PadLayer:
return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
- return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
+ return detail::create_permute_layer<CLPermute, CLTargetInfo>(
+ *polymorphic_downcast<PermuteLayerNode *>(node));
case NodeType::PoolingLayer:
- return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(
+ *polymorphic_downcast<PoolingLayerNode *>(node));
case NodeType::PReluLayer:
- return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
+ return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(
+ *polymorphic_downcast<PReluLayerNode *>(node));
case NodeType::PrintLayer:
return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
case NodeType::PriorBoxLayer:
- return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
+ return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(
+ *polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
+ case NodeType::ReductionOperationLayer:
+ return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(
+ *polymorphic_downcast<ReductionLayerNode *>(node), ctx);
case NodeType::ReorgLayer:
- return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
+ return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(
+ *polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
- return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(
+ *polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ResizeLayer:
return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::ROIAlignLayer:
- return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
+ return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(
+ *polymorphic_downcast<ROIAlignLayerNode *>(node));
case NodeType::SliceLayer:
return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(
+ *polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
case NodeType::StackLayer:
- return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
- case NodeType::UpsampleLayer:
- return detail::create_upsample_layer<CLUpsampleLayer, CLTargetInfo>(*polymorphic_downcast<UpsampleLayerNode *>(node), ctx);
- case NodeType::YOLOLayer:
- return detail::create_yolo_layer<CLYOLOLayer, CLTargetInfo>(*polymorphic_downcast<YOLOLayerNode *>(node), ctx);
+ return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(
+ *polymorphic_downcast<StackLayerNode *>(node));
+ case NodeType::StridedSliceLayer:
+ return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
default:
return nullptr;
}
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index ddb8e3d1ac..510eda7935 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,11 +25,11 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "support/Cast.h"
+
using namespace arm_compute::utils::cast;
namespace arm_compute
@@ -38,37 +38,69 @@ namespace graph
{
namespace backends
{
+/** Collection of CL element-wise functions */
+struct CLEltwiseLayerFunctions
+{
+ using ArithmeticAddition = CLArithmeticAddition;
+ using ArithmeticSubtraction = CLArithmeticSubtraction;
+ using PixelWiseMultiplication = CLPixelWiseMultiplication;
+ using ElementwiseMax = CLElementwiseMax;
+ using ArithmeticDivision = CLArithmeticDivision;
+};
+
+/** Collection of CL unary element-wise functions */
+struct CLUnaryEltwiseLayerFunctions
+{
+ using ExpLayer = CLExpLayer;
+};
+
Status CLNodeValidator::validate(INode *node)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return Status{};
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
+ case NodeType::ArgMinMaxLayer:
+ return detail::validate_arg_min_max_layer<CLArgMinMaxLayer>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BoundingBoxTransformLayer:
- return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+ return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(
+ *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
case NodeType::ChannelShuffleLayer:
- return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::validate_convolution_layer<CLConvolutionLayer,
- CLDirectConvolutionLayer,
- CLGEMMConvolutionLayer,
- CLWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
+ return detail::validate_convolution_layer<CLConvolutionLayer, CLDirectConvolutionLayer,
+ CLGEMMConvolutionLayer, CLWinogradConvolutionLayer>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node));
+ case NodeType::DepthToSpaceLayer:
+ return detail::validate_depth_to_space_layer<CLDepthToSpaceLayer>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::validate_dequantization_layer<CLDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::validate_dequantization_layer<CLDequantizationLayer>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::GenerateProposalsLayer:
- return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node));
+ return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(
+ *polymorphic_downcast<GenerateProposalsLayerNode *>(node));
+ case NodeType::L2NormalizeLayer:
+ return detail::validate_l2_normalize_layer<CLL2NormalizeLayer>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node));
case NodeType::NormalizePlanarYUVLayer:
- return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+ return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(
+ *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
case NodeType::PadLayer:
return detail::validate_pad_layer<CLPadLayer>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
@@ -78,7 +110,11 @@ Status CLNodeValidator::validate(INode *node)
case NodeType::PriorBoxLayer:
return detail::validate_priorbox_layer<CLPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::validate_quantization_layer<CLQuantizationLayer>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::validate_quantization_layer<CLQuantizationLayer>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
+ case NodeType::ReductionOperationLayer:
+ return detail::validate_reduction_operation_layer<CLReductionOperation>(
+ *polymorphic_downcast<ReductionLayerNode *>(node));
case NodeType::ReorgLayer:
return detail::validate_reorg_layer<CLReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
@@ -87,10 +123,15 @@ Status CLNodeValidator::validate(INode *node)
return detail::validate_roi_align_layer<CLROIAlignLayer>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
case NodeType::SliceLayer:
return detail::validate_slice_layer<CLSlice>(*polymorphic_downcast<SliceLayerNode *>(node));
- case NodeType::UpsampleLayer:
- return detail::validate_upsample_layer<CLUpsampleLayer>(*polymorphic_downcast<UpsampleLayerNode *>(node));
- case NodeType::YOLOLayer:
- return detail::validate_yolo_layer<CLYOLOLayer>(*polymorphic_downcast<YOLOLayerNode *>(node));
+ case NodeType::StridedSliceLayer:
+ return detail::validate_strided_slice_layer<CLStridedSlice>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
+ case NodeType::EltwiseLayer:
+ return detail::validate_eltwise_Layer<CLEltwiseLayerFunctions>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
+ case NodeType::UnaryEltwiseLayer:
+ return detail::validate_unary_eltwise_layer<CLUnaryEltwiseLayerFunctions>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
default:
return Status{};
}
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index 016dca753b..ccdc877a18 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@
*/
#include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
@@ -31,7 +31,10 @@ namespace graph
{
namespace backends
{
-CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
+CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle,
+ const TensorShape &shape,
+ const Coordinates &coords,
+ bool extend_parent)
: _sub_tensor(), _parent_handle(nullptr)
{
ARM_COMPUTE_ERROR_ON(!parent_handle);
@@ -98,4 +101,4 @@ Target CLSubTensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index 891c784cbe..1b69f9dede 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2019 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,8 +31,7 @@ namespace graph
{
namespace backends
{
-CLTensorHandle::CLTensorHandle(const ITensorInfo &info)
- : _tensor()
+CLTensorHandle::CLTensorHandle(const ITensorInfo &info) : _tensor()
{
_tensor.allocator()->init(info);
}
@@ -49,7 +48,7 @@ void CLTensorHandle::free()
void CLTensorHandle::manage(IMemoryGroup *mg)
{
- if(mg != nullptr)
+ if (mg != nullptr)
{
mg->manage(&_tensor);
}
@@ -68,7 +67,7 @@ void CLTensorHandle::unmap()
void CLTensorHandle::release_if_unused()
{
// TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
- if(!_tensor.is_used())
+ if (!_tensor.is_used())
{
_tensor.allocator()->free();
}
@@ -100,4 +99,4 @@ Target CLTensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute