diff options
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 17 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConv2d.cpp | 139 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 15 |
3 files changed, 42 insertions, 129 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index b84128e6c0..7318c3e492 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -89,10 +89,19 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe if(run_optimised) { - const ITensor *c_to_use = is_c_bias ? c : nullptr; - _asm_glue->configure(a, b, c_to_use, d, asm_info); + const ITensor *c_to_use = is_c_bias ? c : nullptr; + const ITensorInfo *c_info_to_use = c_to_use != nullptr ? c_to_use->info() : nullptr; + _asm_glue->configure(a->info(), b->info(), c_info_to_use, d->info(), asm_info); ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); + _asm_glue_tensors = + { + { ACL_SRC_0, a }, + { ACL_SRC_1, b }, + { ACL_SRC_2, c_to_use }, + { ACL_DST, d }, + }; + // Scale product by alpha if(_run_alpha_scale) { @@ -314,7 +323,7 @@ void NEGEMM::run() if(_asm_glue->is_configured()) { - _asm_glue->run(); + _asm_glue->run(_asm_glue_tensors); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -368,7 +377,7 @@ void NEGEMM::prepare() ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue->prepare(); + _asm_glue->prepare(_asm_glue_tensors); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp index ddeacc85f5..94ceb6d27c 100644 --- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp +++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp @@ -26,151 +26,48 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h" +#include "src/runtime/cpu/operators/CpuGemmDirectConv2d.h" #include <set> namespace arm_compute { -namespace -{ -GEMMLowpOutputStageInfo calculate_output_stage_metadata(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const ActivationLayerInfo &act) -{ - // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo iqinfo = input->quantization_info(); - const QuantizationInfo wqinfo = weights->quantization_info(); - const QuantizationInfo oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info(); - const UniformQuantizationInfo uoqinfo = oqinfo.uniform(); - const DataType data_type = input->data_type(); - // Merge activation with output stage - const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, - ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - }; - PixelValue type_min{}; - PixelValue type_max{}; - std::tie(type_min, type_max) = get_min_max(data_type); - int32_t min_activation = type_min.get<int32_t>(); - int32_t max_activation = type_max.get<int32_t>(); - if(supported_acts.count(act.activation()) != 0) - { - std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act, data_type, uoqinfo); - } - GEMMLowpOutputStageInfo os_info; - os_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - os_info.gemmlowp_offset = uoqinfo.offset; - os_info.gemmlowp_min_bound = min_activation; - os_info.gemmlowp_max_bound = max_activation; - os_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL); - quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, os_info); - return os_info; -} -cpu::AsmGemmInfo init_assembly_metadata(const Conv2dInfo &info, bool is_indirect) +using OperatorType = cpu::CpuGemmDirectConv2d; + +struct NEGEMMConv2d::Impl { - cpu::AsmGemmInfo asm_info; - asm_info.method = is_indirect ? cpu::AsmConvMethod::Indirect : cpu::AsmConvMethod::Conv; - asm_info.ps_info = info.conv_info; - asm_info.activation_info = info.act_info; - asm_info.depth_output_gemm3d = true; - asm_info.reinterpret_input_as_3d = true; - asm_info.padding_top = info.conv_info.pad_top(); - asm_info.padding_left = info.conv_info.pad_left(); - asm_info.padding_value = 0.f; - asm_info.negated_offsets = false; - return asm_info; -} -} // namespace + ITensorPack tensors{}; + std::unique_ptr<OperatorType> op{ nullptr }; +}; NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager) - : _gemm_asm_func(std::make_unique<cpu::CpuGemmAssemblyDispatch>(memory_manager)), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), - _run_activation(false) + : _impl(std::make_unique<Impl>()) { + _impl->op = std::make_unique<OperatorType>(memory_manager); } NEGEMMConv2d::~NEGEMMConv2d() = default; void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NEGEMMConv2d::validate(input->info(), - weights->info(), - biases != nullptr ? biases->info() : nullptr, - output->info(), - info)); - _original_weights = weights; - _weights_permute_func.configure(weights, &_permuted_weights, PermutationVector{ 3, 0, 1, 2 }); + _impl->tensors.add_const_tensor(TensorType::ACL_SRC_0, input); + _impl->tensors.add_const_tensor(TensorType::ACL_SRC_1, weights); + _impl->tensors.add_const_tensor(TensorType::ACL_SRC_2, biases); + _impl->tensors.add_tensor(TensorType::ACL_DST, output); - // Configure assembly dispatch - cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false); - if(is_data_type_quantized(input->info()->data_type())) - { - asm_info.output_stage = calculate_output_stage_metadata(input->info(), weights->info(), output->info(), info.act_info); - } - _gemm_asm_func->configure(input, &_permuted_weights, biases, output, asm_info); - - // Configure activation - if(info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info)) - { - _activation_func.configure(output, nullptr, info.act_info); - _run_activation = true; - } + _impl->op->configure(input->info(), weights->info(), biases->info(), output->info(), info); } + Status NEGEMMConv2d::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() != DataLayout::NHWC, "Data layout supported is NHWC"); - const DataType data_type = input->data_type(); - const TensorShape i_shape = input->tensor_shape(); - const TensorShape w_shape = weights->tensor_shape(); - ARM_COMPUTE_RETURN_ERROR_ON(w_shape[0] != i_shape[0]); - ARM_COMPUTE_RETURN_ERROR_ON(info.dilation != Size2D(1U, 1U)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); - // Validate biases - if(biases != nullptr) - { - if(is_data_type_quantized_asymmetric(data_type)) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); - } - else if(data_type == DataType::BFLOAT16) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - } - ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3)); - ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); - } - - cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false); - ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuGemmAssemblyDispatch::validate(input, weights, biases, output, asm_info)); - return Status{}; + return OperatorType::validate(input, weights, biases, output, info); } void NEGEMMConv2d::run() { - prepare(); - - _gemm_asm_func->run(); - if(_run_activation) - { - _activation_func.run(); - } + _impl->op->run(_impl->tensors); } void NEGEMMConv2d::prepare() { - if(!_is_prepared) - { - _permuted_weights.allocator()->allocate(); - _weights_permute_func.run(); - _original_weights->mark_as_unused(); - _is_prepared = true; - } + _impl->op->prepare(_impl->tensors); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 53dd39e549..cc0f20e695 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -146,14 +146,21 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, { if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - _asm_glue->configure(a_to_use, b, c, output, asm_info); + auto c_info_to_use = c == nullptr ? nullptr : c->info(); + _asm_glue->configure(a_to_use->info(), b->info(), c_info_to_use, output->info(), asm_info); _fused_assembly_path = _asm_glue->is_configured(); + _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_2, c); + _asm_glue_tensors.add_tensor(TensorType::ACL_DST, output); } else { - _asm_glue->configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info); + auto output_to_use = (_fuse_output_stage ? &_mm_result_s32 : output); + _asm_glue->configure(a_to_use->info(), b->info(), nullptr, output_to_use->info(), asm_info); + _asm_glue_tensors.add_tensor(TensorType::ACL_DST, output_to_use); } _assembly_path = _asm_glue->is_configured(); + _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_0, a_to_use); + _asm_glue_tensors.add_const_tensor(TensorType::ACL_SRC_1, b); break; } default: @@ -513,7 +520,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Run GEMM if(_asm_glue->is_configured()) { - _asm_glue->run(); + _asm_glue->run(_asm_glue_tensors); } else { @@ -583,7 +590,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue->prepare(); + _asm_glue->prepare(_asm_glue_tensors); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); |