diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-09-15 10:14:20 +0100 |
---|---|---|
committer | Sheri Zhang <sheri.zhang@arm.com> | 2021-09-16 09:59:54 +0000 |
commit | 9ac7b99106e24ab426d8a948dd5243defb753b32 (patch) | |
tree | 1eedc0be5bba626afceecfb9d5d4436e3ac38502 /src/cpu/operators | |
parent | cc171f9e4520e16b5e1b9c483562ed022d9151fa (diff) | |
download | ComputeLibrary-9ac7b99106e24ab426d8a948dd5243defb753b32.tar.gz |
Revert "Add support for non-constant weights and biases in CpuFullyConnected"
This reverts commit aed63ee175e0d64c934389e9d1b2edd0cb1a5cdd.
* Resolves COMPMID-4812
Change-Id: I16919e2f3b22c868ae146d0d10dae97a80e1ba46
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6266
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/operators')
-rw-r--r-- | src/cpu/operators/CpuFullyConnected.cpp | 26 | ||||
-rw-r--r-- | src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp | 31 |
2 files changed, 5 insertions, 52 deletions
diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp index 57094cb0b4..cafb3484b6 100644 --- a/src/cpu/operators/CpuFullyConnected.cpp +++ b/src/cpu/operators/CpuFullyConnected.cpp @@ -312,13 +312,9 @@ void CpuFullyConnected::configure(const ITensorInfo *src, const ITensorInfo *wei if(_aux_mem[Pretranspose].size > 0) { - // Release permuted weights at the end of prepare as they are further transposed by the assembly dispatch - // Do not release them if biases are dynamic and data type is quantized, since the weights tensor will be used for biases offset calculation - _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), (_is_quantized_asymmetric && biases && !(biases->are_values_constant())) ? - MemoryLifetime::Persistent : - MemoryLifetime::Prepare, - _reshaped_weights.total_size()); - _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Prepare, _converted_weights.total_size()); + // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch + _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), MemoryLifetime::Prepare, _reshaped_weights.total_size()); + _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Prepare, _converted_weights.total_size()); } else { @@ -336,9 +332,10 @@ Status CpuFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *we ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON(biases != nullptr && biases->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); - ARM_COMPUTE_RETURN_ERROR_ON(!weights->are_values_constant() && (!fc_info.are_weights_reshaped || fc_info.transpose_weights)); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!fc_info.constant_weights, "Non-constant weights are currently not supported"); bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; bool is_fc_after_conv = true; @@ -359,19 +356,6 @@ Status CpuFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *we // Check if we have a fully connected layer with batches const bool is_batched_fc_layer = dst->dimension(1) > 1; - if(biases != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); - if(is_data_type_quantized(src->data_type())) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases); - } - } - if(is_batched_fc_layer) { is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(src->tensor_shape().cbegin() + 3, diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index 1dd6286dbf..97893b0672 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -206,7 +206,6 @@ private: std::vector<TypeInput> _indirect_pad{}; arm_gemm::ConvolutionParameters _cp{}; experimental::MemoryRequirements _aux_mem{ Count }; - bool _B_pretranspose_required{ false }; }; template <typename TypeInput, typename TypeOutput, class OutputStage> @@ -392,7 +391,6 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo * const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size(); _pretranspose_info = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8); _aux_mem[Pretranspose] = MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment); - _B_pretranspose_required = true; } // Handle indirect GEMM convolution @@ -487,35 +485,6 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors) in1_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes()); } - // If necessary, run pretranspose every time if either weights or biases are non-constant - if((b && !b->info()->are_values_constant()) || (c && !c->info()->are_values_constant() && c->info()->data_type() == DataType::S32)) - { - if(c && c->info()->data_type() == DataType::S32) - { - _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(c->buffer() + c->info()->offset_first_element_in_bytes()), 0); - } - - // Pretranspose B if required - if(_B_pretranspose_required) - { - const int ldb = b->info()->strides_in_bytes().y() / sizeof(TypeInput); - const auto b_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes()); - const int multi_stride_b = b->info()->strides_in_bytes().z() / sizeof(TypeInput); - - CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, true); - ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr); - - if(b->info()->are_values_constant()) - { - _gemm_kernel_asm->requantize_bias(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b); - } - else - { - _gemm_kernel_asm->pretranspose_B_array(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b); - } - } - } - const auto scheduling_hint = scheduling_hint_heuristic(_kernel_info.method, d->info()->data_type()); // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads |