diff options
-rw-r--r-- | arm_compute/core/Types.h | 2 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClFullyConnected.cpp | 54 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClFullyConnected.h | 8 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 14 | ||||
-rw-r--r-- | tests/validation/CL/FullyConnectedLayer.cpp | 24 | ||||
-rw-r--r-- | tests/validation/fixtures/FullyConnectedLayerFixture.h | 2 | ||||
-rw-r--r-- | utils/TypePrinter.h | 1 |
7 files changed, 78 insertions, 27 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 8434611f7a..fd45bbaa1e 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1833,7 +1833,7 @@ struct FullyConnectedLayerInfo /* Information about weights */ DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */ bool transpose_weights{ true }; /**< Transpose weights if true. */ - bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */ + bool are_weights_reshaped{ false }; /**< @deprecated Reshape the weights tensor if false. */ bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */ bool enable_fast_math{ false }; /**< Enable fast math computation. */ /* Other parameters */ diff --git a/src/gpu/cl/operators/ClFullyConnected.cpp b/src/gpu/cl/operators/ClFullyConnected.cpp index 8afd036e7c..b289cc0104 100644 --- a/src/gpu/cl/operators/ClFullyConnected.cpp +++ b/src/gpu/cl/operators/ClFullyConnected.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -162,7 +162,7 @@ void ClFullyConnected::configure_mm(const CLCompileContext &compile_context, ITe const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped - true, // reshape_b_only_on_first_run + !_dynamic_weights, // reshape_b_only_on_first_run 0, // depth_output_gemm3d false, // reinterpret_input_as_3d fc_info.retain_internal_weights, // retain_internal_weights @@ -240,6 +240,7 @@ void ClFullyConnected::configure(const CLCompileContext &compile_context, ITenso _is_prepared = fc_info.retain_internal_weights; _weights_to_use = TensorInfo(*weights); _weights_to_use_idx = ACL_SRC_1; + _dynamic_weights = !weights->are_values_constant() && !_are_weights_reshaped; // With the Fully Connected layer we can have 4 different cases: // 1) Convolution layer -> Fully Connected layer without batches @@ -310,8 +311,15 @@ void ClFullyConnected::configure(const CLCompileContext &compile_context, ITenso if(_aux_mem[1].size > 0 || _aux_mem[2].size > 0) // Persistent weights memory on GEMMs { // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch - _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), MemoryLifetime::Prepare, _reshaped_weights.total_size()); - _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Prepare, _converted_weights.total_size()); + // Keep all the auxiliary tensors in case of dynamic weights as they are recalculated every time + _aux_mem[TransposedWeights] = MemoryInfo( + offset_int_vec(TransposedWeights), + _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Prepare, + _reshaped_weights.total_size()); + _aux_mem[ConvertedWeights] = MemoryInfo( + offset_int_vec(ConvertedWeights), + _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Prepare, + _converted_weights.total_size()); } else { @@ -319,8 +327,14 @@ void ClFullyConnected::configure(const CLCompileContext &compile_context, ITenso const auto transposed_wei_lft = (_weights_to_use_idx == offset_int_vec(TransposedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare; const auto converted_wei_lft = (_weights_to_use_idx == offset_int_vec(ConvertedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare; - _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), transposed_wei_lft, _reshaped_weights.total_size()); - _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), converted_wei_lft, _converted_weights.total_size()); + _aux_mem[TransposedWeights] = MemoryInfo( + offset_int_vec(TransposedWeights), + _dynamic_weights ? MemoryLifetime::Temporary : transposed_wei_lft, + _reshaped_weights.total_size()); + _aux_mem[ConvertedWeights] = MemoryInfo( + offset_int_vec(ConvertedWeights), + _dynamic_weights ? MemoryLifetime::Temporary : converted_wei_lft, + _converted_weights.total_size()); } _aux_mem[FlattenedSrc] = MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size()); } @@ -334,7 +348,6 @@ Status ClFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *wei ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); - ARM_COMPUTE_RETURN_ERROR_ON(!weights->are_values_constant() && (!fc_info.are_weights_reshaped || fc_info.transpose_weights)); bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; bool is_fc_after_conv = true; @@ -420,6 +433,11 @@ void ClFullyConnected::run(ITensorPack &tensors) { prepare(tensors); +#ifdef ARM_COMPUTE_ASSERTS_ENABLED + ++_asrt_run_count; + ARM_COMPUTE_ERROR_ON(_dynamic_weights && _asrt_prepare_count != _asrt_run_count); +#endif // ARM_COMPUTE_ASSERTS_ENABLED + auto src = tensors.get_const_tensor(ACL_SRC_0); CLAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false); @@ -452,8 +470,13 @@ void ClFullyConnected::run(ITensorPack &tensors) void ClFullyConnected::prepare(ITensorPack &tensors) { - if(!_is_prepared) + if(!_is_prepared || _dynamic_weights) { +#ifdef ARM_COMPUTE_ASSERTS_ENABLED + ++_asrt_prepare_count; + ARM_COMPUTE_ERROR_ON(!_dynamic_weights && _asrt_prepare_count > 1); +#endif // ARM_COMPUTE_ASSERTS_ENABLED + auto weights = tensors.get_const_tensor(ACL_SRC_1); CLAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false); @@ -462,7 +485,7 @@ void ClFullyConnected::prepare(ITensorPack &tensors) // Pointer to current weights const ITensor *cur_weights = weights; - // Reshape of the weights if needed (happens only once) + // Reshape of the weights if needed if(!_are_weights_reshaped) { // Run reshape weights kernel and mark weights as unused @@ -471,11 +494,9 @@ void ClFullyConnected::prepare(ITensorPack &tensors) cur_weights->mark_as_unused(); cur_weights = reshaped_weights.get(); - - _are_weights_reshaped = true; } - // Convert weights if needed (happens only once) + // Convert weights if needed if(!_are_weights_converted) { ITensorPack convert_pack{ { ACL_SRC, cur_weights }, { ACL_DST, converted_weights.get() } }; @@ -483,20 +504,19 @@ void ClFullyConnected::prepare(ITensorPack &tensors) cur_weights->mark_as_unused(); cur_weights = converted_weights.get(); - - _are_weights_converted = true; } - tensors.add_const_tensor(ACL_SRC_1, cur_weights); + ITensorPack gemm_pack = tensors; + gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights); // Prepare GEMM prepare and release unused weights if(!_is_quantized) { - _mm_gemm->prepare(tensors); + _mm_gemm->prepare(gemm_pack); } else { - _mm_gemmlowp->prepare(tensors); + _mm_gemmlowp->prepare(gemm_pack); } _is_prepared = true; } diff --git a/src/gpu/cl/operators/ClFullyConnected.h b/src/gpu/cl/operators/ClFullyConnected.h index b5ac70c93b..d08d5db8a4 100644 --- a/src/gpu/cl/operators/ClFullyConnected.h +++ b/src/gpu/cl/operators/ClFullyConnected.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -132,6 +132,12 @@ private: bool _is_fc_after_conv{ true }; bool _is_quantized{ false }; bool _is_prepared{ false }; + bool _dynamic_weights{ false }; + +#ifdef ARM_COMPUTE_ASSERTS_ENABLED + int _asrt_run_count{}; + int _asrt_prepare_count{}; +#endif // ARM_COMPUTE_ASSERTS_ENABLED }; } // namespace opencl } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 02b2042a6c..1c162db79a 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,7 @@ struct CLFullyConnectedLayer::Impl experimental::MemoryRequirements aux_mem_req{}; bool is_prepared{ false }; + bool dynamic_weights{ false }; }; CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) @@ -96,6 +97,12 @@ void CLFullyConnectedLayer::configure(const CLCompileContext &compile_context, c _impl->run_pack.add_tensor(ACL_SRC_0, input); _impl->run_pack.add_tensor(ACL_DST, output); } + + _impl->dynamic_weights = + !weights->info()->are_values_constant() && + fc_info.transpose_weights && + !fc_info.are_weights_reshaped && + !fc_info.retain_internal_weights; } Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, @@ -106,7 +113,10 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn void CLFullyConnectedLayer::run() { - prepare(); + if(!_impl->dynamic_weights) + { + prepare(); + } MemoryGroupResourceScope scope_mg(_impl->memory_group); _impl->op->run(_impl->run_pack); diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp index fcfae4e156..9213ab541d 100644 --- a/tests/validation/CL/FullyConnectedLayer.cpp +++ b/tests/validation/CL/FullyConnectedLayer.cpp @@ -150,6 +150,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework:: // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("WeightsReshaped", { false, true }))) +{ +} TEST_SUITE_END() TEST_SUITE(FP32) @@ -173,9 +179,9 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerMixedDataLayoutF validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))), - framework::dataset::make("WeightsReshaped", { true }))) + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("WeightsReshaped", { false, true }))) { } FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), @@ -223,6 +229,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ }))) +{ +} TEST_SUITE_END() /* QASYMM8 */ TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, @@ -246,6 +258,12 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDa // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ }))) +{ +} TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // FullyConnectedLayer diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h index 7d1aa494ba..75bef144ad 100644 --- a/tests/validation/fixtures/FullyConnectedLayerFixture.h +++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h @@ -333,7 +333,6 @@ private: validate(AccessorType(target), ref, rel_tolerance_f32, 0, abs_tolerance_f32); } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void validate_with_tolerance(TensorType &target, SimpleTensor<half_float::half> &ref) { constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.3f); @@ -342,7 +341,6 @@ private: validate(AccessorType(target), ref, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void validate_with_tolerance(TensorType &target, SimpleTensor<uint8_t> &ref) { diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 9b9c7b5b34..d40c3eb3d7 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -2772,7 +2772,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const FullyConnectedLayerI << "transpose_weights=" << layer_info.transpose_weights << ", " << "are_weights_reshaped=" << layer_info.are_weights_reshaped << ", " << "retain_internal_weights=" << layer_info.retain_internal_weights << ", " - << "constant_weights=" << layer_info.transpose_weights << ", " << "fp_mixed_precision=" << layer_info.fp_mixed_precision << "}"; return os; } |