diff options
author | Freddie Liardet <frederick.liardet@arm.com> | 2021-08-03 15:57:32 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-09-07 11:13:44 +0000 |
commit | d216f570750b8ccde3754c4aef53fc20a90cb32d (patch) | |
tree | 83a88d3d4391c6a8ca5dabc73c763e6f0878c595 /src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp | |
parent | 4e53c5ab47a713ab0ce53d076e2e4cf274fec312 (diff) | |
download | ComputeLibrary-d216f570750b8ccde3754c4aef53fc20a90cb32d.tar.gz |
Update cpu depthwise kernels
Resolves: COMPMID-4688
Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
Change-Id: I9e22f967f5b7ccaebff2fc49f0253f621d62d820
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6030
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp | 38 |
1 file changed, 25 insertions, 13 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp index ac43df979c..7a26ba4230 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp @@ -94,7 +94,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { cpu_has_sve), cycle_estimate<sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>(args); + auto strat = new sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -105,7 +106,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { cpu_has_sve), cycle_estimate<sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>(args); + auto strat = new sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -116,7 +118,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { cpu_has_sve), cycle_estimate<sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>(args); + auto strat = new sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -127,7 +130,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { cpu_has_sve), 
cycle_estimate<sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>(args); + auto strat = new sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -138,7 +142,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { cpu_has_sve), cycle_estimate<sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>(args); + auto strat = new sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -153,7 +158,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst", - constraint(is_supported<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>, cpu_has_sve), + constraint(is_supported<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>, + cpu_has_sve), not_preferred_if_no_multiplier, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { return new DepthwiseDepthfirstWithMultiplier<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>(args); @@ -162,7 +168,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst", - constraint(is_supported<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>, cpu_has_sve), + 
constraint(is_supported<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>, + cpu_has_sve), not_preferred_if_no_multiplier, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { return new DepthwiseDepthfirstWithMultiplier<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>(args); @@ -185,7 +192,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { has_no_channel_multiplier), cycle_estimate<a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>(args); + auto strat = new a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -195,17 +203,19 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { has_no_channel_multiplier), cycle_estimate<a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>(args); + auto strat = new a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { DepthwiseMethod::DEPTHFIRST, "a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst", constraint(is_supported<a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>, - has_no_channel_multiplier), + has_no_channel_multiplier), cycle_estimate<a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>(args); + auto strat = new a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info); + return new 
DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -215,7 +225,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { has_no_channel_multiplier), cycle_estimate<a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>(args); + auto strat = new a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { @@ -225,7 +236,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = { has_no_channel_multiplier), cycle_estimate<a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * { - return new DepthwiseDepthfirst<a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>(args); + auto strat = new a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info); + return new DepthwiseDepthfirst<float, float, float, float>(strat, args); }, }, { |