author     SiCong Li <sicong.li@arm.com>  2022-05-24 14:37:31 +0100
committer  SiCong Li <sicong.li@arm.com>  2022-06-01 14:07:40 +0000
commit     0e7053da06b4d73058abbc3782611259b00066b5 (patch)
tree       4f3327401fee66d85934a4ff79b8bc5730cc07c1 /examples
parent     82169b3cf131318b55e80fe980895b3c16d5aca5 (diff)
Fix reference example for fusion of conv2d and addition
* Make sure the reference is running the same computation as the fused example
* Add finer breakdown of start-up time

Related to COMPMID-5365

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: Ia5c73d4af318cafda1335438f88d0d4f74a5355c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7643
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
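The fused example computes dst = addend + conv2d(input, weights, bias); before this patch the reference passed a nullptr bias to op0 and used a dst-shaped addend, so the two programs did not run the same arithmetic. Below is a minimal, self-contained scalar sketch (plain C++, not ComputeLibrary code) of the computation both examples should now agree on; broadcasting the (ofm, iw)-shaped addend along the output height is an assumption read off the new t_l1_addend_shape.

// Scalar reference of: dst = broadcast_add(addend, conv2d(input, weights, bias)).
// Layouts mirror the example's NHWC tensors; stride is fixed to 1 as in the
// example's PadStrideInfo{ 1, 1, pad_x, pad_y }. This is an illustrative sketch,
// not the code used by either example.
#include <cstddef>
#include <vector>

// Index into a [h][w][c] buffer of width W and channel count C.
static inline std::size_t idx3(std::size_t h, std::size_t w, std::size_t c, std::size_t W, std::size_t C)
{
    return (h * W + w) * C + c;
}

void conv2d_add_reference(const std::vector<float> &input,   // ih x iw x ifm
                          const std::vector<float> &weights, // ofm x wh x ww x ifm
                          const std::vector<float> &bias,    // ofm
                          const std::vector<float> &addend,  // ow x ofm, broadcast over output height (assumption)
                          std::vector<float>       &dst,     // oh x ow x ofm
                          std::size_t ih, std::size_t iw, std::size_t ifm,
                          std::size_t wh, std::size_t ww, std::size_t ofm,
                          std::size_t pad_y, std::size_t pad_x)
{
    const std::size_t oh = ih + 2 * pad_y - wh + 1; // stride 1
    const std::size_t ow = iw + 2 * pad_x - ww + 1;
    dst.assign(oh * ow * ofm, 0.f);
    for(std::size_t y = 0; y < oh; ++y)
    {
        for(std::size_t x = 0; x < ow; ++x)
        {
            for(std::size_t oc = 0; oc < ofm; ++oc)
            {
                float acc = bias[oc]; // the reference now includes the bias, like the fused operator
                for(std::size_t ky = 0; ky < wh; ++ky)
                {
                    for(std::size_t kx = 0; kx < ww; ++kx)
                    {
                        const long in_y = static_cast<long>(y + ky) - static_cast<long>(pad_y);
                        const long in_x = static_cast<long>(x + kx) - static_cast<long>(pad_x);
                        if(in_y < 0 || in_x < 0 || in_y >= static_cast<long>(ih) || in_x >= static_cast<long>(iw))
                        {
                            continue; // zero padding
                        }
                        for(std::size_t ic = 0; ic < ifm; ++ic)
                        {
                            acc += input[idx3(in_y, in_x, ic, iw, ifm)] *
                                   weights[((oc * wh + ky) * ww + kx) * ifm + ic];
                        }
                    }
                }
                dst[idx3(y, x, oc, ow, ofm)] = acc + addend[x * ofm + oc];
            }
        }
    }
}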
Diffstat (limited to 'examples')
-rw-r--r--  examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp |  7
-rw-r--r--  examples/dynamic_fusion/cl_ref_conv2d_elementwise_add.cpp   | 37
2 files changed, 33 insertions(+), 11 deletions(-)
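The finer start-up breakdown in the diff below relies on the examples' TICK/TOCK helpers, which bracket a named interval and record its duration in a measurements map. A minimal chrono-based sketch of such timers follows; the macro names and the microseconds map type are assumptions, and the real definitions live in the example sources.

// Hypothetical stand-ins for the TICK/TOCK timers referenced in the patch.
#include <chrono>
#include <map>
#include <string>

using measurement_map_t = std::map<std::string, std::chrono::microseconds>;

// Start a named interval.
#define TICK(name) \
    const auto name##_tick = std::chrono::high_resolution_clock::now();

// Stop the interval and record its duration under the interval's name.
#define TOCK(name, measurements)                                                    \
    (measurements)[#name] = std::chrono::duration_cast<std::chrono::microseconds>( \
        std::chrono::high_resolution_clock::now() - name##_tick);

With this patch, configure, tensor_allocation and dummy_run become sub-intervals nested inside the overall startup_time measurement, and the CLScheduler::get().sync() added before TOCK(dummy_run, measurements) ensures queued OpenCL work has completed before the dummy run is timed.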
diff --git a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
index af00efdfb9..285509b586 100644
--- a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
+++ b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
@@ -138,6 +138,7 @@ public:
CLScheduler::get().default_init(tuner_to_use);
TICK(startup_time);
+ TICK(configure);
/* Computation:
* out = add_desc(addend, conv2d1x1(direct_conv)(input, weights, bias))
*/
@@ -251,7 +252,9 @@ public:
const auto success = ClCompositeOperator::validate(workload); // Optional
op.configure(CLKernelLibrary::get().get_compile_context(), workload);
// [Validate and configure ClCompositeOperator]
+ TOCK(configure, measurements);
+ TICK(tensor_allocation);
/// @page example_dynamic_fusion_cl_conv2d_elementwise_add
/// @subsection run_clcompositeoperator Run ClCompositeOperator
/// Construct the runtime CLTensor s with backing memory
@@ -312,7 +315,9 @@ public:
tensor_data.tensor->allocator()->allocate();
}
// [Initialize and Allocate Auxiliary CLTensor objects]
+ TOCK(tensor_allocation, measurements);
+ TICK(dummy_run);
/// @page example_dynamic_fusion_cl_conv2d_elementwise_add
/// Run the ClCompositeOperator prepare job. This performs any jobs that are required for the first run, like
/// reshaping tensors for a more performant format.
@@ -327,6 +332,8 @@ public:
// [Run ClCompositeOperator]
op.run(run_pack_map);
// [Run ClCompositeOperator]
+ CLScheduler::get().sync();
+ TOCK(dummy_run, measurements);
TOCK(startup_time, measurements);
return true;
}
diff --git a/examples/dynamic_fusion/cl_ref_conv2d_elementwise_add.cpp b/examples/dynamic_fusion/cl_ref_conv2d_elementwise_add.cpp
index 4f68372b49..3aedcc0f41 100644
--- a/examples/dynamic_fusion/cl_ref_conv2d_elementwise_add.cpp
+++ b/examples/dynamic_fusion/cl_ref_conv2d_elementwise_add.cpp
@@ -52,6 +52,9 @@ using namespace utils;
using std::chrono::duration_cast;
using std::chrono::microseconds;
+/** A reference for comparing against the fusion of a direct convolution with an elementwise addition:
+ * examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
+ */
class ClRefConv2dEltwiseAddExample : public Example
{
public:
@@ -69,7 +72,7 @@ public:
if(argc < 10)
{
// Print help
- std::cout << "Usage: ./cl_conv2d_elementwise_add ih iw ifm wh ww ofm tuner_choice(0=Disable, 1=Rapid, 2=Normal, 3=Exhaustive)\n";
+ std::cout << "Usage: ./cl_ref_conv2d_elementwise_add ih iw ifm wh ww ofm tuner_choice(0=Disable, 1=Rapid, 2=Normal, 3=Exhaustive) pad_x pad_y\n";
std::cout << "Too few or no input_matrices provided. Using shape config = SRGAN_0, tuner_choice=2\n\n";
ih = 512;
iw = 512;
@@ -126,6 +129,7 @@ public:
CLScheduler::get().default_init(tuner_to_use);
TICK(startup_time);
+ TICK(configure);
/* Computation:
* out = add_desc(addend, conv2d1x1(direct_conv)(input, weights, bias))
@@ -133,54 +137,64 @@ public:
const auto data_type = DataType::F32;
const auto data_layout = DataLayout::NHWC;
const PadStrideInfo conv_info{ 1, 1, pad_x, pad_y };
- // const auto t_input_shape = TensorShape(384, 12, 12);
- // const auto t_weight_shape = TensorShape(384, 1, 1, 64);
- // const auto t_dst_shape = TensorShape(64, 12, 12);
- const auto t_input_shape = TensorShape(ifm, iw, ih);
- const auto t_weight_shape = TensorShape(ifm, ww, wh, ofm);
- const auto t_dst_shape = misc::shape_calculator::compute_deep_convolution_shape(t_input_shape, data_layout, t_weight_shape, conv_info);
+ const auto t_input_shape = TensorShape(ifm, iw, ih);
+ const auto t_weight_shape = TensorShape(ifm, ww, wh, ofm);
+ const auto t_bias_shape = TensorShape(ofm);
+ const auto t_l1_addend_shape = TensorShape(ofm, iw);
+ const auto t_dst_shape = misc::shape_calculator::compute_deep_convolution_shape(t_input_shape, data_layout, t_weight_shape, conv_info);
std::cout << "input_shape: " << t_input_shape << std::endl;
std::cout << "weight_shape: " << t_weight_shape << std::endl;
+ std::cout << "bias_shape: " << t_bias_shape << std::endl;
+ std::cout << "addend_shape: " << t_l1_addend_shape << std::endl;
std::cout << "dst_shape: " << t_dst_shape << std::endl;
auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout);
auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout);
+ auto t_bias_info = TensorInfo(t_bias_shape, 1, data_type, data_layout);
auto t_l0_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); // Intermediate tensor for cond3
- auto t_l1_addend_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
+ auto t_l1_addend_info = TensorInfo(t_l1_addend_shape, 1, data_type, data_layout);
auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
// Init tensors
{
t_input.allocator()->init(t_input_info);
t_weight.allocator()->init(t_weight_info);
+ t_bias.allocator()->init(t_bias_info);
t_l1_addend.allocator()->init(t_dst_info);
t_l0_dst.allocator()->init(t_l0_dst_info);
t_dst.allocator()->init(t_dst_info);
}
- op0.configure(&t_input, &t_weight, nullptr, &t_l0_dst, conv_info);
+ op0.configure(&t_input, &t_weight, &t_bias, &t_l0_dst, conv_info);
op1.configure(&t_l0_dst, &t_l1_addend, &t_dst, ConvertPolicy{});
+ TOCK(configure, measurements);
+ TICK(tensor_allocation);
// Construct tensors
// Allocate and fill tensors
{
t_input.allocator()->allocate();
t_weight.allocator()->allocate();
+ t_bias.allocator()->allocate();
t_l1_addend.allocator()->allocate();
t_l0_dst.allocator()->allocate();
t_dst.allocator()->allocate();
fill_random_tensor(t_input, -1.f, 1.f);
fill_random_tensor(t_weight, -1.f, 1.f);
+ fill_random_tensor(t_bias, -1.f, 1.f);
fill_random_tensor(t_l1_addend, -1.f, 1.f);
}
+ TOCK(tensor_allocation, measurements);
// Dummy run for CLTuner
+ TICK(dummy_run);
op0.run();
- op1.run();
+ CLScheduler::get().sync();
+ TOCK(dummy_run, measurements);
TOCK(startup_time, measurements);
return true;
}
void do_run() override
{
- // Run the fused op
+ // Run the ops
op0.run();
op1.run();
@@ -199,6 +213,7 @@ public:
private:
CLTensor t_input{};
CLTensor t_weight{};
+ CLTensor t_bias{};
CLTensor t_l1_addend{};
CLTensor t_l0_dst{};
CLTensor t_dst{};