From b3b1e0b3e349ae607297bbca3f273d3d0dd19679 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 27 Apr 2018 18:49:44 +0100 Subject: COMPMID-1010: Remove RSH profiler header Change-Id: I2967ec94c3bead0b92ff1d1581ff6afea21c7f04 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129405 Tested-by: Jenkins Reviewed-by: Michalis Spyrou --- .../kernels/convolution/winograd/winograd_gemm.cpp | 96 ++++++---------------- 1 file changed, 25 insertions(+), 71 deletions(-) (limited to 'src/core/NEON/kernels/convolution') diff --git a/src/core/NEON/kernels/convolution/winograd/winograd_gemm.cpp b/src/core/NEON/kernels/convolution/winograd/winograd_gemm.cpp index 8f8cd250bf..a0ecaea4d4 100644 --- a/src/core/NEON/kernels/convolution/winograd/winograd_gemm.cpp +++ b/src/core/NEON/kernels/convolution/winograd/winograd_gemm.cpp @@ -24,6 +24,8 @@ #include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp" #include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp" +#include + using namespace winograd; /** Get the output shape of a convolution. */ @@ -243,8 +245,7 @@ WinogradGEMM::Conv tile_cols(iceildiv(output_shape.n_cols, output_tile_cols)), M(input_shape.n_batches * tile_rows * tile_cols), K(kernel_shape.n_input_channels), - N(kernel_shape.n_output_channels), - prof() + N(kernel_shape.n_output_channels) { // Create pointers to the kernel matrices const int kernel_matrix_size_bytes = get_kernel_matrix_size(kernel_shape); @@ -317,20 +318,12 @@ Convolution::transform_weights( kernel_hwio = reinterpret_cast(transform_working_space); // Re-order the weights from OIHW to HWIO - this->prof( - "Weight reorder", - [&kernel, &kernel_hwio, this] () { - reorder::ofm_ifm_h_w_to_h_w_ifm_ofm( - kernel, const_cast(kernel_hwio), - kernel_shape.n_output_channels, - kernel_shape.n_input_channels, - kernel_shape.n_rows, - kernel_shape.n_cols - ); - }, - kernel_shape.size() * sizeof(TIn), - 0, - kernel_shape.size() * sizeof(TIn) + reorder::ofm_ifm_h_w_to_h_w_ifm_ofm( + kernel, const_cast(kernel_hwio), + kernel_shape.n_output_channels, + kernel_shape.n_input_channels, + kernel_shape.n_rows, + kernel_shape.n_cols ); } @@ -344,17 +337,7 @@ Convolution::transform_weights( ); // Transform the weights into the Winograd domain - auto kernel_prep = [&] () - { - weights_transform.run(0, weights_transform.get_window()); - }; - - prof( - "Kernel Prep", kernel_prep, - WeightsTransformT::bytes_read(kernel_shape), - WeightsTransformT::ops_performed(kernel_shape), - WeightsTransformT::bytes_written(kernel_shape) - ); + weights_transform.run(0, weights_transform.get_window()); // Free memory if we allocated it if (allocated_working_space) @@ -419,18 +402,12 @@ Convolution::execute( ws_bytes + N_GEMMS*(in_matrix_stride_bytes + out_matrix_stride_bytes) ); - this->prof( - "NCHW -> NHWC", - [input, input_shape, input_nhwc] () { - reorder::nchw_to_nhwc( - input, const_cast(input_nhwc), - input_shape.n_batches, - input_shape.n_channels, - input_shape.n_rows, - input_shape.n_cols - ); - }, - input_shape.size(), 0, input_shape.size() + reorder::nchw_to_nhwc( + input, const_cast(input_nhwc), + input_shape.n_batches, + input_shape.n_channels, + input_shape.n_rows, + input_shape.n_cols ); } @@ -456,15 +433,7 @@ Convolution::execute( ); // Transform the input into the Winograd domain - auto input_prep = [&] () { - input_transform.run(0, input_transform.get_window()); - }; - prof( - "Input Prep", input_prep, - InputTransform::bytes_read(input_shape), - InputTransform::ops_performed(input_shape), - InputTransform::bytes_written(input_shape) - ); + input_transform.run(0, input_transform.get_window()); // Perform the GEMMs const int kernel_matrix_stride_bytes = get_kernel_matrix_size(kernel_shape); @@ -482,8 +451,7 @@ Convolution::execute( ); for (unsigned int i = 0; i < gemms.get_window(); i++) { - auto run_gemm = [&] () { gemms.run(i, i+1); }; - prof("GEMM", run_gemm, 0, 0, 0); + gemms.run(i, i+1); } // If the output tensor needs to be in NCHW form then store the NHWC output @@ -510,31 +478,17 @@ Convolution::execute( output_shape.n_cols, output_shape.n_channels ); - auto output_prep = [&] () { - output_transform.run(0, output_transform.get_window()); - }; - prof( - "Output Comp", output_prep, - OutputTransform::bytes_read(output_shape), - OutputTransform::ops_performed(output_shape), - OutputTransform::bytes_written(output_shape) - ); + output_transform.run(0, output_transform.get_window()); // Reorder the output tensor if it is required to be in NCHW form. if (input_shape.ordering == NCHW) { - prof( - "NHWC -> NCHW", - [output_nhwc, output_shape, output] () { - reorder::nhwc_to_nchw( - output_nhwc, output, - output_shape.n_batches, - output_shape.n_rows, - output_shape.n_cols, - output_shape.n_channels - ); - }, - output_shape.size(), 0, output_shape.size() + reorder::nhwc_to_nchw( + output_nhwc, output, + output_shape.n_batches, + output_shape.n_rows, + output_shape.n_cols, + output_shape.n_channels ); } -- cgit v1.2.1