author    Matthew Bentham <matthew.bentham@arm.com>    2020-03-05 23:37:48 +0000
committer Michele Di Giorgio <michele.digiorgio@arm.com>    2020-03-10 18:41:04 +0000
commit    758b5ba3e6d22509d4deab3d8b0b9c2f03418130 (patch)
tree      c16734e12b105819a919e8015a4d281dffd8817c /src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
parent    6b3865ad038d60a126fe1f90df815a480527a29f (diff)
COMPMID-3069: Improve build time by splitting up ToolchainSupport.h
Split out the parts of ToolchainSupport coming from <memory> and the parts coming from <string> into their own new header files. This accounts for 99% of uses of ToolchainSupport, which means that expensive header files such as arm_neon.h don't need to be included everywhere. Knocks about 10% off the compilation time of kernel files.

Signed-off-by: Matthew Bentham <matthew.bentham@arm.com>
Change-Id: I2ae718fe766b5ff28608812b0f686f30eeac1b21
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2852
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
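The pattern the commit applies to this file is visible in the include hunk below: a kernel source that only needs string utilities swaps the monolithic support/ToolchainSupport.h for the narrower support/StringSupport.h. As a minimal sketch of what a call site looks like after the split (the arm_compute::support::cpp11::to_string helper and its new home in StringSupport.h are assumed from the commit description; the helper function shown is hypothetical, not taken from this file):

// Minimal sketch, assuming support/StringSupport.h now provides
// arm_compute::support::cpp11::to_string (previously reached through
// support/ToolchainSupport.h). Including only the narrow header keeps heavy
// transitive includes such as arm_neon.h out of this translation unit.
#include "support/StringSupport.h"

#include <string>

// Hypothetical helper: builds a GLES kernel build option such as "#define KERNEL_SIZE 3".
static std::string kernel_size_define(unsigned int kernel_size)
{
    return "#define KERNEL_SIZE " + arm_compute::support::cpp11::to_string(kernel_size);
}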
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp')
-rw-r--r--  src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp | 22
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index ecff233382..f3e47d9ae9 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,7 +33,7 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
-#include "support/ToolchainSupport.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
@@ -44,7 +44,7 @@ GCDirectConvolutionLayerKernel<kernel_size>::GCDirectConvolutionLayerKernel()
}
template <unsigned int kernel_size>
-BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
+BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
{
return _border_size;
}
@@ -70,8 +70,8 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
}
// Get convolved dimensions
- unsigned int owidth = 0;
- unsigned int oheight = 0;
+ unsigned int owidth = 0;
+ unsigned int oheight = 0;
std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
TensorShape output_shape = input->info()->tensor_shape();
@@ -238,20 +238,20 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
num_elems_written_per_iteration_x = 4;
#elif defined(PROCESS_4X_2Y_1Z)
options.emplace("#define PROCESS_4X_2Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 2;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 2;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 2;
#elif defined(PROCESS_4X_3Y_1Z)
options.emplace("#define PROCESS_4X_3Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 3;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 3;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 3;
#elif defined(PROCESS_4X_4Y_1Z)
options.emplace("#define PROCESS_4X_4Y_1Z");
- num_elems_read_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 4;
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 4;
num_elems_written_per_iteration_x = 4;
num_elems_written_per_iteration_y = 4;
#elif defined(PROCESS_4X_2Y_2Z)