about | summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-10-22 16:17:20 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit: 60e98253f1e3df1723e7b8f4c996b544aa7c7205 (patch)
tree: 45ca11d6fb0a16974fc8681bc7161a6ad2b1af2e /src/core
parent: c04a0e8f93c620d05444251e1ae55dcf8c660a1b (diff)
download: ComputeLibrary-60e98253f1e3df1723e7b8f4c996b544aa7c7205.tar.gz
COMPMID-1451: Fuse activation in DepthwiseConvolution.
Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/CL/cl_kernels/depthwise_convolution_quantized.cl | 18
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 7
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 7
3 files changed, 15 insertions, 17 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index 7cd48790c6..3239885abc 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -720,7 +720,7 @@ __kernel void depthwise_convolution_3x3_quantized_nhwc(
Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
VSTORE(VEC_SIZE)
- (res, 0, dst.ptr);
+ (ACTIVATION_FUNC(res), 0, dst.ptr);
}
#endif // defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y)
@@ -953,18 +953,18 @@ __kernel void depthwise_convolution_3x3_quantized_nhwc_stride1(
__global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * dst_step_x + y * dst_step_y + (z * NUM_PLANES_PROCESSED) * dst_step_z;
VSTORE(VEC_SIZE)
- (res0, 0, dst_addr + 0 * dst_stride_y);
+ (ACTIVATION_FUNC(res0), 0, dst_addr + 0 * dst_stride_y);
VSTORE(VEC_SIZE)
- (res1, 0, dst_addr + 1 * dst_stride_y);
+ (ACTIVATION_FUNC(res1), 0, dst_addr + 1 * dst_stride_y);
#if((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
if((z * NUM_PLANES_PROCESSED + 1) < DST_DIM_2)
#endif // ((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
{
VSTORE(VEC_SIZE)
- (res2, 0, dst_addr + 0 * dst_stride_y + 1 * dst_stride_z);
+ (ACTIVATION_FUNC(res2), 0, dst_addr + 0 * dst_stride_y + 1 * dst_stride_z);
VSTORE(VEC_SIZE)
- (res3, 0, dst_addr + 1 * dst_stride_y + 1 * dst_stride_z);
+ (ACTIVATION_FUNC(res3), 0, dst_addr + 1 * dst_stride_y + 1 * dst_stride_z);
}
}
@@ -1159,18 +1159,18 @@ __kernel void depthwise_convolution_3x3_quantized_dot8_nhwc_stride1(
__global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * dst_step_x + y * dst_step_y + (z * NUM_PLANES_PROCESSED) * dst_step_z;
VSTORE(VEC_SIZE)
- (res0, 0, dst_addr + 0 * dst_stride_y);
+ (ACTIVATION_FUNC(res0), 0, dst_addr + 0 * dst_stride_y);
VSTORE(VEC_SIZE)
- (res1, 0, dst_addr + 1 * dst_stride_y);
+ (ACTIVATION_FUNC(res1), 0, dst_addr + 1 * dst_stride_y);
#if((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
if((z * NUM_PLANES_PROCESSED + 1) < DST_DIM_2)
#endif // ((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0)
{
VSTORE(VEC_SIZE)
- (res2, 0, dst_addr + 0 * dst_stride_y + 1 * dst_stride_z);
+ (ACTIVATION_FUNC(res2), 0, dst_addr + 0 * dst_stride_y + 1 * dst_stride_z);
VSTORE(VEC_SIZE)
- (res3, 0, dst_addr + 1 * dst_stride_y + 1 * dst_stride_z);
+ (ACTIVATION_FUNC(res3), 0, dst_addr + 1 * dst_stride_y + 1 * dst_stride_z);
}
}
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index de7e2b8737..eb561faf77 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -207,8 +207,7 @@ BorderSize CLDepthwiseConvolutionLayer3x3NCHWKernel::border_size() const
}
void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- ActivationLayerInfo act_info)
+ unsigned int depth_multiplier, ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info));
@@ -272,11 +271,11 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
const float s2 = output->info()->quantization_info().scale;
const int o2 = output->info()->quantization_info().offset;
+ build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
+ build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
if(o1 != o2 || s1 != s2)
{
- build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
build_opts.add_option("-DS2_VAL=" + float_to_string_with_full_precision(s2));
- build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
build_opts.add_option("-DO2_VAL=" + support::cpp11::to_string(o2));
}
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index d56ac01a83..d3bed87037 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -139,8 +139,7 @@ BorderSize CLDepthwiseConvolutionLayer3x3NHWCKernel::border_size() const
}
void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- ActivationLayerInfo act_info)
+ unsigned int depth_multiplier, ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -213,11 +212,11 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
const float s2 = output->info()->quantization_info().scale;
const int o2 = output->info()->quantization_info().offset;
+ build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
+ build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
if(o1 != o2 || s1 != s2)
{
- build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
build_opts.add_option("-DS2_VAL=" + float_to_string_with_full_precision(s2));
- build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
build_opts.add_option("-DO2_VAL=" + support::cpp11::to_string(o2));
}
}