diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-03-11 14:03:23 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-03-29 09:54:53 +0000 |
commit | 47d39dc615d1dee2482bc84699802165a9778ac8 (patch) | |
tree | 87f2fdb4f4957be7ff1c043be6328e4154cdf9e1 /src/core/NEON/kernels/convolution/common | |
parent | 2d2551ed3934f071eb6a65f5b776301454bc147a (diff) | |
download | ComputeLibrary-47d39dc615d1dee2482bc84699802165a9778ac8.tar.gz |
COMPMID-1975: Update depthwise convolution.
Change-Id: Iad58672be35710a7ec2e918653d6d529709387e8
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/898
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/common')
-rw-r--r-- | src/core/NEON/kernels/convolution/common/padding.cpp | 130 | ||||
-rw-r--r-- | src/core/NEON/kernels/convolution/common/qasymm8.cpp | 90 |
2 files changed, 220 insertions, 0 deletions
```diff
diff --git a/src/core/NEON/kernels/convolution/common/padding.cpp b/src/core/NEON/kernels/convolution/common/padding.cpp
new file mode 100644
index 0000000000..b50067b4e0
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/padding.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <cstring>
+#include <cstdint>
+
+#include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp"
+#include "arm_compute/core/NEON/kernels/convolution/common/padding.hpp"
+
+namespace padding
+{
+
+template <typename T>
+void copy_and_pad_tile(
+  const unsigned int tile_rows,
+  const unsigned int tile_cols,
+  const unsigned int n_channels,
+  const T* const inptr,
+  const unsigned int in_row_stride,
+  const unsigned int in_col_stride,
+  T* const outptr,
+  const unsigned int out_row_stride,
+  const unsigned int out_col_stride,
+  const unsigned int pad_top,
+  const unsigned int pad_left,
+  const unsigned int pad_bottom,
+  const unsigned int pad_right,
+  const T pad_value
+)
+{
+  for (unsigned int out_i = 0; out_i < tile_rows; out_i++)
+  {
+    for (unsigned int out_j = 0; out_j < tile_cols; out_j++)
+    {
+      T* const output = outptr + out_i*out_row_stride + out_j*out_col_stride;
+
+      if (out_i < pad_top || tile_rows - pad_bottom <= out_i ||
+          out_j < pad_left || tile_cols - pad_right <= out_j)
+      {
+        for (unsigned int n = 0; n < n_channels; n++)
+        {
+          output[n] = pad_value;
+        }
+      }
+      else
+      {
+        const auto in_i = out_i - pad_top, in_j = out_j - pad_left;
+        const T* const input = inptr + in_i*in_row_stride + in_j*in_col_stride;
+        std::memcpy(output, input, n_channels * sizeof(T));
+      }
+    }
+  }
+}
+
+template void copy_and_pad_tile(
+  unsigned int, unsigned int, unsigned int,
+  const uint8_t *, unsigned int, unsigned int,
+  uint8_t *, unsigned int, unsigned int,
+  unsigned int, unsigned int, unsigned int, unsigned int, uint8_t
+);
+
+template void copy_and_pad_tile(
+  unsigned int, unsigned int, unsigned int,
+  const float *, unsigned int, unsigned int,
+  float *, unsigned int, unsigned int,
+  unsigned int, unsigned int, unsigned int, unsigned int, float
+);
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template void copy_and_pad_tile(
+  unsigned int, unsigned int, unsigned int,
+  const float16_t *, unsigned int, unsigned int,
+  float16_t *, unsigned int, unsigned int,
+  unsigned int, unsigned int, unsigned int, unsigned int, float16_t
+);
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+template <unsigned int TileRows, unsigned int TileCols>
+void CopyCropped<TileRows, TileCols>::execute(
+  const size_t size,
+  const void * const inptr,
+  const size_t in_row_stride,
+  const size_t in_col_stride,
+  void * const outptr,
+  const size_t out_row_stride,
+  const size_t out_col_stride,
+  const unsigned int pad_top,
+  const unsigned int pad_left,
+  const unsigned int pad_bottom,
+  const unsigned int pad_right
+)
+{
+  for (unsigned int out_i = 0, in_i = pad_top; in_i < TileRows - pad_bottom; out_i++, in_i++)
+  {
+    for (unsigned int out_j = 0, in_j = pad_left; in_j < TileCols - pad_right; out_j++, in_j++)
+    {
+      std::memcpy(
+        static_cast<uint8_t *>(outptr) + out_i*out_row_stride + out_j*out_col_stride,
+        static_cast<const uint8_t *>(inptr) + in_i*in_row_stride + in_j*in_col_stride,
+        size
+      );
+    }
+  }
+}
+
+template class CopyCropped<2, 2>;
+template class CopyCropped<3, 3>;
+template class CopyCropped<4, 4>;
+
+} // namespace padding
```

```diff
diff --git a/src/core/NEON/kernels/convolution/common/qasymm8.cpp b/src/core/NEON/kernels/convolution/common/qasymm8.cpp
new file mode 100644
index 0000000000..1de9ebf28a
--- /dev/null
+++ b/src/core/NEON/kernels/convolution/common/qasymm8.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cmath>
+#include <limits>
+
+#include "arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp"
+
+namespace qasymm8
+{
+#if(__ANDROID__ || BARE_METAL)
+template <typename T> T round(T val) { return ::round(val); }
+template <typename T> T exp2(T val) { return ::exp2(val); }
+template <typename T> T log2(T val) { return ::log2(val); }
+#else /* (__ANDROID__ || BARE_METAL) */
+template <typename T> T round(T val) { return std::round(val); }
+template <typename T> T exp2(T val) { return std::exp2(val); }
+template <typename T> T log2(T val) { return std::log2(val); }
+#endif /* (__ANDROID__ || BARE_METAL) */
+
+uint8_t QAsymm8Params::quantize(const float value) const
+{
+  const float transformed = value / scale + offset;
+  return static_cast<uint8_t>(round(std::max(0.0f, std::min(255.0f, transformed))));
+}
+
+float QAsymm8Params::dequantize(const uint8_t value) const
+{
+  return scale * (static_cast<float>(value) - offset);
+}
+
+QAsymm8RescaleParams QAsymm8RescaleParams::make_rescale_params(
+  const QAsymm8Params& weight_quant,
+  const QAsymm8Params& input_quant,
+  const QAsymm8Params& output_quant
+)
+{
+  // Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc
+  const float rescale = weight_quant.scale * input_quant.scale / output_quant.scale;
+  const float shiftf = round(log2(0.5f / rescale));
+  const float multf = exp2(31.0f + shiftf)*rescale;
+
+  int64_t shift = static_cast<int64_t>(shiftf);
+  int64_t mult = static_cast<int64_t>(multf);
+
+  if (mult == (1ll << 31))
+  {
+    mult /= 2;
+    shift--;
+  }
+
+  assert(shift >= 0);
+  assert(mult <= std::numeric_limits<int32_t>::max());
+
+  return QAsymm8RescaleParams(
+    static_cast<int32_t>(shift),
+    static_cast<int32_t>(mult),
+    rescale
+  );
+}
+
+QAsymm8RescaleParams::QAsymm8RescaleParams(int32_t shift, int32_t multi, float rescale)
+  : shift(shift), multiplier(multi), rescale(rescale)
+{
+}
+}
```