/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include "utils.hpp" #include "winograd.hpp" using namespace winograd; using array2 = std::pair; #define MEMBERFN(RTYPE) \ template \ template \ RTYPE WinogradGEMM::Convolution /** Get the output shape of a convolution. */ MEMBERFN(array2) ::get_output_shape(const std::pair input_shape, const bool padding_same) { const unsigned int n_rows = padding_same ? input_shape.first : input_shape.first - (kernel_rows - 1); const unsigned int n_cols = padding_same ? input_shape.second : input_shape.second - (kernel_cols - 1); return {n_rows, n_cols}; } /** Get the memory required to store the kernel transformed into the * Winograd domain. */ MEMBERFN(size_t) ::get_kernel_storage_size(const unsigned int n_input_channels, const unsigned int n_output_channels) { return N_GEMMS * get_kernel_matrix_size(n_input_channels, n_output_channels); } MEMBERFN(size_t) ::get_input_storage_size(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels, const bool same_padding) { return N_GEMMS * get_input_matrix_size(n_batches, n_rows, n_cols, n_channels, same_padding); } MEMBERFN(size_t) ::get_output_storage_size(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels) { return N_GEMMS * get_output_matrix_size(n_batches, n_rows, n_cols, n_channels); } /** Get the memory required to apply a Winograd operator to some input. */ MEMBERFN(size_t) ::get_working_space_size(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_input_channels, const unsigned int n_output_channels, const bool padding_same) { const auto output_shape = get_output_shape({n_rows, n_cols}, padding_same); // Get the memory required to store the matrices const size_t matrix_sizes = N_GEMMS * (get_input_matrix_size(n_batches, n_rows, n_cols, n_input_channels, padding_same) + get_output_matrix_size(n_batches, output_shape.first, output_shape.second, n_output_channels)); return matrix_sizes; } /* Get the memory required by a single "input" matrix. */ MEMBERFN(size_t) ::get_input_matrix_size(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels, const bool same_padding) { return get_input_matrix_stride(n_batches, n_rows, n_cols, n_channels, same_padding) * sizeof(TGEMMIn); } MEMBERFN(int) ::get_input_matrix_stride(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels, const bool same_padding) { const auto output_shape = get_output_shape({n_rows, n_cols}, same_padding); const unsigned int tile_rows = iceildiv(output_shape.first, output_tile_rows); const unsigned int tile_cols = iceildiv(output_shape.second, output_tile_cols); const unsigned int M = roundup(n_batches * tile_rows * tile_cols, M_BLOCK); const unsigned int K = n_channels; return M * K; } /* Get the memory required by a single "output" matrix. */ MEMBERFN(size_t) ::get_output_matrix_size(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels) { return get_output_matrix_stride(n_batches, n_rows, n_cols, n_channels) * sizeof(TGEMMOut); } MEMBERFN(int) ::get_output_matrix_stride(const unsigned int n_batches, const unsigned int n_rows, const unsigned int n_cols, const unsigned int n_channels) { // Compute shape for the GEMM const int tile_rows = iceildiv(n_rows, output_tile_rows); const int tile_cols = iceildiv(n_cols, output_tile_cols); const int M = roundup(tile_rows * tile_cols, M_BLOCK); const int N = roundup(n_channels, N_BLOCK); return n_batches * M * N; } /* Get the memory required by a single "kernel" matrix. */ MEMBERFN(size_t) ::get_kernel_matrix_size(const unsigned int n_input_channels, const unsigned int n_output_channels) { return sizeof(TGEMMIn) * get_kernel_matrix_stride(n_input_channels, n_output_channels); } MEMBERFN(int) ::get_kernel_matrix_stride(const unsigned int n_input_channels, const unsigned int n_output_channels) { return n_input_channels * roundup(n_output_channels, N_BLOCK); } // Instantiate required implementations template class WinogradGEMM<2, 2, 3, 3, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<4, 4, 3, 3, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<1, 6, 1, 3, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<6, 1, 3, 1, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<2, 2, 5, 5, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<1, 4, 1, 5, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<4, 1, 5, 1, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<1, 2, 1, 7, WinogradRoots::Integers>::Convolution; template class WinogradGEMM<2, 1, 7, 1, WinogradRoots::Integers>::Convolution;