From 3eb263e95898c933cde5450a919a161ac6b2359b Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Mon, 19 Jun 2017 15:31:43 +0100 Subject: COMPMID-424 Add validation tests for Gaussian5x5 * Fix apply_2d_spatial_filter to use double as intermediate type * Fix tensor_elem_at to use random value if on border and border_mode is UNDEFINED Change-Id: I7feea23c4664cc63c5bab936566dc92b98c723b9 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78905 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 4 +- src/runtime/NEON/functions/NEGaussian5x5.cpp | 2 +- tests/validation/CL/CMakeLists.txt | 1 + tests/validation/CL/Gaussian5x5.cpp | 165 +++++++++++++++++++++++++ tests/validation/NEON/CMakeLists.txt | 3 +- tests/validation/NEON/Gaussian5x5.cpp | 168 ++++++++++++++++++++++++++ tests/validation/Reference.cpp | 15 +++ tests/validation/Reference.h | 9 ++ tests/validation/ReferenceCPP.cpp | 9 ++ tests/validation/ReferenceCPP.h | 8 ++ tests/validation/TensorOperations.h | 19 +++ 11 files changed, 399 insertions(+), 4 deletions(-) create mode 100644 tests/validation/CL/Gaussian5x5.cpp create mode 100644 tests/validation/NEON/Gaussian5x5.cpp diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index f872cc2f0a..4e1880d968 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -116,8 +116,8 @@ BorderSize NEGaussian5x5VertKernel::border_size() const void NEGaussian5x5VertKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::S16); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); _input = input; _output = output; diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp 
b/src/runtime/NEON/functions/NEGaussian5x5.cpp index 5ccc765966..69639d0d43 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -40,7 +40,7 @@ NEGaussian5x5::NEGaussian5x5() void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { // Init temporary buffer - TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); + TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S16); _tmp.allocator()->init(tensor_info); // Create and configure kernels for the two passes diff --git a/tests/validation/CL/CMakeLists.txt b/tests/validation/CL/CMakeLists.txt index 2f220ffdff..d46cca7ac4 100644 --- a/tests/validation/CL/CMakeLists.txt +++ b/tests/validation/CL/CMakeLists.txt @@ -33,6 +33,7 @@ set(arm_compute_test_validation_OPENCL_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/DepthConvert.cpp ${CMAKE_CURRENT_SOURCE_DIR}/FillBorder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Gaussian3x3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Gaussian5x5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Sobel3x3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Sobel5x5.cpp diff --git a/tests/validation/CL/Gaussian5x5.cpp b/tests/validation/CL/Gaussian5x5.cpp new file mode 100644 index 0000000000..a13474bc58 --- /dev/null +++ b/tests/validation/CL/Gaussian5x5.cpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "PaddingCalculator.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" +#include "validation/ValidationUserConfiguration.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute CL gaussian5x5 filter. 
+ * + * @param[in] shape Shape of the input and output tensors. + * @param[in] border_mode BorderMode used by the input tensor. + * @param[in] constant_border_value Constant to use if @p border_mode == CONSTANT. + * + * @return Computed output tensor. + */ +CLTensor compute_gaussian5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value) +{ + // Create tensors + CLTensor src = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + CLGaussian5x5 gaussian5x5; + gaussian5x5.configure(&src, &dst, border_mode, constant_border_value); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(CLAccessor(src), 0); + + // Compute function + gaussian5x5.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(Gaussian5x5) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * BorderModes(), shape, border_mode) +{ + // Create tensors + CLTensor src = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + CLGaussian5x5 gaussian5x5; + gaussian5x5.configure(&src, &dst, border_mode); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(shape); + const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2)); + validate(src.info()->valid_region(), src_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate padding + PaddingCalculator calculator(shape.x(), 8); + 
calculator.set_border_size(2); + calculator.set_border_mode(border_mode); + + const PaddingSize dst_padding = calculator.required_padding(); + + calculator.set_accessed_elements(16); + calculator.set_access_offset(-2); + + const PaddingSize src_padding = calculator.required_padding(); + + validate(src.info()->padding(), src_padding); + validate(dst.info()->padding(), dst_padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * BorderModes(), shape, border_mode) +{ + std::mt19937 gen(user_config.seed.get()); + std::uniform_int_distribution distribution(0, 255); + const uint8_t border_value = distribution(gen); + + // Compute function + CLTensor dst = compute_gaussian5x5(shape, border_mode, border_value); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gaussian5x5(shape, border_mode, border_value); + + // Validate output + validate(CLAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2))); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * BorderModes(), shape, border_mode) +{ + std::mt19937 gen(user_config.seed.get()); + std::uniform_int_distribution distribution(0, 255); + const uint8_t border_value = distribution(gen); + + // Compute function + CLTensor dst = compute_gaussian5x5(shape, border_mode, border_value); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gaussian5x5(shape, border_mode, border_value); + + // Validate output + validate(CLAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2))); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/CMakeLists.txt b/tests/validation/NEON/CMakeLists.txt index 6c58a74151..824e0d8ce7 100644 --- a/tests/validation/NEON/CMakeLists.txt +++ b/tests/validation/NEON/CMakeLists.txt @@ -46,11 
+46,12 @@ set(arm_compute_test_validation_NEON_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Reciprocal_QS8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/FullyConnectedLayer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Gaussian3x3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Gaussian5x5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/GEMM.cpp ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/MeanStdDev.cpp ${CMAKE_CURRENT_SOURCE_DIR}/NormalizationLayer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/PixelWiseMultiplication.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/MeanStdDev.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Pooling/PoolingLayer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Sobel3x3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Sobel5x5.cpp diff --git a/tests/validation/NEON/Gaussian5x5.cpp b/tests/validation/NEON/Gaussian5x5.cpp new file mode 100644 index 0000000000..a553078d6c --- /dev/null +++ b/tests/validation/NEON/Gaussian5x5.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "PaddingCalculator.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" +#include "validation/ValidationUserConfiguration.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/SubTensor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon gaussian5x5 filter. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] border_mode BorderMode used by the input tensor. + * @param[in] constant_border_value Constant to use if @p border_mode == CONSTANT. + * + * @return Computed output tensor. 
+ */ +Tensor compute_gaussian5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEGaussian5x5 gaussian5x5; + gaussian5x5.configure(&src, &dst, border_mode, constant_border_value); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + + // Compute function + gaussian5x5.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(Gaussian5x5) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * BorderModes(), shape, border_mode) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEGaussian5x5 gaussian5x5; + gaussian5x5.configure(&src, &dst, border_mode); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(shape); + const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2)); + validate(src.info()->valid_region(), src_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate padding + PaddingCalculator calculator(shape.x(), 16); + calculator.set_border_size(2); + calculator.set_border_mode(border_mode); + + const PaddingSize dst_padding = calculator.required_padding(); + + calculator.set_processed_elements(8); + calculator.set_access_offset(-2); + + const PaddingSize src_padding = 
calculator.required_padding(); + + validate(src.info()->padding(), src_padding); + validate(dst.info()->padding(), dst_padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * BorderModes(), shape, border_mode) +{ + std::mt19937 gen(user_config.seed.get()); + std::uniform_int_distribution distribution(0, 255); + const uint8_t border_value = distribution(gen); + + // Compute function + Tensor dst = compute_gaussian5x5(shape, border_mode, border_value); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gaussian5x5(shape, border_mode, border_value); + + // Validate output + validate(NEAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2))); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * BorderModes(), shape, border_mode) +{ + std::mt19937 gen(user_config.seed.get()); + std::uniform_int_distribution distribution(0, 255); + const uint8_t border_value = distribution(gen); + + // Compute function + Tensor dst = compute_gaussian5x5(shape, border_mode, border_value); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gaussian5x5(shape, border_mode, border_value); + + // Validate output + validate(NEAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(2))); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp index 98da34c27d..f6f3cb8e1f 100644 --- a/tests/validation/Reference.cpp +++ b/tests/validation/Reference.cpp @@ -310,6 +310,21 @@ RawTensor Reference::compute_reference_gaussian3x3(const TensorShape &shape, Bor return ref_dst; } +RawTensor Reference::compute_reference_gaussian5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value) +{ + // Create reference + RawTensor 
ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + + // Compute reference + ReferenceCPP::gaussian5x5(ref_src, ref_dst, border_mode, constant_border_value); + + return ref_dst; +} + RawTensor Reference::compute_reference_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2, const TensorShape &src_shape3, const TensorShape &dst_shape, float alpha, float beta, DataType dt, int fixed_point_position) { diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h index 73227695e8..22aa939dab 100644 --- a/tests/validation/Reference.h +++ b/tests/validation/Reference.h @@ -182,6 +182,15 @@ public: * @return Computed raw tensor. */ static RawTensor compute_reference_gaussian3x3(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value); + /** Compute reference gaussian5x5 filter. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] border_mode BorderMode used by the input tensor. + * @param[in] constant_border_value Constant to use if @p border_mode == CONSTANT. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_gaussian5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value); /** Compute matrix multiply function. 
* * @param[in] src_shape1 First input tensor shape diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp index 06606c7075..6264695253 100644 --- a/tests/validation/ReferenceCPP.cpp +++ b/tests/validation/ReferenceCPP.cpp @@ -204,6 +204,15 @@ void ReferenceCPP::gaussian3x3(const RawTensor &src, RawTensor &dst, BorderMode tensor_operations::gaussian3x3(s, d, border_mode, constant_border_value); } +// Gaussian5x5 filter +void ReferenceCPP::gaussian5x5(const RawTensor &src, RawTensor &dst, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor<uint8_t> s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast<const uint8_t *>(src.data())); + Tensor<uint8_t> d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast<uint8_t *>(dst.data())); + tensor_operations::gaussian5x5(s, d, border_mode, constant_border_value); +} + // GEMM void ReferenceCPP::gemm(const RawTensor &src1, const RawTensor &src2, const RawTensor &src3, RawTensor &dst, float alpha, float beta) diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h index b6fd90191f..e1b71c5493 100644 --- a/tests/validation/ReferenceCPP.h +++ b/tests/validation/ReferenceCPP.h @@ -169,6 +169,14 @@ public: * @param[in] constant_border_value Constant border value if @p border_mode is BorderMode::CONSTANT */ static void gaussian3x3(const RawTensor &src, RawTensor &dst, BorderMode border_mode, uint8_t constant_border_value); + /** Function to compute gaussian5x5 filtered result tensor. + * + * @param[in] src Input tensor. + * @param[out] dst Result tensor. + * @param[in] border_mode Border mode + * @param[in] constant_border_value Constant border value if @p border_mode is BorderMode::CONSTANT + */ + static void gaussian5x5(const RawTensor &src, RawTensor &dst, BorderMode border_mode, uint8_t constant_border_value); /** Compute GEMM function. 
* * @param[in] src1 First input tensor diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h index 4905e05732..0430d59d33 100644 --- a/tests/validation/TensorOperations.h +++ b/tests/validation/TensorOperations.h @@ -619,6 +619,25 @@ void gaussian3x3(const Tensor &in, Tensor &out, BorderMode border_mode, T } } +// Gaussian5x5 filter +template ::value>::type> +void gaussian5x5(const Tensor &in, Tensor &out, BorderMode border_mode, T constant_border_value) +{ + const std::array filter{ { + 1, 4, 6, 4, 1, + 4, 16, 24, 16, 4, + 6, 24, 36, 24, 6, + 4, 16, 24, 16, 4, + 1, 4, 6, 4, 1 + } }; + const float scale = 1.f / 256.f; + for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx) + { + const Coordinates id = index2coord(in.shape(), element_idx); + apply_2d_spatial_filter(id, in, out, TensorShape(5U, 5U), filter.data(), scale, border_mode, constant_border_value); + } +} + // Matrix multiplication for floating point type template ::value, int>::type * = nullptr> void gemm(const Tensor &in1, const Tensor &in2, const Tensor &in3, Tensor &out, float alpha, float beta) -- cgit v1.2.1