aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author Manuel Bottini <manuel.bottini@arm.com> 2021-06-17 17:18:45 +0100
committer Manuel Bottini <manuel.bottini@arm.com> 2021-06-22 17:03:54 +0000
commit ae58bdf3b58739e105a24e3640d0245e81cea5ee (patch)
tree e993b8768c3eff364a7c706db411c799fa86bfe0 /examples
parent 2db3a9955ef22be4be8ccd5a45bc0973ef80e42a (diff)
download ComputeLibrary-ae58bdf3b58739e105a24e3640d0245e81cea5ee.tar.gz
Port NEGEMMLowp Part 1
Details:
Port NEGEMMLowpQuantizeDownInt32ScaleKernel to CpuGemmLowpQuantizeDownInt32ScaleKernel
Port NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
Port NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
Port NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
Port NEGEMMLowpOutputStage functions to CpuGemmLowpOutputStage operators

Partially Resolves: COMPMID-4403

Change-Id: I6d5f45e43f35d731d564ed3b5c0e804d2a318fb1
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5833
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'examples')
-rw-r--r-- examples/neon_gemm_qasymm8.cpp | 40
1 files changed, 26 insertions, 14 deletions
diff --git a/examples/neon_gemm_qasymm8.cpp b/examples/neon_gemm_qasymm8.cpp
index efe1655d45..159636db1d 100644
--- a/examples/neon_gemm_qasymm8.cpp
+++ b/examples/neon_gemm_qasymm8.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,8 +26,8 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "utils/Utils.h"
#include "support/ToolchainSupport.h"
+#include "utils/Utils.h"
#include <cstdlib>
@@ -102,10 +102,10 @@ int main(int argc, char **argv)
Tensor q_dst0;
Tensor q_res;
Tensor q_res_output;
- size_t M = 4;
- size_t N = 4;
- size_t K = 4;
- bool default_input = true;
+ size_t M = 4;
+ size_t N = 4;
+ size_t K = 4;
+ bool default_input = true;
// Parse args
if(argc < 3) /* case default matrix sizes */
@@ -144,15 +144,18 @@ int main(int argc, char **argv)
// Fill in: one is the identity matrix, other is sequential values
// src1: Identity matrix
- for(size_t i = 0; i < M * K; i++) {
+ for(size_t i = 0; i < M * K; i++)
+ {
src1_ptr[i] = 0;
}
- for(size_t i = 0; i < M; i++) {
+ for(size_t i = 0; i < M; i++)
+ {
src1_ptr[i * K + i] = 1.0f;
}
// src2: Sequential values matrix
- for(size_t i = 0; i < K * N; i++) {
+ for(size_t i = 0; i < K * N; i++)
+ {
src2_ptr[i] = i * 1.123f;
}
@@ -217,13 +220,22 @@ int main(int argc, char **argv)
qgemm.configure(&q_src1, &q_src2, nullptr, &q_res);
// Configure output stage after computing shift and multiplier parameters
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint gemmlowp_output_stage;
- int output_multiplier;
- int output_shift;
- float multiplier = (src1_qinfo.uniform().scale * src2_qinfo.uniform().scale) / dst0_qinfo.uniform().scale;
+ NEGEMMLowpOutputStage gemmlowp_output_stage;
+ int output_multiplier;
+ int output_shift;
+ float multiplier = (src1_qinfo.uniform().scale * src2_qinfo.uniform().scale) / dst0_qinfo.uniform().scale;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
std::cout << "(q_multiplier, q_shift) = (" << output_multiplier << ", " << output_shift << ")\n\n";
- gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, output_multiplier, output_shift, dst0_qinfo.uniform().offset);
+
+ GEMMLowpOutputStageInfo info;
+ info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ info.gemmlowp_multiplier = output_multiplier;
+ info.gemmlowp_shift = output_shift;
+ info.gemmlowp_offset = dst0_qinfo.uniform().offset;
+ info.output_data_type = DataType::QASYMM8;
+ q_res_output.info()->set_data_type(DataType::QASYMM8);
+ q_res_output.info()->set_num_channels(1);
+ gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, info);
// Allocate all tensors
q_src1.allocator()->allocate();