From 307392a4962cc659f7104867a56816a011694a44 Mon Sep 17 00:00:00 2001
From: Tai Ly <tai.ly@arm.com>
Date: Fri, 12 May 2023 21:42:19 +0000
Subject: Add abs calculations under precise_mode

This adds a second run of the reference model under precise_mode
when test_desc.json contains a "compliance" dictionary that
contains a "mode" entry with the value "dot product".

In this second run, abs_mode is set to true, which has two effects:

1. evaluation takes the absolute values of the inputs of these operators:
	conv2d, conv3d, depthwise_conv2d, fully_connected,
	matmul, transpose_conv2d, fft2d, rfft2d,
	reduce_sum, avg_pool2d
2. output file names have the prefix "bounds_" prepended to them

Signed-off-by: Tai Ly <tai.ly@arm.com>
Change-Id: I7070ecc7ead2d2ea3375c44663d653c6772b88e0
---
 reference_model/include/func_config.h |   1 +
 reference_model/src/main.cpp          | 147 +++++++++++++++++++++-------------
 reference_model/src/ops/reduction.cc  |  20 +++--
 reference_model/src/ops/tensor_ops.cc | 105 +++++++++++++++++++++---
 4 files changed, 202 insertions(+), 71 deletions(-)

diff --git a/reference_model/include/func_config.h b/reference_model/include/func_config.h
index b92845b..900ebab 100644
--- a/reference_model/include/func_config.h
+++ b/reference_model/include/func_config.h
@@ -49,6 +49,7 @@ struct func_config_t
     uint32_t dump_intermediates = 0;
     std::string fp_format       = "0.5";
     uint32_t precise_mode       = 0;
+    bool abs_mode               = 0;      // set in main as second run of precise_mode
     bool float_is_big_endian    = false;  // Set in arith_util.h by float_is_big_endian()
 
     tosa_level_t tosa_level;
diff --git a/reference_model/src/main.cpp b/reference_model/src/main.cpp
index aad07cb..55e6d67 100644
--- a/reference_model/src/main.cpp
+++ b/reference_model/src/main.cpp
@@ -33,10 +33,12 @@ using namespace tosa;
 using json = nlohmann::json;
 
 int initTestDesc(json& test_desc);
-int readInputTensors(SubgraphTraverser& gt, json test_desc);
-int writeFinalTensors(SubgraphTraverser& gt, json test_desc);
-int loadGraph(TosaSerializationHandler& tsh, json test_desc);
+int readInputTensors(SubgraphTraverser& gt, json& test_desc);
+int writeFinalTensors(SubgraphTraverser& gt, json& test_desc, const std::string& filename_prefix);
+int loadGraph(TosaSerializationHandler& tsh, json& test_desc);
 void parse_value(const std::string& text, tosa_level_t& value);
+const std::string getResultFilenamePrefix();
+bool isComplianceModeDotProduct(json& test_desc);
 
 int main(int argc, char** argv)
 {
@@ -84,44 +86,51 @@ int main(int argc, char** argv)
         FATAL_ERROR("Unable to load graph");
     }
 
-    SubgraphTraverser main_gt(tsh.GetMainRegion()->GetBlockByName("main"), &tsh, nullptr);
+    GraphStatus status = GraphStatus::TOSA_VALID;
 
-    if (main_gt.initializeGraph())
+    // max of 2 runs, second run only happens when precise_mode is set, to do an abs_mode run
+    for (int run = 0; run < 2; run++)
     {
-        WARNING("Unable to initialize main graph traverser.");
-        goto done;
-    }
+        SubgraphTraverser main_gt(tsh.GetMainRegion()->GetBlockByName("main"), &tsh, nullptr);
 
-    if (main_gt.linkTensorsAndNodes())
-    {
-        WARNING("Failed to link tensors and nodes");
-        goto done;
-    }
+        if (main_gt.initializeGraph())
+        {
+            WARNING("Unable to initialize main graph traverser.");
+            goto done;
+        }
 
-    if (main_gt.validateGraph())
-    {
-        WARNING("Failed to validate graph. Evaluation aborted.");
-        goto done;
-    }
+        if (main_gt.linkTensorsAndNodes())
+        {
+            WARNING("Failed to link tensors and nodes");
+            goto done;
+        }
 
-    if (main_gt.allocateTensor())
-    {
-        WARNING("Failed to allocate tensor. Evaluation aborted.");
-        goto done;
-    }
+        if (main_gt.validateGraph())
+        {
+            WARNING("Failed to validate graph. Evaluation aborted.");
+            goto done;
+        }
 
-    if (g_func_config.validate_only)
-    {
-        goto done;
-    }
+        if (main_gt.allocateTensor())
+        {
+            WARNING("Failed to allocate tensor. Evaluation aborted.");
+            goto done;
+        }
 
-    if (readInputTensors(main_gt, test_desc))
-    {
-        FATAL_ERROR("Unable to read input tensors");
-    }
+        if (g_func_config.validate_only)
+        {
+            goto done;
+        }
 
-    if (g_func_config.eval)
-    {
+        if (readInputTensors(main_gt, test_desc))
+        {
+            FATAL_ERROR("Unable to read input tensors");
+        }
+
+        if (!g_func_config.eval)
+        {
+            goto done;
+        }
 
         // evaluateAll() returns 1 if graph evaluation is forced to be terminated earlier.
         if (main_gt.evaluateAll())
@@ -165,36 +174,47 @@ int main(int argc, char** argv)
 
             if (g_func_config.output_tensors)
             {
-                if (writeFinalTensors(main_gt, test_desc))
+                if (writeFinalTensors(main_gt, test_desc, getResultFilenamePrefix()))
                 {
                     WARNING("Errors encountered in saving output tensors");
                 }
             }
         }
-    }
 
-done:
-    switch (main_gt.getGraphStatus())
-    {
-        case GraphStatus::TOSA_VALID:
-            // Result is valid.
-            break;
-        case GraphStatus::TOSA_UNPREDICTABLE:
-            fprintf(stderr, "Graph result: UNPREDICTABLE.\n");
-            break;
-        case GraphStatus::TOSA_ERROR:
-            fprintf(stderr, "Graph result: ERROR.\n");
-            break;
-        default:
-            fprintf(stderr, "Unknown graph status code=%d.\n", (int)main_gt.getGraphStatus());
+    done:
+        status = main_gt.getGraphStatus();
+        switch (status)
+        {
+            case GraphStatus::TOSA_VALID:
+                // Result is valid.
+                break;
+            case GraphStatus::TOSA_UNPREDICTABLE:
+                fprintf(stderr, "Graph result: UNPREDICTABLE.\n");
+                break;
+            case GraphStatus::TOSA_ERROR:
+                fprintf(stderr, "Graph result: ERROR.\n");
+                break;
+            default:
+                fprintf(stderr, "Unknown graph status code=%d.\n", (int)main_gt.getGraphStatus());
+        }
+
+        if (status == GraphStatus::TOSA_VALID && g_func_config.eval && g_func_config.precise_mode &&
+            isComplianceModeDotProduct(test_desc))
+        {
+            // first run result is valid, in precise mode and eval is true: turn on abs_mode for second run
+            g_func_config.abs_mode = true;
+            continue;
+        }
+
+        // otherwise, do only one run
+        break;
     }
 
     g_func_debug.fini_debug();
-
-    return (int)main_gt.getGraphStatus();
+    return (int)status;
 }
 
-int loadGraph(TosaSerializationHandler& tsh, json test_desc)
+int loadGraph(TosaSerializationHandler& tsh, json& test_desc)
 {
     char graph_fullname[1024];
     const std::string error_msg1 = "Check \"tosa_file\" in .json specified by --tosa_desc";
@@ -248,7 +268,7 @@ int loadGraph(TosaSerializationHandler& tsh, json test_desc)
     return 0;
 }
 
-int readInputTensors(SubgraphTraverser& gt, json test_desc)
+int readInputTensors(SubgraphTraverser& gt, json& test_desc)
 {
     int tensorCount = gt.getNumInputTensors();
     Tensor* tensor;
@@ -314,7 +334,24 @@ int readInputTensors(SubgraphTraverser& gt, json test_desc)
     return 0;
 }
 
-int writeFinalTensors(SubgraphTraverser& gt, json test_desc)
+const std::string getResultFilenamePrefix()
+{
+    return g_func_config.abs_mode ? "bounds_" : "";
+}
+
+// returns true iff test_desc contains a dictionary, "compliance",
+// which contains an entry "mode" whose value is "dot product"
+bool isComplianceModeDotProduct(json& test_desc)
+{
+    if (test_desc.contains("compliance") && test_desc["compliance"].contains("mode") &&
+        test_desc["compliance"]["mode"] == "dot product")
+    {
+        return true;
+    }
+    return false;
+}
+
+int writeFinalTensors(SubgraphTraverser& gt, json& test_desc, const std::string& filename_prefix)
 {
     int tensorCount = gt.getNumOutputTensors();
     const Tensor* tensor;
@@ -338,7 +375,7 @@ int writeFinalTensors(SubgraphTraverser& gt, json test_desc)
                 return 1;
             }
 
-            snprintf(filename, sizeof(filename), "%s/%s", g_func_config.output_dir.c_str(),
+            snprintf(filename, sizeof(filename), "%s/%s%s", g_func_config.output_dir.c_str(), filename_prefix.c_str(),
                      test_desc["ofm_file"][i].get<std::string>().c_str());
 
             DEBUG_MED(GT, "Writing output tensor[%d] %s to filename: %s", i, tensor->getName().c_str(), filename);
diff --git a/reference_model/src/ops/reduction.cc b/reference_model/src/ops/reduction.cc
index fd48472..f07ffd7 100644
--- a/reference_model/src/ops/reduction.cc
+++ b/reference_model/src/ops/reduction.cc
@@ -192,7 +192,10 @@ int OpReduceSum<Rank, Dtype>::eval()
     {
         case TOSA_REF_TYPE_FP16:
         case TOSA_REF_TYPE_BF16:
-            this->out->getTensor() = this->in->getTensor().sum(this->dims).reshape(this->out->getTensor().dimensions()).unaryExpr([](float f){return fpTrunc<Dtype>(f);});
+            this->out->getTensor() = this->in->getTensor()
+                                         .sum(this->dims)
+                                         .reshape(this->out->getTensor().dimensions())
+                                         .unaryExpr([](float f) { return fpTrunc<Dtype>(f); });
             break;
         case TOSA_REF_TYPE_FP32:
         case TOSA_REF_TYPE_INT32:
@@ -225,7 +228,9 @@ struct SumRequiresReducer {
 template <int Rank, TOSA_REF_TYPE Dtype>
 int OpReduceSumInt<Rank, Dtype>::eval()
 {
-    this->out->getTensor() = this->in->getTensor().reduce(this->dims, SumRequiresReducer(this->parent_sgt)).reshape(this->out->getTensor().dimensions());
+    this->out->getTensor() = this->in->getTensor()
+                                 .reduce(this->dims, SumRequiresReducer(this->parent_sgt))
+                                 .reshape(this->out->getTensor().dimensions());
 
     return GraphNode::eval();
 }
@@ -250,12 +255,17 @@ struct SumDoubleReducer
 template <int Rank, TOSA_REF_TYPE Dtype>
 int OpReduceSumDouble<Rank, Dtype>::eval()
 {
+    typename ReduceNode<Rank, Dtype>::TIn in_val = this->in->getTensor();
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of in value
+        in_val = in_val.abs();
+    }
     switch (Dtype)
     {
         case TOSA_REF_TYPE_FP64:
-            this->out->getTensor() = this->in->getTensor()
-                                         .reduce(this->dims, SumDoubleReducer())
-                                         .reshape(this->out->getTensor().dimensions());
+            this->out->getTensor() =
+                in_val.reduce(this->dims, SumDoubleReducer()).reshape(this->out->getTensor().dimensions());
             break;
         default:
             ERROR_IF(true, "unsupported TOSA_REF_TYPE %s", EnumNameTOSAREFTYPE(Dtype));
diff --git a/reference_model/src/ops/tensor_ops.cc b/reference_model/src/ops/tensor_ops.cc
index a60819d..5ba7a83 100644
--- a/reference_model/src/ops/tensor_ops.cc
+++ b/reference_model/src/ops/tensor_ops.cc
@@ -572,6 +572,12 @@ int OpAvgPool2d<Dtype, AccDtype>::eval()
 
     ETensor4<InEigenType> input_padded = input_val.pad(pad);
 
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of input_padded
+        input_padded = input_padded.abs();
+    }
+
     // assuming input and output have same scales
     // so input and output scaling is not required
     // TODO: check if this assumption TOSA made
@@ -799,6 +805,16 @@ int OpConv2d<InDtype, WeightDtype, OutDtype>::eval()
 
     ETensor4<InEigenType> input_padded = input_val.pad(pad);
 
+    TBias bias_val = this->bias->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of conv operands
+        input_padded = input_padded.abs();
+        weight_val   = weight_val.abs();
+        bias_val     = bias_val.abs();
+    }
+
     // extract_image_patches() output [N, KH, KW, H * W, C]
     // need to transpose to [N, H * W, KH, KW, C]
     ETensor5<InEigenType> input_extract_patches =
@@ -815,7 +831,8 @@ int OpConv2d<InDtype, WeightDtype, OutDtype>::eval()
 
     // don't need to apply bias_multiplier ( * bias_scale and >> bias_shift) since tflite already scale it
     // and reshaped from [C] to [1, C], and broadcast to [N * H * W, C]
-    ETensor2<OutEigenType> bias_2d = (this->bias->getTensor().reshape(bias_reshaped_dims).broadcast(bias_bcast_dims)).template cast<OutEigenType>();
+    ETensor2<OutEigenType> bias_2d =
+        (bias_val.reshape(bias_reshaped_dims).broadcast(bias_bcast_dims)).template cast<OutEigenType>();
 
     // output matrix is [N * H * W, C]
     ETensor2<OutEigenType> contracted_result =
@@ -977,6 +994,16 @@ int OpConv3d<InDtype, WeightDtype, OutDtype>::eval()
 
     ETensor5<InEigenType> input_padded = input_val.pad(pad);
 
+    TBias bias_val = this->bias->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of conv operands
+        input_padded = input_padded.abs();
+        weight_val   = weight_val.abs();
+        bias_val     = bias_val.abs();
+    }
+
     // 1. initialize with bias
     Eigen::array<Eigen::Index, 5> reshape_dim;
     reshape_dim.fill(1);
@@ -988,7 +1015,7 @@ int OpConv3d<InDtype, WeightDtype, OutDtype>::eval()
     bcast[2]                  = out_height;
     bcast[3]                  = out_width;
     bcast[4]                  = 1;
-    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
+    this->output->getTensor() = bias_val.reshape(reshape_dim).broadcast(bcast);
 
     // 2. direct convolution
     AccEigenType acc(0.0);
@@ -1167,6 +1194,16 @@ int OpDepthwiseConv2d<InDtype, WeightDtype, OutDtype>::eval()
 
     ETensor4<InEigenType> input_padded = input_val.pad(pad);
 
+    TBias bias_val = this->bias->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of conv operands
+        input_padded = input_padded.abs();
+        weight_val   = weight_val.abs();
+        bias_val     = bias_val.abs();
+    }
+
     // GEMM doesn't fit well with DepthwiseConv2d
     // 1. use extract_image_patches() to handle stride/dilation/pad
     // 2. perform direct convolution
@@ -1186,7 +1223,7 @@ int OpDepthwiseConv2d<InDtype, WeightDtype, OutDtype>::eval()
     bcast[3] = 1;
 
     // initialize with bias
-    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
+    this->output->getTensor() = bias_val.reshape(reshape_dim).broadcast(bcast);
 
     // 2. direct depthwise convolution
     for (int ob = 0; ob < out_batch; ob++)
@@ -1307,9 +1344,20 @@ int OpFullyConnected<InDtype, WeightDtype, OutDtype>::eval()
         weight_val = weight_val - (WeightEigenType)attribute->weight_zp();
     }
 
-    this->output->getTensor() =
-        input_val.template cast<AccEigenType>().contract(weight_val.template cast<AccEigenType>(), dims).template cast<OutEigenType>() +
-            this->bias->getTensor().reshape(bias_reshape).broadcast(bias_bcast);
+    TBias bias_val = this->bias->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of fully connected operands
+        input_val  = input_val.abs();
+        weight_val = weight_val.abs();
+        bias_val   = bias_val.abs();
+    }
+
+    this->output->getTensor() = input_val.template cast<AccEigenType>()
+                                    .contract(weight_val.template cast<AccEigenType>(), dims)
+                                    .template cast<OutEigenType>() +
+                                bias_val.reshape(bias_reshape).broadcast(bias_bcast);
 
     if (OutDtype == TOSA_REF_TYPE_INT48)
     {
@@ -1416,6 +1464,13 @@ int OpMatMul<Dtype, OutDtype>::eval()
         b_val = b_val - (InEigenType)attribute->b_zp();
     }
 
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of matmul operands
+        a_val = a_val.abs();
+        b_val = b_val.abs();
+    }
+
     Eigen::array<Eigen::Index, 2> a_rank2_shape({ H, C });
     Eigen::array<Eigen::Index, 2> b_rank2_shape({ C, W });
     Eigen::array<Eigen::Index, 3> output_rank3_shape({ 1, H, W });
@@ -1692,6 +1747,16 @@ int OpFFT2d<Dtype>::eval()
         sign_val = -1.0;
     }
 
+    TIn in_real_val = this->in_real->getTensor();
+    TIn in_imag_val = this->in_imag->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of real and imag operands
+        in_real_val = in_real_val.abs();
+        in_imag_val = in_imag_val.abs();
+    }
+
     for (int n = 0; n < in_real_batch; n++)
     {
         for (int oy = 0; oy < out_real_height; oy++)
@@ -1704,8 +1769,8 @@ int OpFFT2d<Dtype>::eval()
                 {
                     for (int ix = 0; ix < in_real_width; ix++)
                     {
-                        OutEigenType val_real = this->in_real->getTensor()(n, iy, ix);
-                        OutEigenType val_imag = this->in_imag->getTensor()(n, iy, ix);
+                        OutEigenType val_real = in_real_val(n, iy, ix);
+                        OutEigenType val_imag = in_imag_val(n, iy, ix);
                         // Use explicit cast to ensure intermmediate calculations are completed using OutEigenType
                         a = sign_val * 2 * M_PI * ((iy * (OutEigenType)oy) / in_real_height + (ix * (OutEigenType)ox) / in_real_width);
                         sum_real += val_real * cos(a) + val_imag * sin(a);
@@ -1800,6 +1865,14 @@ int OpRFFT2d<Dtype>::eval()
 
     OutEigenType sum_real, sum_imag, a;
 
+    TIn in_val = this->in->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of in operand
+        in_val = in_val.abs();
+    }
+
     for (int n = 0; n < in_batch; n++)
     {
         for (int oy = 0; oy < out_real_height; oy++)
@@ -1814,8 +1887,8 @@ int OpRFFT2d<Dtype>::eval()
                     {
                         // Use explicit cast to ensure intermmediate calculations are completed using OutEigenType
                         a = 2 * M_PI * ((iy * (OutEigenType)oy) / in_height + (ix * (OutEigenType)ox) / in_width);
-                        sum_real += this->in->getTensor()(n, iy, ix) * cos(a);
-                        sum_imag += -this->in->getTensor()(n, iy, ix) * sin(a);
+                        sum_real += in_val(n, iy, ix) * cos(a);
+                        sum_imag += -in_val(n, iy, ix) * sin(a);
                     }
                 }
                 this->out_real->getTensor()(n, oy, ox) = sum_real;
@@ -2005,6 +2078,16 @@ int OpTransposeConv2d<InDtype, WeightDtype, OutDtype>::eval()
         weight_val = weight_val - (WeightEigenType)attribute->weight_zp();
     }
 
+    TBias bias_val = this->bias->getTensor();
+
+    if (g_func_config.abs_mode)
+    {
+        // in abs_mode: take abs values of conv operands
+        input_val  = input_val.abs();
+        weight_val = weight_val.abs();
+        bias_val   = bias_val.abs();
+    }
+
     Eigen::array<Eigen::Index, 4> reshape_dim;
     reshape_dim.fill(1);
     reshape_dim[3] = b_out_channels;
@@ -2016,7 +2099,7 @@ int OpTransposeConv2d<InDtype, WeightDtype, OutDtype>::eval()
     bcast[3] = 1;
 
     // initialize with bias
-    this->output->getTensor() = this->bias->getTensor().reshape(reshape_dim).broadcast(bcast);
+    this->output->getTensor() = bias_val.reshape(reshape_dim).broadcast(bcast);
 
     int out_x_origin, out_y_origin;
     int out_x, out_y;
-- 
cgit v1.2.1