aboutsummaryrefslogtreecommitdiff
path: root/reference_model/src/arith_util.h
blob: 59bdf447bdff9fdec9e5ea3e78e28b0d602fcfe4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287

// Copyright (c) 2020-2023, ARM Limited.
//
//    Licensed under the Apache License, Version 2.0 (the "License");
//    you may not use this file except in compliance with the License.
//    You may obtain a copy of the License at
//
//         http://www.apache.org/licenses/LICENSE-2.0
//
//    Unless required by applicable law or agreed to in writing, software
//    distributed under the License is distributed on an "AS IS" BASIS,
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//    See the License for the specific language governing permissions and
//    limitations under the License.

/*
 *   Filename:     src/arith_util.h
 *   Description:
 *    arithmetic utility macro, include:
 *      fp16 (float16_t ) type alias
 *      bitwise operation
 *      fix point arithmetic
 *      fp16 type conversion(in binary translation)
 *      fp16 arithmetic (disguised with fp32 now)
 */

#ifndef ARITH_UTIL_H
#define ARITH_UTIL_H

#include <fenv.h>
#include <math.h>
#define __STDC_LIMIT_MACROS    //enable min/max of plain data type
#include "func_config.h"
#include "func_debug.h"
#include "half.hpp"
#include "inttypes.h"
#include "tosa_generated.h"
#include <Eigen/Core>
#include <bitset>
#include <cassert>
#include <iostream>
#include <limits>
#include <stdint.h>
#include <typeinfo>

using namespace tosa;
using namespace std;

// Returns the number of bits set to 1 in val.
// Each pass clears the lowest set bit, so the loop body runs once per set bit.
inline size_t _count_one(uint64_t val)
{
    size_t set_bits = 0;
    while (val != 0)
    {
        // val - 1 flips the lowest set bit and every bit below it; the AND drops that bit.
        val &= (val - 1);
        ++set_bits;
    }
    return set_bits;
}

// Returns how many times val can be shifted right by one before it becomes zero,
// not counting the final shift: the index of the highest set bit for a positive val,
// and 0 when val is 0 or 1.
template <typename T>
inline size_t _integer_log2(T val)
{
    size_t exponent = 0;
    for (val >>= 1; val; val >>= 1)
    {
        exponent++;
    }
    return exponent;
}

// Returns the number of consecutive 0 bits at the most significant end of val,
// where the width considered is sizeof(T) * 8. A zero val yields the full width.
template <typename T>
inline size_t _count_leading_zeros(T val)
{
    const size_t bit_width = sizeof(T) * 8;
    const T top_bit        = static_cast<T>(1) << (bit_width - 1);

    // Shift val left one more position per leading zero until a 1 reaches the top bit.
    size_t zeros = 0;
    while (zeros < bit_width && ((val << zeros) & top_bit) == 0)
    {
        ++zeros;
    }
    return zeros;
}

// Returns the number of consecutive 1 bits at the most significant end of val,
// where the width considered is sizeof(T) * 8. An all-ones val yields the full width.
template <typename T>
inline size_t _count_leading_ones(T val)
{
    const size_t bit_width = sizeof(T) * 8;
    const T top_bit        = static_cast<T>(1) << (bit_width - 1);

    // Shift val left one more position per leading one until a 0 reaches the top bit.
    size_t ones = 0;
    while (ones < bit_width && ((val << ones) & top_bit) != 0)
    {
        ++ones;
    }
    return ones;
}

// NOTE: every macro in this group may evaluate its arguments more than once,
// so arguments must be free of side effects.

// Larger / smaller of two values.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Compute ceiling of (a/b).
// Relies on truncating integer division, so the result is only a true ceiling
// when the exact quotient is non-negative.
#define DIV_CEIL(a, b) ((a) % (b) ? ((a) / (b) + 1) : ((a) / (b)))

// Returns a mask with the lowest SIZE bits set (all 64 bits when SIZE >= 64)
#define ONES_MASK(SIZE) ((uint64_t)((SIZE) >= 64 ? UINT64_C(0xffffffffffffffff) : (UINT64_C(1) << (SIZE)) - 1))

// Returns the field of bits from HIGH_BIT down to LOW_BIT (both inclusive),
// right-shifted so LOW_BIT lands at bit 0; equivalent to VAL[HIGH_BIT:LOW_BIT] in Verilog
#define BIT_FIELD(HIGH_BIT, LOW_BIT, VAL) (((uint64_t)(VAL) >> (LOW_BIT)) & ONES_MASK((HIGH_BIT) + 1 - (LOW_BIT)))

// Returns the bit at position POS of VAL (0 or 1)
#define BIT_EXTRACT(POS, VAL) (((uint64_t)(VAL) >> (POS)) & (1))

// Counts the set bits of VAL (converted to uint64_t).
// Uses Brian Kernighan's way: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
// Does this need to support floating point type?
// Not sure if static_cast is the right thing to do, try to be type safe first
#define ONES_COUNT(VAL) (_count_one((uint64_t)(VAL)))

// Shifts VAL left by SHF when SHF > 0, right by -SHF when SHF < 0, and returns VAL unchanged when SHF == 0.
// SHF is parenthesized in every comparison so that operator expressions can be passed safely.
#define SHIFT(SHF, VAL) (((SHF) > 0) ? ((VAL) << (SHF)) : (((SHF) < 0) ? ((VAL) >> (-(SHF))) : (VAL)))
// Rounds A up to the next multiple of B (A is returned unchanged when already a multiple).
// Relies on truncating integer division, so it is intended for non-negative A.
#define ROUNDTO(A, B) ((A) % (B) == 0 ? (A) : ((A) / (B) + 1) * (B))
// Rounds A toward zero to a multiple of B
#define ROUNDTOLOWER(A, B) (((A) / (B)) * (B))
// Same idea as SHIFT but with the arguments in (value, shift) order:
// left shift for SHIFT >= 0, right shift by -SHIFT otherwise
#define BIDIRECTIONAL_SHIFT(VAL, SHIFT) (((SHIFT) >= 0) ? ((VAL) << (SHIFT)) : ((VAL) >> (-(SHIFT))))
// Integer log2 of VAL, see _integer_log2()
#define ILOG2(VAL) (_integer_log2(VAL))

// Get negative value (2's complement), truncated to the named width
#define NEGATIVE_8(VAL) ((uint8_t)(~(VAL) + 1))
#define NEGATIVE_16(VAL) ((uint16_t)(~(VAL) + 1))
#define NEGATIVE_32(VAL) ((uint32_t)(~(VAL) + 1))
#define NEGATIVE_64(VAL) ((uint64_t)(~(VAL) + 1))
// Convert a bit quantity to the minimum bytes required to hold those bits
#define BITS_TO_BYTES(BITS) (ROUNDTO((BITS), 8) / 8)
// Count leading zeros/ones for 8/16/32/64-bit operands
// (I don't see an obvious way to collapse this into a size-independent set)
// treated as unsigned
// The sized variants cast VAL to the unsigned type of that width first, so the
// count is taken over exactly 8/16/32/64 bits; the unsuffixed variants pass VAL
// through unchanged and therefore count over the width of VAL's own type.
#define LEADING_ZEROS_64(VAL) (_count_leading_zeros((uint64_t)(VAL)))
#define LEADING_ZEROS_32(VAL) (_count_leading_zeros((uint32_t)(VAL)))
#define LEADING_ZEROS_16(VAL) (_count_leading_zeros((uint16_t)(VAL)))
#define LEADING_ZEROS_8(VAL) (_count_leading_zeros((uint8_t)(VAL)))
#define LEADING_ZEROS(VAL) (_count_leading_zeros(VAL))

// Leading-ones counterparts of the macros above (forward to _count_leading_ones).
#define LEADING_ONES_64(VAL) _count_leading_ones((uint64_t)(VAL))
#define LEADING_ONES_32(VAL) _count_leading_ones((uint32_t)(VAL))
#define LEADING_ONES_16(VAL) _count_leading_ones((uint16_t)(VAL))
#define LEADING_ONES_8(VAL) _count_leading_ones((uint8_t)(VAL))
#define LEADING_ONES(VAL) _count_leading_ones(VAL)
// math operation
// sign-extended for the signed version
// extended to different return types: (8, 16, 32) + (S, U)
// Saturate a value at a certain bitwidth, signed and unsigned versions
// The format is as follows: SATURATE_VAL_{saturation_sign}_{return_type}
// for example
// SATURATE_VAL_U_8U(8,300) will return uint8_t with value of 255 (0xff)
// SATURATE_VAL_S_32S(5,-48) will return int32_t with value of -16 (0x10 as a 5-bit two's complement pattern)
// note that a negative value can be cast to an unsigned return type using the native uint(int) cast
// so SATURATE_VAL_S_8U(5,-40) will have value 0b11110000, which is in turn 240 in uint8_t

// Returns a value of type T whose lowest `width` bits are set and whose higher bits are clear.
//   T     - integral result type; assumed to be no wider than uintmax_t (the full-width
//           branch below already relies on that).
//   width - number of low bits to set; asserted to be at most the bit width of T.
template <typename T>
constexpr T bitmask(const uint32_t width)
{
    ASSERT(width <= sizeof(T) * 8);
    // The full-width case is special-cased because shifting by the operand's full width is undefined.
    // The partial mask is formed in uintmax_t rather than in T: for a signed T,
    // (T(1) << width) - 1 overflows when width is one less than the bit width of T,
    // which is exactly what maxval<T>() requests for a full-width signed type.
    return width == sizeof(T) * 8 ? static_cast<T>(std::numeric_limits<uintmax_t>::max())
                                  : static_cast<T>((static_cast<uintmax_t>(1) << width) - 1);
}

// Returns the smallest value representable in `width` bits for type T:
// -(2^(width-1)) when T is signed (two's complement range), 0 when T is unsigned.
//   T     - integral result type; assumed to be no wider than intmax_t.
//   width - bit width of the range; asserted to be at most the bit width of T.
//           For a signed T, width must be at least 1.
template <typename T>
constexpr T minval(const uint32_t width)
{
    ASSERT(width <= sizeof(T) * 8);
    // Computed as -(2^(width-1) - 1) - 1 so no intermediate leaves the signed range:
    // forming 2^(width-1) in T and negating it overflows when width is the full width of T.
    return std::is_signed<T>::value
               ? static_cast<T>(-static_cast<intmax_t>((static_cast<uintmax_t>(1) << (width - 1)) - 1) - 1)
               : static_cast<T>(0);
}

// Returns the largest value representable in `width` bits for type T.
//   width - bit width of the range; asserted to be at most the bit width of T.
template <typename T>
constexpr T maxval(const uint32_t width)
{
    ASSERT(width <= sizeof(T) * 8);
    // A signed type gives up one bit to the sign, so its maximum has one fewer bit set.
    return bitmask<T>(std::is_signed<T>::value ? width - 1 : width);
}

// Clamps `value` into the range representable in `width` bits of type T and returns it as T.
//   T     - integral result type; its signedness selects the signed or unsigned range.
//   width - bit width of the saturation range (see minval<T>() / maxval<T>()).
//   value - value to saturate.
template <typename T>
constexpr T saturate(const uint32_t width, const intmax_t value)
{
    const intmax_t lower = static_cast<intmax_t>(minval<T>(width));
    const T type_upper   = maxval<T>(width);

    // The upper bound can exceed INTMAX_MAX only for an unsigned T as wide as intmax_t
    // (e.g. uint64_t with width 64). Converting such a bound to intmax_t would make it
    // negative and force every input to saturate to it, so it is capped at INTMAX_MAX;
    // no intmax_t input can be larger than that anyway.
    const bool upper_exceeds_intmax =
        !std::is_signed<T>::value &&
        static_cast<uintmax_t>(type_upper) > static_cast<uintmax_t>(std::numeric_limits<intmax_t>::max());
    const intmax_t upper =
        upper_exceeds_intmax ? std::numeric_limits<intmax_t>::max() : static_cast<intmax_t>(type_upper);

    return static_cast<T>(std::min(std::max(value, lower), upper));
}

inline void float_trunc_bytes(float* src)
{
    /* Clears the two least significant bytes of the float pointed to by src, in place.
    Which two bytes those are is chosen from g_func_config.float_is_big_endian. */
    char raw[sizeof(float)];
    memcpy(raw, src, sizeof(float));

    // Big-endian layout keeps the low-order bytes at indices 2 and 3, little-endian at 0 and 1.
    const size_t low_start = g_func_config.float_is_big_endian ? 2 : 0;
    raw[low_start]         = '\000';
    raw[low_start + 1]     = '\000';

    memcpy(src, raw, sizeof(float));
}

inline void truncateFloatToBFloat(float* src, int64_t size)
{
    /* Applies float_trunc_bytes() to each of the `size` consecutive floats starting at src,
    zeroing the two least significant bytes of every value in place. */
    ASSERT_MEM(src);
    ASSERT_MSG(size > 0, "Size of src (representing number of values in src) must be a positive integer.");

    float* cursor     = src;
    int64_t remaining = size;
    while (remaining != 0)
    {
        float_trunc_bytes(cursor);
        ++cursor;
        --remaining;
    }
}

inline bool checkValidBFloat(float src)
{
    /* Returns true when the two least significant bytes of src are both zero.
    Which two bytes those are is chosen from g_func_config.float_is_big_endian. */
    char raw[sizeof(float)];
    memcpy(raw, &src, sizeof(float));

    // Big-endian layout keeps the low-order bytes at indices 2 and 3, little-endian at 0 and 1.
    const size_t low_start = g_func_config.float_is_big_endian ? 2 : 0;
    return raw[low_start] == '\000' && raw[low_start + 1] == '\000';
}

inline bool float_is_big_endian()
{
    /* Detects the byte order of float by comparing the in-memory bytes of 1.0 and -1.0.
    Returns true when negating the value changes the first byte in memory,
    which indicates a big-endian float representation. */
    const float positive = 1.0;
    const float negative = -positive;

    char positive_bytes[sizeof(float)];
    char negative_bytes[sizeof(float)];
    memcpy(positive_bytes, &positive, sizeof(float));
    memcpy(negative_bytes, &negative, sizeof(float));

    const bool first_byte_changed = (positive_bytes[0] != negative_bytes[0]);
    return first_byte_changed;
}

template <DType Dtype>
float fpTrunc(float f_in)
{
    /* Truncates a float value based on the DType it represents.*/
    switch (Dtype)
    {
        case DType_BF16:
            truncateFloatToBFloat(&f_in, 1);
            break;
        case DType_FP16:
            // Cast to temporary float16 value before casting back to float32
            {
                half_float::half h = half_float::half_cast<half_float::half, float>(f_in);
                f_in               = half_float::half_cast<float, half_float::half>(h);
                break;
            }
        case DType_FP32:
            // No-op for fp32
            break;
        default:
            ASSERT_MSG(false, "DType %s should not be float-truncated.", EnumNameDType(Dtype));
    }
    return f_in;
}

#endif /* ARITH_UTIL_H */