// FixedPoint/examples.cpp (coder-mike/FixedPoint repository, master branch)

#include "fixed_point.hpp"

#include <iostream>

using namespace std;

// Forward declarations of the example groups that are defined after main().
void edgeCases();
void otherOperators();
// Prints the source text of the expression VAL (via the # stringizing
// operator), then ":" and a tab, then the value of VAL, and finishes the line
// with endl. Because the expression text is part of the output, renaming a
// variable passed to PRINT_VAL changes what the program prints.
#define PRINT_VAL(VAL) cout << #VAL << ":\t" << (VAL) << endl

// Entry point: walks through the basic FixedPoint operations (construction,
// shifting, multiplication, addition, conversion, extension, raw creation and
// division), printing each result, then runs edgeCases() and otherOperators().
// The trailing comment on each PRINT_VAL line is the value the example expects.
int main()
{
    // A fixed-point number in Q4.4 signed format (it's always signed), with value "5"
    FixedPoint<4,4> a = 5;

    PRINT_VAL(a); // 5.00

    // Move radix point left 1 bit (right shift by 1 bit): Q4.4 becomes Q3.5
    // and the value is halved
    FixedPoint<3,5> b = a.rightShift<1>();

    PRINT_VAL(b); // 2.50

    // Multiplication (integer and fraction lengths are added together):
    // Q4.4 * Q3.5 gives Q7.9
    FixedPoint<7,9> c = a * b;

    PRINT_VAL(c); // 12.500

    // Addition (the integer and fraction lengths are the maximum from each operand):
    // Q4.4 + Q3.5 gives Q4.5
    FixedPoint<4,5> d = a + b;

    PRINT_VAL(d); // 7.500

    // Retrieve value as integer: per the expected outputs below, getValue()
    // drops the fractional part of 7.5 while round() rounds it to 8
    int di = d.getValue();
    int di2 = d.round();

    PRINT_VAL(di); // 7
    PRINT_VAL(di2); // 8

    // Convert Q4.4 to Q3.1 (any bit length is ok as long as less than 64 total)
    FixedPoint<3,1> e = a.convert<3,1>();

    PRINT_VAL(e); // 5.0

    // Extend the integer part of the fixed-point number to 12 bits
    // (the 5 fractional bits of b are kept, so Q3.5 becomes Q12.5)
    FixedPoint<12,5> f = b.extend<12>();

    PRINT_VAL(f); // 2.50

    // Initialize a fixed-point number from the given raw binary data.
    // 0xA0 is 10.100000 in binary; read as a signed Q2.6 value that is -1.5
    FixedPoint<2,6> g = FixedPoint<2,6>::createRaw(0xA0);

    PRINT_VAL(g); // -1.5

    // Fixed-point division
    // Note that dividing Qa.b by Qc.d results in Q(a+d).(b+c)
    // This is symmetrical to multiplication result: Q(a+c).(b+d)
    // To see why this makes sense, look in edgeCases().
    FixedPoint<9,7> h = a / b; // 5.0/2.5 = 2.0
    FixedPoint<7,9> j = b / a; // 2.5/5.0 = 0.5
    // 1.6640625 is 213/128, i.e. 5/3 approximated with 7 fractional bits
    FixedPoint<4,7> k = a / FixedPoint<3,0>(3); // 5.0/3.0 = 1.6640625

    PRINT_VAL(h); // 2.000
    PRINT_VAL(j); // 0.500
    PRINT_VAL(k); // 1.664

    // See-also, some edge cases
    edgeCases();

    // See-also, some more operators that can be used
    otherOperators();
}

// Prints a section header and then demonstrates conversions that change the
// number of fractional bits in both directions, plus two divisions at the
// extremes of the operand ranges that motivate the quotient format
// Q(a+d).(b+c) described in main().
void edgeCases()
{
    cout << "--- Edge cases ---" << endl;
    {
        FixedPoint<4,8> a = 3;
        /* This is an interesting conversion, because it makes the result smaller,
        but it also shifts the bits internally because the fractional part is
        smaller. If it's converted to 8-bit BEFORE the shift, it will lose the
        upper 2 bits. */
        FixedPoint<4,4> b = a.convert<4,4>();

        PRINT_VAL(b); // 3.000
    }

    {
        FixedPoint<4,4> a = 3;
        /* This is the converse case. In the previous case, the type-conversion
        needed to occur AFTER the shift, but in this case it needs to occur
        BEFORE the shift. */
        FixedPoint<4,8> b = a.convert<4,8>();

        PRINT_VAL(b); // 3.000
    }

    {
        // These edge cases show why it makes sense to have the quotient in the
        // format it is.

        // The first case is dividing a really big number by a really small one
        FixedPoint<8,0> a = -128;
        // A really small number:
        FixedPoint<0,8> b = 1.0/256;
        // Q8.0 / Q0.8 gives Q16.0: -128 / (1/256) = -32768
        FixedPoint<16,0> c = a/b;
        PRINT_VAL(c); // -32768

        // The converse is dividing a really small number by a really big one
        // Q0.8 / Q8.0 gives Q0.16: (1/256) / -128 = -2^(-15)
        FixedPoint<0,16> d = b/a;
        PRINT_VAL(d); // -0.00003, although the value is actually exactly equal to -2^(-15)

        // You can see the exact value here:
        // shifting left by 15 bits multiplies by 2^15, giving exactly -1
        FixedPoint<15,1> e = d.leftShift<15>();
        PRINT_VAL(e); // "-1.0"
    }
}

// Prints a section header and then demonstrates addition with a plain integer,
// the comparison operators, compound assignment (+=), copy construction and
// assignment. The modulus examples are present only as commented-out code.
void otherOperators()
{
    cout << "--- Other Operators ---" << endl;
    FixedPoint<5,1> a = 2.5;

    // Addition with an integer
    FixedPoint<5,1> b = a + 1;
    PRINT_VAL(b); // 3.50

    // Comparison operators (a is 2.5, b is 3.5)
    // The true/false comments give the logical result.
    // NOTE(review): assuming these operators return bool and boolalpha is not
    // set elsewhere, the stream prints 1/0 rather than true/false - confirm.
    PRINT_VAL(a < b); // true
    PRINT_VAL(b < a); // false
    PRINT_VAL(a > b); // false
    PRINT_VAL(a == a); // true
    PRINT_VAL(a == b); // false
    PRINT_VAL(a != a); // false
    PRINT_VAL(a != b); // true
    PRINT_VAL(a >= b); // false
    PRINT_VAL(a >= a); // true

    // Modulus operator
    // Note the output form:
    // If the input is format Q(a).(b) % Q(c).(d) then the output is Q(c).(max(b,d))
    // NOTE(review): the modulus examples below are commented out in the source;
    // the reason is not visible here - confirm whether operator% is supported.
   // FixedPoint<1,3> c = FixedPoint<4,3>(2.625) % FixedPoint<1,2>(0.5);
   // PRINT_VAL(c); // 0.125
   // FixedPoint<1,3> d = FixedPoint<4,1>(0.875) % FixedPoint<1,3>(0.375);
   //PRINT_VAL(d); // 0.125

    // Accumulation: add a Q3.2 value into a Q5.2 variable in place
    FixedPoint<5,2> e = 3.5;
    e += FixedPoint<3,2>(0.25);
    PRINT_VAL(e); // 3.75

    // Copy constructor
    FixedPoint<5,2> f(e);
    PRINT_VAL(f); // 3.75

    // Assignment operator
    f = e;
    PRINT_VAL(f); // 3.75
}

// Prints a section header and then demonstrates interoperability with built-in
// types: initializing a wide FixedPoint from a floating-point literal and
// reading a FixedPoint back as int (getValue) and as float (getValueF).
// NOTE(review): no call to this function is visible in this file's main(), and
// it has no forward declaration alongside the other example functions -
// confirm whether it is meant to be run.
void compatibility()
{
    cout << "--- Compatibility ---" << endl;

    // Assignment with integer value
    // NOTE(review): this example is commented out in the source; 60+4 bits is
    // 64 total, which may exceed the "less than 64 total" limit mentioned in
    // main() - confirm.
    // FixedPoint<60,4> a = 123456789123456789;
    // PRINT_VAL(a); // 123456789123456789.0

    // Assignment to flexible bit combinations
    FixedPoint<31,31> b = 2147483646.99999999;
    PRINT_VAL(b);

    // Assignment to integer
    int x = FixedPoint<5,2>(3.75).getValue();
    PRINT_VAL(x); // 3

    // Assignment to float
    float f = FixedPoint<5,2>(3.75).getValueF();
    PRINT_VAL(f); // 3.75
}