-
Notifications
You must be signed in to change notification settings - Fork 5
Basic support for floats (f16, f32, f64, f128)
#995
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
bcbf928
1cba805
0392706
a1d86d9
ae733f8
db85b08
e38c300
3ce4165
e9f3868
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1421,7 +1421,41 @@ Boolean values can also be cast to Integers (encoding `true` as `1`). | |
| [preserves-definedness] // ensures #numTypeOf is defined | ||
| ``` | ||
|
|
||
| Casts involving `Float` values are currently not implemented. | ||
| Casts involving `Float` values: `IntToFloat`, `FloatToInt`, and `FloatToFloat`. | ||
|
|
||
| ```k | ||
| // IntToFloat: convert integer to float with the target float type's precision | ||
| rule <k> #cast(Integer(VAL, _WIDTH, _SIGNEDNESS), castKindIntToFloat, _, TY) | ||
| => Float( | ||
| Int2Float(VAL, | ||
| #significandBits(#floatTypeOf(lookupTy(TY))), | ||
| #exponentBits(#floatTypeOf(lookupTy(TY)))), | ||
| #bitWidth(#floatTypeOf(lookupTy(TY))) | ||
| ) | ||
| ... | ||
| </k> | ||
| [preserves-definedness] | ||
|
|
||
| // FloatToInt: truncate float towards zero and convert to integer | ||
| rule <k> #cast(Float(VAL, _WIDTH), castKindFloatToInt, _, TY) | ||
| => #intAsType(Float2Int(VAL), 128, #intTypeOf(lookupTy(TY))) | ||
| ... | ||
| </k> | ||
| requires #isIntType(lookupTy(TY)) | ||
| [preserves-definedness] | ||
|
|
||
| // FloatToFloat: round float to the target float type's precision | ||
| rule <k> #cast(Float(VAL, _WIDTH), castKindFloatToFloat, _, TY) | ||
| => Float( | ||
| roundFloat(VAL, | ||
| #significandBits(#floatTypeOf(lookupTy(TY))), | ||
| #exponentBits(#floatTypeOf(lookupTy(TY)))), | ||
| #bitWidth(#floatTypeOf(lookupTy(TY))) | ||
| ) | ||
| ... | ||
| </k> | ||
| [preserves-definedness] | ||
| ``` | ||
|
|
||
| ### Casts between pointer types | ||
|
|
||
|
|
@@ -1991,6 +2025,18 @@ are correct. | |
| rule onInt(binOpRem, X, Y) => X %Int Y requires Y =/=Int 0 [preserves-definedness] | ||
| // operation undefined otherwise | ||
|
|
||
| // performs the given operation on IEEE 754 floats | ||
| syntax Float ::= onFloat( BinOp, Float, Float ) [function] | ||
| // ------------------------------------------------------- | ||
| rule onFloat(binOpAdd, X, Y) => X +Float Y [preserves-definedness] | ||
| rule onFloat(binOpAddUnchecked, X, Y) => X +Float Y [preserves-definedness] | ||
| rule onFloat(binOpSub, X, Y) => X -Float Y [preserves-definedness] | ||
| rule onFloat(binOpSubUnchecked, X, Y) => X -Float Y [preserves-definedness] | ||
| rule onFloat(binOpMul, X, Y) => X *Float Y [preserves-definedness] | ||
| rule onFloat(binOpMulUnchecked, X, Y) => X *Float Y [preserves-definedness] | ||
| rule onFloat(binOpDiv, X, Y) => X /Float Y [preserves-definedness] | ||
| rule onFloat(binOpRem, X, Y) => X %Float Y [preserves-definedness] | ||
|
|
||
| // error cases for isArithmetic(BOP): | ||
| // * arguments must be Numbers | ||
|
|
||
|
|
@@ -2059,6 +2105,18 @@ are correct. | |
| // infinite precision result must equal truncated result | ||
| andBool truncate(onInt(BOP, ARG1, ARG2), WIDTH, Unsigned) ==Int onInt(BOP, ARG1, ARG2) | ||
| [preserves-definedness] | ||
|
|
||
| // Float arithmetic: Rust never emits CheckedBinaryOp for floats (only BinaryOp), | ||
| // so the checked flag is always false here. See rustc_const_eval/src/interpret/operator.rs: | ||
| // binary_float_op returns a plain value, not a (value, overflow) pair. | ||
| rule #applyBinOp( | ||
| BOP, | ||
| Float(ARG1, WIDTH), | ||
| Float(ARG2, WIDTH), | ||
| false) | ||
| => Float(onFloat(BOP, ARG1, ARG2), WIDTH) | ||
| requires isArithmetic(BOP) | ||
| [preserves-definedness] | ||
| ``` | ||
|
|
||
| #### Comparison operations | ||
|
|
@@ -2095,6 +2153,14 @@ The argument types must be the same for all comparison operations, however this | |
| rule cmpOpBool(binOpGe, X, Y) => cmpOpBool(binOpLe, Y, X) | ||
| rule cmpOpBool(binOpGt, X, Y) => cmpOpBool(binOpLt, Y, X) | ||
|
|
||
| syntax Bool ::= cmpOpFloat ( BinOp, Float, Float ) [function] | ||
| rule cmpOpFloat(binOpEq, X, Y) => X ==Float Y | ||
| rule cmpOpFloat(binOpLt, X, Y) => X <Float Y | ||
| rule cmpOpFloat(binOpLe, X, Y) => X <=Float Y | ||
| rule cmpOpFloat(binOpNe, X, Y) => X =/=Float Y | ||
| rule cmpOpFloat(binOpGe, X, Y) => X >=Float Y | ||
| rule cmpOpFloat(binOpGt, X, Y) => X >Float Y | ||
|
|
||
| // error cases for isComparison and binOpCmp: | ||
| // * arguments must be numbers or Bool | ||
|
|
||
|
|
@@ -2122,6 +2188,12 @@ The argument types must be the same for all comparison operations, however this | |
| BoolVal(cmpOpBool(OP, VAL1, VAL2)) | ||
| requires isComparison(OP) | ||
| [priority(60), preserves-definedness] // OP known to be a comparison | ||
|
|
||
| rule #applyBinOp(OP, Float(VAL1, WIDTH), Float(VAL2, WIDTH), _) | ||
| => | ||
| BoolVal(cmpOpFloat(OP, VAL1, VAL2)) | ||
| requires isComparison(OP) | ||
| [preserves-definedness] // OP known to be a comparison | ||
| ``` | ||
|
|
||
| The `binOpCmp` operation returns `-1`, `0`, or `+1` (the behaviour of Rust's `std::cmp::Ordering as i8`), indicating `LE`, `EQ`, or `GT`. | ||
|
|
@@ -2137,13 +2209,22 @@ The `binOpCmp` operation returns `-1`, `0`, or `+1` (the behaviour of Rust's `st | |
| rule cmpBool(X, Y) => 0 requires X ==Bool Y | ||
| rule cmpBool(X, Y) => 1 requires X andBool notBool Y | ||
|
|
||
| syntax Int ::= cmpFloat ( Float, Float ) [function] | ||
| rule cmpFloat(VAL1, VAL2) => -1 requires VAL1 <Float VAL2 | ||
| rule cmpFloat(VAL1, VAL2) => 0 requires VAL1 ==Float VAL2 | ||
| rule cmpFloat(VAL1, VAL2) => 1 requires VAL1 >Float VAL2 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How are the comparisons with NaN handled? How do they not get stuck with some of the tests' values where NaN and another float reach here? Do they not? |
||
|
|
||
| rule #applyBinOp(binOpCmp, Integer(VAL1, WIDTH, SIGN), Integer(VAL2, WIDTH, SIGN), _) | ||
| => | ||
| Integer(cmpInt(VAL1, VAL2), 8, true) | ||
|
|
||
| rule #applyBinOp(binOpCmp, BoolVal(VAL1), BoolVal(VAL2), _) | ||
| => | ||
| Integer(cmpBool(VAL1, VAL2), 8, true) | ||
|
|
||
| rule #applyBinOp(binOpCmp, Float(VAL1, WIDTH), Float(VAL2, WIDTH), _) | ||
| => | ||
| Integer(cmpFloat(VAL1, VAL2), 8, true) | ||
| ``` | ||
|
|
||
| #### Unary operations on Boolean and integral values | ||
|
|
@@ -2159,7 +2240,11 @@ The semantics of the operation in this case is to wrap around (with the given bi | |
| ... | ||
| </k> | ||
|
|
||
| // TODO add rule for Floats once they are supported. | ||
| rule <k> #applyUnOp(unOpNeg, Float(VAL, WIDTH)) | ||
| => | ||
| Float(--Float VAL, WIDTH) | ||
| ... | ||
| </k> | ||
| ``` | ||
|
|
||
| The `unOpNot` operation works on boolean and integral values, with the usual semantics for booleans and a bitwise semantics for integral values (overflows cannot occur). | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ module RT-NUMBERS | |
| imports BOOL | ||
| imports BYTES | ||
| imports INT | ||
| imports FLOAT | ||
| ``` | ||
|
|
||
| ## Helpers and Constants for Integer Operations | ||
|
|
@@ -38,6 +39,15 @@ module RT-NUMBERS | |
| rule #isIntType(typeInfoPrimitiveType(primTypeInt(_))) => true | ||
| rule #isIntType(typeInfoPrimitiveType(primTypeUint(_))) => true | ||
| rule #isIntType(_) => false [owise] | ||
|
|
||
| syntax Bool ::= #isFloatType ( TypeInfo ) [function, total] | ||
| // -------------------------------------------------------- | ||
| rule #isFloatType(typeInfoPrimitiveType(primTypeFloat(_))) => true | ||
| rule #isFloatType(_) => false [owise] | ||
|
|
||
| syntax FloatTy ::= #floatTypeOf ( TypeInfo ) [function] | ||
| // ---------------------------------------------------- | ||
| rule #floatTypeOf(typeInfoPrimitiveType(primTypeFloat(FLOATTY))) => FLOATTY | ||
| ``` | ||
|
|
||
| Constants used for overflow-checking and truncation are defined here as macros. | ||
|
|
@@ -110,6 +120,200 @@ This truncation function is instrumental in the implementation of Integer arithm | |
| [preserves-definedness] | ||
| ``` | ||
|
|
||
| ## Helpers and Constants for Float Operations | ||
|
|
||
| Rust supports four fixed-width IEEE 754 float types: `f16`, `f32`, `f64`, and `f128`. | ||
| The helpers below extract format parameters for each type. First, an overview of the format. | ||
|
|
||
| ### IEEE 754 Binary Format | ||
|
|
||
| An IEEE 754 binary floating-point word has three fields stored left-to-right: | ||
|
|
||
| ``` | ||
| MSB LSB | ||
| +---------+----------------+----------------------+ | ||
| | sign | exponent | fraction | | ||
| | (1 bit) | (EB bits) | (SB - 1 bits) | | ||
| +---------+----------------+----------------------+ | ||
| total bits = 1 + EB + (SB - 1) | ||
| ``` | ||
|
|
||
| The **significand** (also called **precision**) is the total number of significant bits | ||
| in the represented value, including an implicit leading 1 that is not stored in the | ||
| fraction field. For a normal number, the mathematical value is: | ||
|
|
||
| value = (-1)^sign * 2^(exponent - bias) * 1.fraction | ||
|
|
||
| The "1." prefix is the implicit bit, so the significand has `SB` bits of precision | ||
| even though only `SB - 1` fraction bits are stored. For example, f64 stores 52 fraction | ||
| bits but has 53 bits of significand precision. | ||
|
|
||
| K's built-in `FLOAT` module uses this convention: `Int2Float(x, precision, exponentBits)` | ||
| takes `precision = SB` (total significand bits including the implicit 1) and `exponentBits = EB`. | ||
| See [IEEE 754 on Wikipedia](https://en.wikipedia.org/wiki/IEEE_754) for full details. | ||
|
|
||
| The exponent is stored as an unsigned integer in | ||
| [excess-M encoding](https://en.wikipedia.org/wiki/Offset_binary) with `bias = 2^(EB-1) - 1`, | ||
| so that the actual exponent is `stored - bias`. For f64, bias = 1023: a stored value of 1023 | ||
| means exponent 0, 1024 means +1, and 1 means -1022. Stored values 0 and `2^EB - 1` are | ||
| reserved for zero/subnormals and infinity/NaN respectively. | ||
|
|
||
| | Type | Total bits | Sign | Exponent (EB) | Fraction (SB-1) | Significand (SB) | Bias | | ||
| |------|------------|------|---------------|-----------------|------------------|------------| | ||
| | f16 | 16 | 1 | 5 | 10 | 11 | 15 | | ||
| | f32 | 32 | 1 | 8 | 23 | 24 | 127 | | ||
| | f64 | 64 | 1 | 11 | 52 | 53 | 1023 | | ||
| | f128 | 128 | 1 | 15 | 112 | 113 | 16383 | | ||
|
|
||
| ```k | ||
| syntax Int ::= #significandBits ( FloatTy ) [function, total] | ||
| // ---------------------------------------------------------- | ||
| rule #significandBits(floatTyF16) => 11 | ||
| rule #significandBits(floatTyF32) => 24 | ||
| rule #significandBits(floatTyF64) => 53 | ||
| rule #significandBits(floatTyF128) => 113 | ||
|
|
||
| syntax Int ::= #exponentBits ( FloatTy ) [function, total] | ||
| // ------------------------------------------------------- | ||
| rule #exponentBits(floatTyF16) => 5 | ||
| rule #exponentBits(floatTyF32) => 8 | ||
| rule #exponentBits(floatTyF64) => 11 | ||
| rule #exponentBits(floatTyF128) => 15 | ||
|
|
||
| syntax Int ::= #bias ( FloatTy ) [function, total] | ||
| // ----------------------------------------------- | ||
| rule #bias(FLOATTY) => (1 <<Int (#exponentBits(FLOATTY) -Int 1)) -Int 1 | ||
| ``` | ||
|
|
||
| ### IEEE 754 Special Values | ||
|
|
||
| When the exponent field is all 1s (`2^EB - 1`), the value is either infinity | ||
| (fraction = 0) or NaN (fraction != 0). K's `FLOAT-SYNTAX` module in | ||
| [domains.md](https://github.com/runtimeverification/k/blob/master/k-distribution/include/kframework/builtin/domains.md#ieee-754-floating-point-numbers) | ||
| defines float literals with a `p<SB>x<EB>` suffix to specify precision and exponent | ||
| bits. See IEEE 754 Binary Format above for values of SB and EB. | ||
|
|
||
| For example, `Infinityp53x11` is f64 positive infinity, `NaNp24x8` is f32 NaN. | ||
|
|
||
| ```k | ||
| syntax Float ::= #posInfFloat ( FloatTy ) [function, total] | ||
| // -------------------------------------------------------- | ||
| rule #posInfFloat(floatTyF16) => Infinityp11x5 | ||
| rule #posInfFloat(floatTyF32) => Infinityp24x8 | ||
| rule #posInfFloat(floatTyF64) => Infinityp53x11 | ||
| rule #posInfFloat(floatTyF128) => Infinityp113x15 | ||
|
|
||
| syntax Float ::= #nanFloat ( FloatTy ) [function, total] | ||
| // ----------------------------------------------------- | ||
| rule #nanFloat(floatTyF16) => NaNp11x5 | ||
| rule #nanFloat(floatTyF32) => NaNp24x8 | ||
| rule #nanFloat(floatTyF64) => NaNp53x11 | ||
| rule #nanFloat(floatTyF128) => NaNp113x15 | ||
| ``` | ||
|
|
||
| ## Decoding Float values from `Bytes` for `OperandConstant` | ||
|
|
||
| The `#decodeFloat` function reconstructs a `Float` value from its IEEE 754 byte representation. | ||
| The bytes are first converted to a raw integer, then the sign, biased exponent, and stored significand | ||
| are extracted. The value is reconstructed using K's `Int2Float` and float arithmetic, with a | ||
| high-precision intermediate to avoid overflow when reconstructing subnormals and small normal values. | ||
|
|
||
| ```k | ||
| syntax Value ::= #decodeFloat ( Bytes, FloatTy ) [function] | ||
| // -------------------------------------------------------- | ||
| rule #decodeFloat(BYTES, FLOATTY) => #decodeFloatRaw(Bytes2Int(BYTES, LE, Unsigned), FLOATTY) | ||
| requires lengthBytes(BYTES) ==Int #bitWidth(FLOATTY) /Int 8 | ||
| [preserves-definedness] | ||
|
|
||
| syntax Value ::= #decodeFloatRaw ( Int, FloatTy ) [function, total] | ||
| // ---------------------------------------------------------------- | ||
| rule #decodeFloatRaw(RAW, FLOATTY) | ||
| => #decodeFloatParts( | ||
| RAW >>Int (#significandBits(FLOATTY) +Int #exponentBits(FLOATTY) -Int 1), | ||
| (RAW >>Int (#significandBits(FLOATTY) -Int 1)) &Int ((1 <<Int #exponentBits(FLOATTY)) -Int 1), | ||
| RAW &Int ((1 <<Int (#significandBits(FLOATTY) -Int 1)) -Int 1), | ||
| FLOATTY | ||
| ) | ||
|
|
||
| syntax Value ::= #decodeFloatParts ( sign: Int, biasedExp: Int, storedSig: Int, FloatTy ) [function] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not explicitly marked as total, however it would need to be (because |
||
| // ------------------------------------------------------------------------------------------------- | ||
|
|
||
| // Zero (positive or negative) | ||
| rule #decodeFloatParts(SIGN, 0, 0, FLOATTY) | ||
| => Float(#applyFloatSign(Int2Float(0, #significandBits(FLOATTY), #exponentBits(FLOATTY)), SIGN), #bitWidth(FLOATTY)) | ||
| [preserves-definedness] | ||
|
|
||
| // Subnormal: no implicit leading 1, exponent is 1 - bias | ||
| rule #decodeFloatParts(SIGN, 0, SIG, FLOATTY) | ||
| => Float( | ||
| #applyFloatSign( | ||
| #reconstructFloat(SIG, 2 -Int #bias(FLOATTY) -Int #significandBits(FLOATTY), FLOATTY), | ||
| SIGN | ||
| ), | ||
| #bitWidth(FLOATTY) | ||
| ) | ||
| requires SIG =/=Int 0 | ||
| [preserves-definedness] | ||
|
|
||
| // Normal: implicit leading 1 in significand | ||
| rule #decodeFloatParts(SIGN, EXP, SIG, FLOATTY) | ||
| => Float( | ||
| #applyFloatSign( | ||
| #reconstructFloat( | ||
| SIG |Int (1 <<Int (#significandBits(FLOATTY) -Int 1)), | ||
| EXP -Int #bias(FLOATTY) -Int #significandBits(FLOATTY) +Int 1, | ||
| FLOATTY | ||
| ), | ||
| SIGN | ||
| ), | ||
| #bitWidth(FLOATTY) | ||
| ) | ||
| requires EXP >Int 0 andBool EXP <Int ((1 <<Int #exponentBits(FLOATTY)) -Int 1) | ||
| [preserves-definedness] | ||
|
|
||
| // Infinity | ||
| rule #decodeFloatParts(SIGN, EXP, 0, FLOATTY) | ||
| => Float(#applyFloatSign(#posInfFloat(FLOATTY), SIGN), #bitWidth(FLOATTY)) | ||
| requires EXP ==Int ((1 <<Int #exponentBits(FLOATTY)) -Int 1) | ||
| [preserves-definedness] | ||
|
|
||
| // NaN | ||
| rule #decodeFloatParts(_SIGN, EXP, SIG, FLOATTY) | ||
| => Float(#nanFloat(FLOATTY), #bitWidth(FLOATTY)) | ||
| requires EXP ==Int ((1 <<Int #exponentBits(FLOATTY)) -Int 1) andBool SIG =/=Int 0 | ||
| [preserves-definedness] | ||
| ``` | ||
|
|
||
| Reconstruct a float from its integer significand and adjusted exponent. | ||
| For positive exponents, shift the significand left and convert. | ||
| For negative exponents, use a high-precision intermediate (256-bit significand, 64-bit exponent) | ||
| to avoid overflow, then round down to the target precision. | ||
|
|
||
| ```k | ||
| syntax Float ::= #reconstructFloat ( sig: Int, adjExp: Int, FloatTy ) [function] | ||
| // ------------------------------------------------------------------------------- | ||
| rule #reconstructFloat(SIG, AEXP, FLOATTY) | ||
| => Int2Float(SIG <<Int AEXP, #significandBits(FLOATTY), #exponentBits(FLOATTY)) | ||
| requires AEXP >=Int 0 | ||
| [preserves-definedness] | ||
|
|
||
| rule #reconstructFloat(SIG, AEXP, FLOATTY) | ||
| => roundFloat( | ||
| Int2Float(SIG, 256, 64) /Float Int2Float(1 <<Int (0 -Int AEXP), 256, 64), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand the conversion here, but is seems different from the approach in the c-semantics. This conversion has an additional rounding step. I wonder if is intentional (avoid the Rat sort and Rat2Float for some reason?). If not, the following rule should work equally well with only one rounding, and no arbitrary precision. (imports RAT is needed) |
||
| #significandBits(FLOATTY), | ||
| #exponentBits(FLOATTY) | ||
| ) | ||
| requires AEXP <Int 0 | ||
| [preserves-definedness] | ||
|
|
||
| // Apply the sign bit to a float value | ||
| syntax Float ::= #applyFloatSign ( Float, Int ) [function, total] | ||
| // --------------------------------------------------------------- | ||
| rule #applyFloatSign(F, 0) => F | ||
| rule #applyFloatSign(F, 1) => --Float F | ||
| rule #applyFloatSign(F, _) => F [owise] | ||
| ``` | ||
|
|
||
| ## Type Casts Between Different Numeric Types | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%Float implements the % operation according to the IEEE 754 standard:
"IEEE 754 Remainder: The result
r=x-y*n, wherenis the integer nearest to the exact valuex/y"It seems that Rust computes it in a slightly different way, where
nis calculated by dropping the fractional part, i.e., truncated.The tests do not exercise this yet, but here is a suggestion if needed.