Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arm/macros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ rloopindex .req r11
rlooplimit .req r12

@ default register mapping
@ sp .req r13
@ sp .req r13 @ rsp (return stack pointer)
@ lr .req r14
@ pc .req r15

Expand Down
19 changes: 10 additions & 9 deletions arm/words/double.s
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@
@ ( 1L 1H 2L tos: 2H -- Rem-L Rem-H Quot-L tos: Quot-H )
@------------------------------------------------------------------------------
@ use faster um/mod if divisor is 32-bits
@ TODO: This crashes hard in QEMU, why?
@ cbnz tos, 1f
@ loadtos
@ b umslashmod
@ 1:
@ throw if divisor is zero
ldr r0, [psp, #0]
orrs r0, r0, tos
bne 2f
cbnz tos, 1f
loadtos
cbnz tos, 2f @ throw if divisor is zero
throw EDIVZ
2:
bl umslashmod @ (rem quolo quohi)
ldr r0, [psp] @ save quolo
mov r1, #0 @ push 0 for remhi
str r1, [psp]
pushnos r0 @ (rem 0 quolo quohi)
NEXT
1:
bl ud_slash_mod
NEXT

Expand Down
6 changes: 5 additions & 1 deletion arm/words/muldiv.s
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ slashmod:
CODEWORD "u/mod", USLASHMOD @ ( u1 u2 -- rem quot )
    @ Guard against a zero divisor, then call the shared uslashmod routine.
    @ uslashmod returns with `bx lr`, so it has to be entered with `bl`;
    @ branching straight into it would return through whatever lr last held
    @ and would never reach NEXT.
    cbnz tos, 1f @ divisor != 0: go divide
    throw EDIVZ @ divisor == 0: raise the division-by-zero exception
1:
    bl uslashmod @ ( u1 u2 -- rem quot )
    NEXT

@ call with bl uslashmod
uslashmod:
ldm psp!, {r0} @ Get u1 into a register
movs r1, tos @ Back up the divisor in X.
Expand All @@ -38,4 +42,4 @@ uslashmod:
subs r0, r1 @ Compute remainder.
subs psp, #4
str r0, [psp]
NEXT
bx lr
65 changes: 40 additions & 25 deletions arm/words/um-slash-mod.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,54 @@ SHORT: Divide ud by u1, giving the quotient u3 and the remainder u2.
All values and arithmetic are unsigned. An ambiguous condition exists if u1 is zero or if the quotient lies outside the range of a single-cell unsigned integer.
*/
CODEWORD "um/mod", UMSLASHMOD
@ Same algorithm as if performing the division by hand, just in binary.
@ Inputs: hi:lo = 64-bit dividend, tos = 32-bit divisor
@ Outputs: hi:lo = 64-bit quotient, rem = 32-bit remainder
hi .req r0
lo .req r1
rem .req r2
idx .req r3
umslashmod:
cbnz tos, 4f @ throw if divisor is zero
cbnz tos, 1f @ throw if divisor is zero
throw EDIVZ
4: popnos hi
cmp hi, #0 @ if hi == 0, use the quicker u/mod
beq uslashmod
popnos lo
1:
bl umslashmod
cbz tos, 2f @ if quohi > 0, then quotient is too large
throw ERANGE
2:
loadtos @drop quohi
NEXT

@ call with bl umslashmod
@ Same algorithm as if performing the division by hand, just in binary.
@ Inputs: hi:lo = 64-bit dividend, tos = 32-bit divisor
@ Outputs: hi:lo = 64-bit quotient, rem = 32-bit remainder
hi .req r0 @ dividend-high
lo .req r1 @ dividend-low
rem .req r2 @ remainder
dsr .req r3 @ divisor
umslashmod: @ ( dndlo dndhi dsr -- rem quolo quohi )
popnos hi @ load dividend-high
cbnz hi, 3f @ if hi == 0, use the quicker u/mod
push {lr}
bl uslashmod @ ( rem quolo )
savetos
mov tos, #0 @ ( rem quolo 0 )
pop {pc} @ return
3:
popnos lo @ load dividend-low
@ TODO: could use CLZ to skip shifting through 0 bit prefix bit by bit and save some iterations
mov idx, #64 @ Loop counter for 64 bits
mov dsr, tos @ load divisor
mov tos, #64 @ Loop counter for 64 bits
mov rem, #0 @ Initialize remainder to 0
1: lsls lo, lo, #1 @ Shift dividend/quotient low word
adcs hi, hi, hi @ Shift dividend/quotient high word into carry (adc is how to lsl by 1 bit with carry)
adcs rem, rem, rem @ Shift carry into remainder, also catch the bit shifting off at the top
bcs 5f @ If Carry is set, rem is now effectively 33 bits wide, so force the divisor subtraction
cmp rem, tos @ Can we subtract the divisor?
cmp rem, dsr @ Can we subtract the divisor?
blo 2f @ If remainder < divisor, skip
5: sub rem, rem, tos @ remainder -= divisor
5: sub rem, rem, dsr @ remainder -= divisor
adds lo, lo, #1 @ Set the lowest bit of quotient
2: subs idx, idx, #1 @ Decrement loop counter
2: subs tos, tos, #1 @ Decrement loop counter
bne 1b

cbz hi, 3f @ if hi > 0, then quotient is too large
throw ERANGE
3: pushnos rem
mov tos, lo
.unreq rem
.unreq lo
.unreq hi
NEXT
pushnos rem
pushnos lo
mov tos, hi
bx lr
.unreq rem
.unreq lo
.unreq hi

8 changes: 8 additions & 0 deletions tests/core2.fr
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,11 @@ TESTING rot, -rot
T{ 1 2 3 -rot -> 3 1 2 }T
T{ 1 2 3 -rot rot -> 1 2 3 }T

TESTING doubles

\ full 64/64 division
T{ $10000000010. $100000000. ud/mod -> $10. $100. }T
\ escape to 64/32 division
T{ $100000010. $10000. ud/mod -> $10. $10000. }T
\ escape to 32/32 division
T{ $1000010. $1000. ud/mod -> $10. $1000. }T
7 changes: 3 additions & 4 deletions todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ List of known issues and tasks that need to be done (by area)
* [x] implement m-rot.s (see rv)
* [x] implement umstar.s (see rv)
* [ ] (exiti) likely needs work
* [ ] document dev tool setup
* [ ] we are not using the link register; would it speed things up if it were used to cache DO_NEXT?
(i.e. macro NEXT would do `bx lr` instead of `b DO_NEXT`)

## LM4F120

Expand All @@ -47,9 +48,7 @@ List of known issues and tasks that need to be done (by area)


# RISC-V
* [ ] add readme.md
* [ ] generalize flash dictionary write support (flash.s)
* [ ] generalize eeprom support (eeprom.s)
* [ ] implement native um/mod and optimize for narrow arguments (see ARM)

## CH32V307
* [ ] RAMALLOT reg_shadow differences between 307 and QEMU configurations
Expand Down