-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathmult41.a
More file actions
118 lines (109 loc) · 3.48 KB
/
mult41.a
File metadata and controls
118 lines (109 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
; mult41.a
; based on 8 bit multiply of mult13.a, combined by TobyLobster to give a 16 bit multiply
;
; 16 bit x 16 bit unsigned multiply, 32 bit result
; Average cycles: 350
; 1149 bytes
; Zero page workspace. Multi-byte values are stored low byte first.
inputA = $02 ; 2 bytes (a0,a1): first 16 bit operand, a0 is the low byte
inputB = $04 ; 2 bytes (b0,b1): second 16 bit operand, b0 is the low byte
result = $06 ; 4 bytes: 32 bit product written by mult
temp_m2 = $0a ; 1 byte: scratch for mult8bit, holds its X operand (m2)
prod_low = $0b ; 1 byte: scratch for mult8bit, holds the low byte of the 8 bit product
* = $0200
; align tables to start of a page for speed
; (with a page aligned base, an 8 bit index never crosses a page boundary)
;
; Quarter square tables, 512 entries each (two pages per table).
; Entry i holds (i*i)/4, split into a low byte table and a high byte table.
; 512 entries are needed because mult8bit indexes with m1+m2, which can reach 510.
squares_low
!for i, 0, 511 {
!byte <((i*i)/4)
}
squares_high
!for i, 0, 511 {
!byte >((i*i)/4)
}
; ***************************************************************************************
; mult: 16 bit x 16 bit unsigned multiply, 32 bit result
;
; The product is assembled from four 8 bit x 8 bit partial products, each
; computed by mult8bit and added in at the byte offset matching its weight:
;
;   result = a0*b0 + ((a1*b0 + a0*b1) << 8) + (a1*b1 << 16)
;
; On Entry:
;   inputA: multiplier   (2 bytes: a0 = low byte, a1 = high byte)
;   inputB: multiplicand (2 bytes: b0 = low byte, b1 = high byte)
; On Exit:
;   result: product (4 bytes, low byte first)
;   inputA and inputB are left unchanged
;   A, X, Y, the flags, temp_m2 and prod_low are overwritten
mult
    ; ---- partial product a0*b0 -> result+0, result+1 ----
    ldx inputB
    lda inputA
    jsr mult8bit        ; returns A = low byte, X = high byte
    stx result+1
    sta result

    ; ---- partial product a1*b0, added in from result+1 ----
    ldx inputB
    lda inputA+1
    jsr mult8bit
    clc
    adc result+1        ; low byte joins the high byte of a0*b0
    sta result+1
    txa                 ; txa does not disturb the carry
    adc #0              ; high byte is at most $fe, so adding the carry cannot overflow
    sta result+2

    ; ---- partial product a0*b1, added in from result+1 ----
    ldx inputB+1
    lda inputA
    jsr mult8bit
    clc
    adc result+1
    sta result+1
    txa
    adc result+2
    sta result+2
    lda #0
    rol                 ; carry out of result+2 becomes the initial result+3 (0 or 1)
    sta result+3

    ; ---- partial product a1*b1, added in from result+2 ----
    ldx inputB+1
    lda inputA+1
    jsr mult8bit
    clc
    adc result+2
    sta result+2
    txa
    adc result+3        ; top byte: high byte of a1*b1 plus the carries so far
    sta result+3
    rts
; ***************************************************************************************
; mult13 tweaked for output parameters
;
; 8 bit x 8 bit multiply, 16 bit result
;
; Uses the quarter square tables:
;   m1*m2 = squares[m1+m2] - squares[abs(m1-m2)]   where squares[i] = (i*i)/4
;
; On Entry:
; A: multiplier (m1)
; X: multiplicand (m2)
;
; On Exit:
; A: low byte of product
; X: high byte of product
; Y, temp_m2 and prod_low are overwritten
mult8bit
tay ; save m1 in y
stx temp_m2 ; save m2
sec ; set carry for subtract
sbc temp_m2 ; find difference m1 - m2
bcs + ; carry set means no borrow: m1 >= m2, difference is already non-negative
eor #$ff ; otherwise negate it: invert it
adc #1 ; and add 1 (carry is clear here because the bcs fell through)
+
tax ; use abs(m1-m2) as index
clc ;
tya ; get m1 back
adc temp_m2 ; find m1 + m2
tay ; use low byte of m1+m2 as index
bcc + ; carry clear: m1+m2 <= 255
; m1+m2 is 256..510: the +256 offset selects the upper half of each table.
; Carry is still set from the adc above, which is what sbc needs (no borrow in).
lda squares_low+256,y ; find sum squared low if > 255
sbc squares_low,x ; subtract diff squared
sta prod_low ; save in product
lda squares_high+256,y ; hi byte
sbc squares_high,x ; subtract diff squared hi byte, with borrow from the low byte
tax ; return high byte of product in x
lda prod_low ; return low byte of product in a
rts ; done
+
; m1+m2 is 0..255: index the lower half of each table directly.
sec ; set carry for subtract
lda squares_low,y ; find sum squared low if <= 255
sbc squares_low,x ; subtract diff squared
sta prod_low ; save in product
lda squares_high,y ; hi byte
sbc squares_high,x ; subtract diff squared hi byte, with borrow from the low byte
tax ; return high byte of product in x
lda prod_low ; return low byte of product in a
rts ;