Skip to content

Commit 3f822df

Browse files
committed
runtime (gc_blocks.go): make sweep branchless
Instead of looping over each block, we can use bit hacks to operate on an entire state byte. This deinterleaves the state bits in order to enable these tricks.
1 parent 505b5ee commit 3f822df

File tree

2 files changed

+79
-71
lines changed

2 files changed

+79
-71
lines changed

builder/sizes_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) {
4242
// This is a small number of very diverse targets that we want to test.
4343
tests := []sizeTest{
4444
// microcontrollers
45-
{"hifive1b", "examples/echo", 3568, 280, 0, 2268},
46-
{"microbit", "examples/serial", 2630, 342, 8, 2272},
47-
{"wioterminal", "examples/pininterrupt", 7175, 1493, 116, 6912},
45+
{"hifive1b", "examples/echo", 3524, 296, 0, 2268},
46+
{"microbit", "examples/serial", 2598, 358, 8, 2272},
47+
{"wioterminal", "examples/pininterrupt", 7095, 1509, 116, 6912},
4848

4949
// TODO: also check wasm. Right now this is difficult, because
5050
// wasm binaries are run through wasm-opt and therefore the

src/runtime/gc_blocks.go

Lines changed: 76 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -71,19 +71,20 @@ var zeroSizedAlloc uint8
7171
type blockState uint8
7272

7373
const (
74-
blockStateFree blockState = 0 // 00
75-
blockStateHead blockState = 1 // 01
76-
blockStateTail blockState = 2 // 10
77-
blockStateMark blockState = 3 // 11
78-
blockStateMask blockState = 3 // 11
74+
blockStateLow blockState = 1
75+
blockStateHigh blockState = 1 << blocksPerStateByte
76+
77+
blockStateFree blockState = 0
78+
blockStateHead blockState = blockStateLow
79+
blockStateTail blockState = blockStateHigh
80+
blockStateMark blockState = blockStateLow | blockStateHigh
81+
blockStateMask blockState = blockStateLow | blockStateHigh
7982
)
8083

84+
const blockStateEach = 1<<blocksPerStateByte - 1
85+
8186
// The value of a state byte in which every block is a 'tail' block.
82-
const blockStateByteAllTails = 0 |
83-
uint8(blockStateTail<<(stateBits*3)) |
84-
uint8(blockStateTail<<(stateBits*2)) |
85-
uint8(blockStateTail<<(stateBits*1)) |
86-
uint8(blockStateTail<<(stateBits*0))
87+
const blockStateByteAllTails = byte(blockStateTail) * blockStateEach
8788

8889
// String returns a human-readable version of the block state, for debugging.
8990
func (s blockState) String() string {
@@ -180,7 +181,7 @@ func (b gcBlock) stateByte() byte {
180181
// Return the block state given a state byte. The state byte must have been
181182
// obtained using b.stateByte(), otherwise the result is incorrect.
182183
func (b gcBlock) stateFromByte(stateByte byte) blockState {
183-
return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
184+
return blockState(stateByte>>(b%blocksPerStateByte)) & blockStateMask
184185
}
185186

186187
// State returns the current block state.
@@ -193,38 +194,12 @@ func (b gcBlock) state() blockState {
193194
// from head to mark.
194195
func (b gcBlock) setState(newState blockState) {
195196
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
196-
*stateBytePtr |= uint8(newState << ((b % blocksPerStateByte) * stateBits))
197+
*stateBytePtr |= uint8(newState << (b % blocksPerStateByte))
197198
if gcAsserts && b.state() != newState {
198199
runtimePanic("gc: setState() was not successful")
199200
}
200201
}
201202

202-
// markFree sets the block state to free, no matter what state it was in before.
203-
func (b gcBlock) markFree() {
204-
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
205-
*stateBytePtr &^= uint8(blockStateMask << ((b % blocksPerStateByte) * stateBits))
206-
if gcAsserts && b.state() != blockStateFree {
207-
runtimePanic("gc: markFree() was not successful")
208-
}
209-
if gcAsserts {
210-
*(*[wordsPerBlock]uintptr)(unsafe.Pointer(b.address())) = [wordsPerBlock]uintptr{}
211-
}
212-
}
213-
214-
// unmark changes the state of the block from mark to head. It must be marked
215-
// before calling this function.
216-
func (b gcBlock) unmark() {
217-
if gcAsserts && b.state() != blockStateMark {
218-
runtimePanic("gc: unmark() on a block that is not marked")
219-
}
220-
clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state
221-
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
222-
*stateBytePtr &^= uint8(clearMask << ((b % blocksPerStateByte) * stateBits))
223-
if gcAsserts && b.state() != blockStateHead {
224-
runtimePanic("gc: unmark() was not successful")
225-
}
226-
}
227-
228203
// objHeader is a structure prepended to every heap object to hold metadata.
229204
type objHeader struct {
230205
// next is the next object to scan after this.
@@ -633,36 +608,69 @@ func markRoot(addr, root uintptr) {
633608
// Sweep goes through all memory and frees unmarked memory.
634609
// It returns how many bytes are free in the heap after the sweep.
635610
func sweep() (freeBytes uintptr) {
636-
freeCurrentObject := false
637-
var freed uint64
638-
for block := gcBlock(0); block < endBlock; block++ {
639-
switch block.state() {
640-
case blockStateHead:
641-
// Unmarked head. Free it, including all tail blocks following it.
642-
block.markFree()
643-
freeCurrentObject = true
644-
gcFrees++
645-
freed++
646-
case blockStateTail:
647-
if freeCurrentObject {
648-
// This is a tail object following an unmarked head.
649-
// Free it now.
650-
block.markFree()
651-
freed++
652-
}
653-
case blockStateMark:
654-
// This is a marked object. The next tail blocks must not be freed,
655-
// but the mark bit must be removed so the next GC cycle will
656-
// collect this object if it is unreferenced then.
657-
block.unmark()
658-
freeCurrentObject = false
659-
case blockStateFree:
660-
freeBytes += bytesPerBlock
661-
}
662-
}
663-
gcFreedBlocks += freed
664-
freeBytes += uintptr(freed) * bytesPerBlock
665-
return
611+
endBlock := endBlock
612+
metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte)
613+
var oldFreeBlocks, freedHeads, freedTails uintptr
614+
var carry byte
615+
// Pre-subtract the blocks that do not actually exist from oldFreeBlocks.
616+
oldFreeBlocks -= (blocksPerStateByte - 1) - uintptr(endBlock+(blocksPerStateByte-1))%blocksPerStateByte
617+
for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) {
618+
// Fetch the state byte.
619+
stateBytePtr := (*byte)(unsafe.Pointer(meta))
620+
stateByte := *stateBytePtr
621+
622+
// Count existing free blocks in the state byte.
623+
lowState := stateByte & blockStateEach
624+
highState := stateByte >> blocksPerStateByte
625+
freeBlocks := lowState | highState
626+
oldFreeBlocks += uintptr(count4LUT[freeBlocks])
627+
628+
// Count unmarked heads in the state byte.
629+
unmarkedHeads := lowState &^ highState
630+
freedHeads += uintptr(count4LUT[unmarkedHeads])
631+
632+
// Identify and separate live and free tails.
633+
// Adding 1 to a run of bits will clear the run.
634+
// We can use this to clear tails after a freed head.
635+
tails := highState &^ lowState
636+
tailClear := tails + (unmarkedHeads << 1) + carry
637+
carry = tailClear >> blocksPerStateByte
638+
freedTails += uintptr(count4LUT[tails&^tailClear])
639+
tails &= tailClear
640+
641+
// Construct the new state byte.
642+
markedHeads := highState & lowState
643+
*stateBytePtr = markedHeads | (tails << blocksPerStateByte)
644+
}
645+
646+
// Update the GC metrics.
647+
gcFrees += uint64(freedHeads)
648+
freedBlocks := freedHeads + freedTails
649+
gcFreedBlocks += uint64(freedBlocks)
650+
freeBlocks := oldFreeBlocks + freedBlocks
651+
652+
return freeBlocks * bytesPerBlock
653+
}
654+
655+
// count4LUT is a lookup table used to count set bits in a 4-bit mask.
656+
// TODO: replace with popcnt when available
657+
var count4LUT = [16]uint8{
658+
0b0000: 0,
659+
0b0001: 1,
660+
0b0010: 1,
661+
0b0011: 2,
662+
0b0100: 1,
663+
0b0101: 2,
664+
0b0110: 2,
665+
0b0111: 3,
666+
0b1000: 1,
667+
0b1001: 2,
668+
0b1010: 2,
669+
0b1011: 3,
670+
0b1100: 2,
671+
0b1101: 3,
672+
0b1110: 3,
673+
0b1111: 4,
666674
}
667675

668676
// dumpHeap can be used for debugging purposes. It dumps the state of each heap

0 commit comments

Comments
 (0)