@@ -71,19 +71,20 @@ var zeroSizedAlloc uint8
7171type blockState uint8
7272
7373const (
74- blockStateFree blockState = 0 // 00
75- blockStateHead blockState = 1 // 01
76- blockStateTail blockState = 2 // 10
77- blockStateMark blockState = 3 // 11
78- blockStateMask blockState = 3 // 11
74+ blockStateLow blockState = 1
75+ blockStateHigh blockState = 1 << blocksPerStateByte
76+
77+ blockStateFree blockState = 0
78+ blockStateHead blockState = blockStateLow
79+ blockStateTail blockState = blockStateHigh
80+ blockStateMark blockState = blockStateLow | blockStateHigh
81+ blockStateMask blockState = blockStateLow | blockStateHigh
7982)
8083
84+ const blockStateEach = 1 << blocksPerStateByte - 1
85+
8186// The byte value of a block where every block is a 'tail' block.
82- const blockStateByteAllTails = 0 |
83- uint8 (blockStateTail << (stateBits * 3 )) |
84- uint8 (blockStateTail << (stateBits * 2 )) |
85- uint8 (blockStateTail << (stateBits * 1 )) |
86- uint8 (blockStateTail << (stateBits * 0 ))
87+ const blockStateByteAllTails = byte (blockStateTail ) * blockStateEach
8788
8889// String returns a human-readable version of the block state, for debugging.
8990func (s blockState ) String () string {
@@ -180,7 +181,7 @@ func (b gcBlock) stateByte() byte {
180181// Return the block state given a state byte. The state byte must have been
181182// obtained using b.stateByte(), otherwise the result is incorrect.
182183func (b gcBlock ) stateFromByte (stateByte byte ) blockState {
183- return blockState (stateByte >> (( b % blocksPerStateByte ) * stateBits )) & blockStateMask
184+ return blockState (stateByte >> (b % blocksPerStateByte )) & blockStateMask
184185}
185186
186187// State returns the current block state.
@@ -193,38 +194,12 @@ func (b gcBlock) state() blockState {
193194// from head to mark.
194195func (b gcBlock ) setState (newState blockState ) {
195196 stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
196- * stateBytePtr |= uint8 (newState << (( b % blocksPerStateByte ) * stateBits ))
197+ * stateBytePtr |= uint8 (newState << (b % blocksPerStateByte ))
197198 if gcAsserts && b .state () != newState {
198199 runtimePanic ("gc: setState() was not successful" )
199200 }
200201}
201202
202- // markFree sets the block state to free, no matter what state it was in before.
203- func (b gcBlock ) markFree () {
204- stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
205- * stateBytePtr &^= uint8 (blockStateMask << ((b % blocksPerStateByte ) * stateBits ))
206- if gcAsserts && b .state () != blockStateFree {
207- runtimePanic ("gc: markFree() was not successful" )
208- }
209- if gcAsserts {
210- * (* [wordsPerBlock ]uintptr )(unsafe .Pointer (b .address ())) = [wordsPerBlock ]uintptr {}
211- }
212- }
213-
214- // unmark changes the state of the block from mark to head. It must be marked
215- // before calling this function.
216- func (b gcBlock ) unmark () {
217- if gcAsserts && b .state () != blockStateMark {
218- runtimePanic ("gc: unmark() on a block that is not marked" )
219- }
220- clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state
221- stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
222- * stateBytePtr &^= uint8 (clearMask << ((b % blocksPerStateByte ) * stateBits ))
223- if gcAsserts && b .state () != blockStateHead {
224- runtimePanic ("gc: unmark() was not successful" )
225- }
226- }
227-
228203// objHeader is a structure prepended to every heap object to hold metadata.
229204type objHeader struct {
230205 // next is the next object to scan after this.
@@ -633,36 +608,69 @@ func markRoot(addr, root uintptr) {
633608// Sweep goes through all memory and frees unmarked memory.
634609// It returns how many bytes are free in the heap after the sweep.
635610func sweep () (freeBytes uintptr ) {
636- freeCurrentObject := false
637- var freed uint64
638- for block := gcBlock (0 ); block < endBlock ; block ++ {
639- switch block .state () {
640- case blockStateHead :
641- // Unmarked head. Free it, including all tail blocks following it.
642- block .markFree ()
643- freeCurrentObject = true
644- gcFrees ++
645- freed ++
646- case blockStateTail :
647- if freeCurrentObject {
648- // This is a tail object following an unmarked head.
649- // Free it now.
650- block .markFree ()
651- freed ++
652- }
653- case blockStateMark :
654- // This is a marked object. The next tail blocks must not be freed,
655- // but the mark bit must be removed so the next GC cycle will
656- // collect this object if it is unreferenced then.
657- block .unmark ()
658- freeCurrentObject = false
659- case blockStateFree :
660- freeBytes += bytesPerBlock
661- }
662- }
663- gcFreedBlocks += freed
664- freeBytes += uintptr (freed ) * bytesPerBlock
665- return
611+ endBlock := endBlock
612+ metadataEnd := unsafe .Add (metadataStart , (endBlock + (blocksPerStateByte - 1 ))/ blocksPerStateByte )
613+ var oldFreeBlocks , freedHeads , freedTails uintptr
614+ var carry byte
615+ // Pre-subtract the blocks that do not actually exist from oldFreeBlocks.
616+ oldFreeBlocks -= (blocksPerStateByte - 1 ) - uintptr (endBlock + (blocksPerStateByte - 1 ))% blocksPerStateByte
617+ for meta := metadataStart ; meta != metadataEnd ; meta = unsafe .Add (meta , 1 ) {
618+ // Fetch the state byte.
619+ stateBytePtr := (* byte )(unsafe .Pointer (meta ))
620+ stateByte := * stateBytePtr
621+
622+ // Count existing free blocks in the state byte.
623+ lowState := stateByte & blockStateEach
624+ highState := stateByte >> blocksPerStateByte
625+ freeBlocks := lowState | highState
626+ oldFreeBlocks += uintptr (count4LUT [freeBlocks ])
627+
628+ // Count unmarked heads in the state byte.
629+ unmarkedHeads := lowState &^ highState
630+ freedHeads += uintptr (count4LUT [unmarkedHeads ])
631+
632+ // Identify and seperate live and free tails.
633+ // Adding 1 to a run of bits will clear the run.
634+ // We can use this to clear tails after a freed head.
635+ tails := highState &^ lowState
636+ tailClear := tails + (unmarkedHeads << 1 ) + carry
637+ carry = tailClear >> blocksPerStateByte
638+ freedTails += uintptr (count4LUT [tails &^tailClear ])
639+ tails &= tailClear
640+
641+ // Construct the new state byte.
642+ markedHeads := highState & lowState
643+ * stateBytePtr = markedHeads | (tails << blocksPerStateByte )
644+ }
645+
646+ // Update the GC metrics.
647+ gcFrees += uint64 (freedHeads )
648+ freedBlocks := freedHeads + freedTails
649+ gcFreedBlocks += uint64 (freedBlocks )
650+ freeBlocks := oldFreeBlocks + freedBlocks
651+
652+ return freeBlocks * bytesPerBlock
653+ }
654+
// count4LUT maps every 4-bit value (0 through 15) to the number of bits that
// are set in it. It is indexed directly by the mask.
// TODO: replace with popcnt when available
var count4LUT = [16]uint8{
	// 0b00xx
	0, 1, 1, 2,
	// 0b01xx
	1, 2, 2, 3,
	// 0b10xx
	1, 2, 2, 3,
	// 0b11xx
	2, 3, 3, 4,
}
667675
668676// dumpHeap can be used for debugging purposes. It dumps the state of each heap
0 commit comments