type g struct {
    // Stack parameters.
    // stack describes the actual stack memory: [stack.lo, stack.hi).
    // stackguard0 is the stack pointer compared in the Go stack growth prologue.
    // It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.
    // stackguard1 is the stack pointer compared in the //go:systemstack stack growth prologue.
    // It is stack.lo+StackGuard on g0 and gsignal stacks.
    // It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash).
    stack       stack   // offset known to runtime/cgo
    stackguard0 uintptr // offset known to liblink
    stackguard1 uintptr // offset known to liblink

    _panic    *_panic // innermost panic - offset known to liblink
    _defer    *_defer // innermost defer
    m         *m      // current m; offset known to arm liblink
    sched     gobuf
    syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
    syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
    stktopsp  uintptr // expected sp at top of stack, to check in traceback
    // param is a generic pointer parameter field used to pass
    // values in particular contexts where other storage for the
    // parameter would be difficult to find. It is currently used
    // in four ways:
    // 1. When a channel operation wakes up a blocked goroutine, it sets param to
    //    point to the sudog of the completed blocking operation.
    // 2. By gcAssistAlloc1 to signal back to its caller that the goroutine completed
    //    the GC cycle. It is unsafe to do so in any other way, because the goroutine's
    //    stack may have moved in the meantime.
    // 3. By debugCallWrap to pass parameters to a new goroutine because allocating a
    //    closure in the runtime is forbidden.
    // 4. When a panic is recovered and control returns to the respective frame,
    //    param may point to a savedOpenDeferState.
    param        unsafe.Pointer
    atomicstatus atomic.Uint32
    stackLock    uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
    goid         uint64
    schedlink    guintptr
    waitsince    int64      // approx time when the g become blocked
    waitreason   waitReason // if status==Gwaiting

    preempt       bool // preemption signal, duplicates stackguard0 = stackpreempt
    preemptStop   bool // transition to _Gpreempted on preemption; otherwise, just deschedule
    preemptShrink bool // shrink stack at synchronous safe point

    // asyncSafePoint is set if g is stopped at an asynchronous
    // safe point. This means there are frames on the stack
    // without precise pointer information.
    asyncSafePoint bool

    paniconfault bool // panic (instead of crash) on unexpected fault address
    gcscandone   bool // g has scanned stack; protected by _Gscan bit in status
    throwsplit   bool // must not split stack
    // activeStackChans indicates that there are unlocked channels
    // pointing into this goroutine's stack. If true, stack
    // copying needs to acquire channel locks to protect these
    // areas of the stack.
    activeStackChans bool
    // parkingOnChan indicates that the goroutine is about to
    // park on a chansend or chanrecv. Used to signal an unsafe point
    // for stack shrinking.
    parkingOnChan atomic.Bool
    // inMarkAssist indicates whether the goroutine is in mark assist.
    // Used by the execution tracer.
    inMarkAssist bool
    coroexit     bool // argument to coroswitch_m

    raceignore    int8  // ignore race detection events
    nocgocallback bool  // whether disable callback from C
    tracking      bool  // whether we're tracking this G for sched latency statistics
    trackingSeq   uint8 // used to decide whether to track this G
    trackingStamp int64 // timestamp of when the G last started being tracked
    runnableTime  int64 // the amount of time spent runnable, cleared when running, only used when tracking

    lockedm    muintptr
    sig        uint32
    writebuf   []byte
    sigcode0   uintptr
    sigcode1   uintptr
    sigpc      uintptr
    parentGoid uint64          // goid of goroutine that created this goroutine
    gopc       uintptr         // pc of go statement that created this goroutine
    ancestors  *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors)
    startpc    uintptr         // pc of goroutine function
    racectx    uintptr
    waiting    *sudog         // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
    cgoCtxt    []uintptr      // cgo traceback context
    labels     unsafe.Pointer // profiler labels
    timer      *timer         // cached timer for time.Sleep
    selectDone atomic.Uint32  // are we participating in a select and did someone win the race?

    coroarg *coro // argument during coroutine transfers

    // goroutineProfiled indicates the status of this goroutine's stack for the
    // current in-progress goroutine profile
    goroutineProfiled goroutineProfileStateHolder

    // Per-G tracer state.
    trace gTraceState

    // Per-G GC state

    // gcAssistBytes is this G's GC assist credit in terms of
    // bytes allocated. If this is positive, then the G has credit
    // to allocate gcAssistBytes bytes without assisting. If this
    // is negative, then the G must correct this by performing
    // scan work. We track this in bytes to make it fast to update
    // and check for debt in the malloc hot path. The assist ratio
    // determines how this corresponds to scan work debt.
    gcAssistBytes int64
}
type m struct {
    g0      *g     // goroutine with scheduling stack
    morebuf gobuf  // gobuf arg to morestack
    divmod  uint32 // div/mod denominator for arm - known to liblink
    _       uint32 // align next field to 8 bytes

    // Fields not known to debuggers.
    procid        uint64            // for debuggers, but offset not hard-coded
    gsignal       *g                // signal-handling g
    goSigStack    gsignalStack      // Go-allocated signal handling stack
    sigmask       sigset            // storage for saved signal mask
    tls           [tlsSlots]uintptr // thread-local storage (for x86 extern register)
    mstartfn      func()
    curg          *g       // current running goroutine
    caughtsig     guintptr // goroutine running during fatal signal
    p             puintptr // attached p for executing go code (nil if not executing go code)
    nextp         puintptr
    oldp          puintptr // the p that was attached before executing a syscall
    id            int64
    mallocing     int32
    throwing      throwType
    preemptoff    string // if != "", keep curg running on this m
    locks         int32
    dying         int32
    profilehz     int32
    spinning      bool // m is out of work and is actively looking for work
    blocked       bool // m is blocked on a note
    newSigstack   bool // minit on C thread called sigaltstack
    printlock     int8
    incgo         bool          // m is executing a cgo call
    isextra       bool          // m is an extra m
    isExtraInC    bool          // m is an extra m that is not executing Go code
    isExtraInSig  bool          // m is an extra m in a signal handler
    freeWait      atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait)
    needextram    bool
    traceback     uint8
    ncgocall      uint64        // number of cgo calls in total
    ncgo          int32         // number of cgo calls currently in progress
    cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily
    cgoCallers    *cgoCallers   // cgo traceback if crashing in cgo call
    park          note
    alllink       *m // on allm
    schedlink     muintptr
    lockedg       guintptr
    createstack   [32]uintptr // stack that created this thread, it's used for StackRecord.Stack0, so it must align with it.
    lockedExt     uint32      // tracking for external LockOSThread
    lockedInt     uint32      // tracking for internal lockOSThread
    nextwaitm     muintptr    // next m waiting for lock

    mLockProfile mLockProfile // fields relating to runtime.lock contention

    // wait* are used to carry arguments from gopark into park_m, because
    // there's no stack to put them on. That is their sole purpose.
    waitunlockf          func(*g, unsafe.Pointer) bool
    waitlock             unsafe.Pointer
    waitTraceBlockReason traceBlockReason
    waitTraceSkip        int

    syscalltick uint32
    freelink    *m // on sched.freem
    trace       mTraceState

    // these are here because they are too large to be on the stack
    // of low-level NOSPLIT functions.
    libcall   libcall
    libcallpc uintptr // for cpu profiler
    libcallsp uintptr
    libcallg  guintptr
    syscall   libcall // stores syscall parameters on windows

    vdsoSP uintptr // SP for traceback while in VDSO call (0 if not in call)
    vdsoPC uintptr // PC for traceback while in VDSO call

    // preemptGen counts the number of completed preemption
    // signals. This is used to detect when a preemption is
    // requested, but fails.
    preemptGen atomic.Uint32

    // Whether this is a pending preemption signal on this M.
    signalPending atomic.Uint32

    // pcvalue lookup cache
    pcvalueCache pcvalueCache

    dlogPerM

    mOS

    chacha8   chacha8rand.State
    cheaprand uint64

    // Up to 10 locks held by this m, maintained by the lock ranking code.
    locksHeldLen int
    locksHeld    [10]heldLockInfo
}
type p struct {
    id          int32
    status      uint32 // one of pidle/prunning/...
    link        puintptr
    schedtick   uint32     // incremented on every scheduler call
    syscalltick uint32     // incremented on every system call
    sysmontick  sysmontick // last tick observed by sysmon
    m           muintptr   // back-link to associated m (nil if idle)
    mcache      *mcache
    pcache      pageCache
    raceprocctx uintptr

    deferpool    []*_defer // pool of available defer structs (see panic.go)
    deferpoolbuf [32]*_defer

    // Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
    goidcache    uint64
    goidcacheend uint64

    // Queue of runnable goroutines. Accessed without lock.
    runqhead uint32
    runqtail uint32
    runq     [256]guintptr
    // runnext, if non-nil, is a runnable G that was ready'd by
    // the current G and should be run next instead of what's in
    // runq if there's time remaining in the running G's time
    // slice. It will inherit the time left in the current time
    // slice. If a set of goroutines is locked in a
    // communicate-and-wait pattern, this schedules that set as a
    // unit and eliminates the (potentially large) scheduling
    // latency that otherwise arises from adding the ready'd
    // goroutines to the end of the run queue.
    //
    // Note that while other P's may atomically CAS this to zero,
    // only the owner P can CAS it to a valid G.
    runnext guintptr

    // Available G's (status == Gdead)
    gFree struct {
        gList
        n int32
    }

    sudogcache []*sudog
    sudogbuf   [128]*sudog

    // Cache of mspan objects from the heap.
    mspancache struct {
        // We need an explicit length here because this field is used
        // in allocation codepaths where write barriers are not allowed,
        // and eliminating the write barrier/keeping it eliminated from
        // slice updates is tricky, more so than just managing the length
        // ourselves.
        len int
        buf [128]*mspan
    }

    // Cache of a single pinner object to reduce allocations from repeated
    // pinner creation.
    pinnerCache *pinner

    trace pTraceState

    palloc persistentAlloc // per-P to avoid mutex

    // The when field of the first entry on the timer heap.
    // This is 0 if the timer heap is empty.
    timer0When atomic.Int64

    // The earliest known nextwhen field of a timer with
    // timerModifiedEarlier status. Because the timer may have been
    // modified again, there need not be any timer with this value.
    // This is 0 if there are no timerModifiedEarlier timers.
    timerModifiedEarliest atomic.Int64

    // Per-P GC state
    gcAssistTime         int64 // Nanoseconds in assistAlloc
    gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)

    // limiterEvent tracks events for the GC CPU limiter.
    limiterEvent limiterEvent

    // gcMarkWorkerMode is the mode for the next mark worker to run in.
    // That is, this is used to communicate with the worker goroutine
    // selected for immediate execution by
    // gcController.findRunnableGCWorker. When scheduling other goroutines,
    // this field must be set to gcMarkWorkerNotWorker.
    gcMarkWorkerMode gcMarkWorkerMode
    // gcMarkWorkerStartTime is the nanotime() at which the most recent
    // mark worker started.
    gcMarkWorkerStartTime int64

    // gcw is this P's GC work buffer cache. The work buffer is
    // filled by write barriers, drained by mutator assists, and
    // disposed on certain GC state transitions.
    gcw gcWork

    // wbBuf is this P's GC write barrier buffer.
    //
    // TODO: Consider caching this in the running G.
    wbBuf wbBuf

    runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point

    // statsSeq is a counter indicating whether this P is currently
    // writing any stats. Its value is even when not, odd when it is.
    statsSeq atomic.Uint32

    // Lock for timers. We normally access the timers while running
    // on this P, but the scheduler can also do it from a different P.
    timersLock mutex

    // Actions to take at some time. This is used to implement the
    // standard library's time package.
    // Must hold timersLock to access.
    timers []*timer

    // Number of timers in P's heap.
    numTimers atomic.Uint32

    // Number of timerDeleted timers in P's heap.
    deletedTimers atomic.Uint32

    // Race context used while executing timer functions.
    timerRaceCtx uintptr

    // maxStackScanDelta accumulates the amount of stack space held by
    // live goroutines (i.e. those eligible for stack scanning).
    // Flushed to gcController.maxStackScan once maxStackScanSlack
    // or -maxStackScanSlack is reached.
    maxStackScanDelta int64

    // gc-time statistics about current goroutines
    // Note that this differs from maxStackScan in that this
    // accumulates the actual stack observed to be used at GC time (hi - sp),
    // not an instantaneous measure of the total stack size that might need
    // to be scanned (hi - lo).
    scannedStackSize uint64 // stack size of goroutines scanned by this P
    scannedStacks    uint64 // number of goroutines scanned by this P

    // preempt is set to indicate that this P should enter the
    // scheduler ASAP (regardless of what G is running on it).
    preempt bool

    // pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
    //
    // Used only if GOEXPERIMENT=pagetrace.
    pageTraceBuf pageTraceBuf

    // Padding is no longer needed. False sharing is now not a worry because p is large enough
    // that its size class is an integer multiple of the cache line size (for any of our architectures).
}
In the p struct, runnext guintptr is the "run next" slot: it holds at most one G, the next one scheduled to run.
In the p struct, runq [256]guintptr is the local run queue: a 256-entry array holding Gs that are waiting to run.
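The scheduler consumes these queues in strict priority order: runnext first, then the local run queue, then the global run queue. Below is a toy sketch of that order; the type and function names (G, P, globalQ, pickNext) are mine, and the real runqget/findRunnable additionally use atomics, netpoll, and work stealing.

package sched

type G struct{ id int }

type P struct {
    runnext            *G
    runq               [256]*G
    runqhead, runqtail uint32
}

// globalQ stands in for sched.runq, which the runtime guards with sched.lock.
var globalQ []*G

// pickNext illustrates only the priority order between the queue levels.
func pickNext(p *P) *G {
    // 1. runnext: highest priority; it inherits the current time slice.
    if g := p.runnext; g != nil {
        p.runnext = nil
        return g
    }
    // 2. Local run queue: a ring of up to 256 Gs (lock-free in the runtime).
    if p.runqhead != p.runqtail {
        g := p.runq[p.runqhead%uint32(len(p.runq))]
        p.runqhead++
        return g
    }
    // 3. Global run queue; after this the real scheduler polls the network
    //    and tries to steal from other Ps.
    if len(globalQ) > 0 {
        g := globalQ[0]
        globalQ = globalQ[1:]
        return g
    }
    return nil
}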
type gobuf struct {
    // The offsets of sp, pc, and g are known to (hard-coded in) libmach.
    //
    // ctxt is unusual with respect to GC: it may be a
    // heap-allocated funcval, so GC needs to track it, but it
    // needs to be set and cleared from assembly, where it's
    // difficult to have write barriers. However, ctxt is really a
    // saved, live register, and we only ever exchange it between
    // the real register and the gobuf. Hence, we treat it as a
    // root during stack scanning, which means assembly that saves
    // and restores it doesn't need write barriers. It's still
    // typed as a pointer so that any other writes from Go get
    // write barriers.
    sp   uintptr
    pc   uintptr
    g    guintptr
    ctxt unsafe.Pointer
    ret  uintptr
    lr   uintptr
    bp   uintptr // for framepointer-enabled architectures
}
ch <-  ->  runtime.chansend1  ->  runtime.chansend {
    ...
    gp := getg()
    mysg := acquireSudog()
    ...
    gp.waiting = mysg
    gp.param = nil
    c.sendq.enqueue(mysg)
    // Signal to anyone trying to shrink our stack that we're about
    // to park on a channel. The window between when this G's status
    // changes and when we set gp.activeStackChans is not safe for
    // stack shrinking.
    gp.parkingOnChan.Store(true)
    gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceBlockChanSend, 2)
    ...
} -> gopark
<-ch  ->  runtime.chanrecv1(c *hchan, elem unsafe.Pointer)  ->  runtime.chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) {
    ...
    // no sender available: block on this channel.
    gp := getg()
    mysg := acquireSudog()
    ...
    gp.waiting = mysg
    mysg.g = gp
    ...
    c.recvq.enqueue(mysg)
    // Signal to anyone trying to shrink our stack that we're about
    // to park on a channel. The window between when this G's status
    // changes and when we set gp.activeStackChans is not safe for
    // stack shrinking.
    gp.parkingOnChan.Store(true)
    gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceBlockChanRecv, 2)
} -> runtime.gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int)
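Both paths end in gopark, which moves the G to _Gwaiting and hands the M back to the scheduler. This is easy to observe from user code (a small demo of mine, not from the runtime sources): a goroutine blocked on a receive shows up as "chan receive" in a goroutine dump, corresponding to waitReasonChanReceive above.

package main

import (
    "os"
    "runtime/pprof"
    "time"
)

func main() {
    ch := make(chan int) // unbuffered: a receive blocks until a sender arrives
    go func() {
        <-ch // parks via gopark(..., waitReasonChanReceive, ...)
    }()
    time.Sleep(100 * time.Millisecond) // let the goroutine park
    // debug=2 prints full stacks; the blocked goroutine is listed
    // in state "chan receive".
    pprof.Lookup("goroutine").WriteTo(os.Stdout, 2)
}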
// sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving
// on a channel.
//
// sudog is necessary because the g ↔ synchronization object relation
// is many-to-many. A g can be on many wait lists, so there may be
// many sudogs for one g; and many gs may be waiting on the same
// synchronization object, so there may be many sudogs for one object.
//
// sudogs are allocated from a special pool. Use acquireSudog and
// releaseSudog to allocate and free them.
type sudog struct {
    // The following fields are protected by the hchan.lock of the
    // channel this sudog is blocking on. shrinkstack depends on
    // this for sudogs involved in channel ops.

    g *g

    next *sudog
    prev *sudog
    elem unsafe.Pointer // data element (may point to stack)

    // The following fields are never accessed concurrently.
    // For channels, waitlink is only accessed by g.
    // For semaphores, all fields (including the ones above)
    // are only accessed when holding a semaRoot lock.

    acquiretime int64
    releasetime int64
    ticket      uint32

    // isSelect indicates g is participating in a select, so
    // g.selectDone must be CAS'd to win the wake-up race.
    isSelect bool

    // success indicates whether communication over channel c
    // succeeded. It is true if the goroutine was awoken because a
    // value was delivered over channel c, and false if awoken
    // because c was closed.
    success bool

    // waiters is a count of semaRoot waiting list other than head of list,
    // clamped to a uint16 to fit in unused space.
    // Only meaningful at the head of the list.
    // (If we wanted to be overly clever, we could store a high 16 bits
    // in the second entry in the list.)
    waiters uint16

    parent   *sudog // semaRoot binary tree
    waitlink *sudog // g.waiting list or semaRoot
    waittail *sudog // semaRoot
    c        *hchan // channel
}
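The many-to-many relation is easiest to see with select (a demo of mine): while the goroutine below is parked, a single g owns two sudogs at once, and the isSelect/g.selectDone machinery decides which wake-up wins.

package main

import "time"

func main() {
    ch1, ch2 := make(chan int), make(chan int)
    go func() {
        // While parked here, this one g owns two sudogs: one enqueued
        // on ch1.recvq and one on ch2.recvq, both with isSelect=true.
        // A sender on either channel must CAS g.selectDone to win the
        // wake-up race.
        select {
        case <-ch1:
        case <-ch2:
        }
    }()
    time.Sleep(50 * time.Millisecond) // let it park
    ch2 <- 1                          // wins the race; the ch1 sudog is dequeued and released
    time.Sleep(50 * time.Millisecond)
}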
type hchan struct {
    qcount   uint           // total data in the queue
    dataqsiz uint           // size of the circular queue
    buf      unsafe.Pointer // points to an array of dataqsiz elements
    elemsize uint16
    closed   uint32
    elemtype *_type // element type
    sendx    uint   // send index
    recvx    uint   // receive index
    recvq    waitq  // list of recv waiters
    sendq    waitq  // list of send waiters

    // lock protects all fields in hchan, as well as several
    // fields in sudogs blocked on this channel.
    //
    // Do not change another G's status while holding this lock
    // (in particular, do not ready a G), as this can deadlock
    // with stack shrinking.
    lock mutex
}

type waitq struct {
    first *sudog
    last  *sudog
}
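These fields map directly onto what len and cap report for a buffered channel: qcount is len(ch), dataqsiz is cap(ch), and sendx/recvx walk the circular buffer. For example:

package main

import "fmt"

func main() {
    ch := make(chan int, 4) // hchan with dataqsiz=4; buf holds 4 elements
    ch <- 1                 // copied into buf[sendx], sendx++, qcount++
    ch <- 2
    fmt.Println(len(ch), cap(ch)) // 2 4  (qcount, dataqsiz)
    <-ch                          // read from buf[recvx], recvx++, qcount--
    fmt.Println(len(ch), cap(ch)) // 1 4
}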
package main

import (
    "fmt"
    "sync"
    "time"
)

var mtx sync.Mutex

func main() {
    go func() {
        mtx.Lock()
        defer mtx.Unlock()
        fmt.Printf("Start\n")
        time.Sleep(time.Second * 10)
        fmt.Printf("End\n")
    }()
    time.Sleep(time.Second) // Ensure child goroutine gets the mutex before main goroutine
    fmt.Printf("Try to acquire mutex\n")
    mtx.Lock()
    fmt.Printf("Main goroutine\n")
    mtx.Unlock()
}
mtx.Lock() ---> sync.(*Mutex).Lock() ---> sync.(*Mutex).lockSlow() ---> sync.runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int) ---> runtime.sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) ---> runtime.semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int, reason waitReason) {
    gp := getg()
    if gp != gp.m.curg {
        throw("semacquire not on the G stack")
    }

    // Easy case.
    if cansemacquire(addr) {
        return
    }

    // Harder case:
    //  increment waiter count
    //  try cansemacquire one more time, return if succeeded
    //  enqueue itself as a waiter
    //  sleep
    //  (waiter descriptor is dequeued by signaler)
    s := acquireSudog()
    root := semtable.rootFor(addr)
    ...
    // Any semrelease after the cansemacquire knows we're waiting
    // (we set nwait above), so go to sleep.
    root.queue(addr, s, lifo)
    goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes)
    ...
} ---> goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) ---> gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int)
// Asynchronous semaphore for sync.Mutex.

// A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
// Each of those sudog may in turn point (through s.waitlink) to a list
// of other sudogs waiting on the same address.
// The operations on the inner lists of sudogs with the same address
// are all O(1). The scanning of the top-level semaRoot list is O(log n),
// where n is the number of distinct addresses with goroutines blocked
// on them that hash to the given semaRoot.
// See golang.org/issue/17953 for a program that worked badly
// before we introduced the second level of list, and
// BenchmarkSemTable/OneAddrCollision/* for a benchmark that exercises this.
type semaRoot struct {
    lock  mutex
    treap *sudog        // root of balanced tree of unique waiters.
    nwait atomic.Uint32 // Number of waiters. Read w/o the lock.
}

// Prime to not correlate with any user patterns.
const semTabSize = 251

type semTable [semTabSize]struct {
    root semaRoot
    pad  [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte
}
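semacquire1 above maps the semaphore address to one of these 251 roots via semtable.rootFor. In runtime/sema.go the lookup is just a shift-and-mod hash of the address (quoted from memory, so the exact form may drift across Go versions):

// rootFor hashes a semaphore's address into one of the semTabSize roots.
func (t *semTable) rootFor(addr *uint32) *semaRoot {
    return &t[(uintptr(unsafe.Pointer(addr))>>3)%semTabSize].root
}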
Blocking scenarios the runtime cannot intercept
Blocking in time.Sleep, channel send, channel receive, select, network reads and writes, sync.Mutex, and the like can all be intercepted by the runtime. Some blocking, however, cannot be: while a goroutine is executing C code or is blocked in a syscall, it must occupy an OS thread.
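A Unix-only sketch of mine that makes the thread cost visible, using a pipe that never delivers data and the threadcreate profile: each goroutine parked in a raw blocking read pins an M, whereas channel-blocked goroutines would not.

package main

import (
    "fmt"
    "runtime/pprof"
    "syscall"
    "time"
)

func main() {
    // Raw syscall.Read bypasses the netpoller, so every reader
    // blocked here occupies its own OS thread.
    var fds [2]int
    if err := syscall.Pipe(fds[:]); err != nil {
        panic(err)
    }
    for i := 0; i < 8; i++ {
        go func() {
            buf := make([]byte, 1)
            syscall.Read(fds[0], buf) // blocks inside read(2), pinning this M
        }()
    }
    time.Sleep(500 * time.Millisecond)
    // The count grows with the number of blocked readers.
    fmt.Println("threads created:", pprof.Lookup("threadcreate").Count())
}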
III. Sysmon
The system monitor runs at high priority on a dedicated thread and does not need to be bound to a P.
IV. Summary
The runtime consists of the scheduler, netpoll, memory management, and garbage collection.
GMP: M is the task consumer; G is a unit of computation; P is a token granting permission to use a CPU.
Queue hierarchy in GMP: P's runnext field -> P's local run queue -> the global run queue; the multi-level queues reduce lock contention.