platform: Improve oom-killer

This commit is contained in:
世界 2026-04-22 01:52:05 +08:00
parent a20f5fd7b6
commit cd4a4e6229
No known key found for this signature in database
GPG key ID: CD109927C34A63C4
3 changed files with 120 additions and 4 deletions

View file

@ -64,19 +64,63 @@ type oomReporter struct{}
var _ oomkiller.OOMReporter = (*oomReporter)(nil)
// WriteReport writes a snapshot for memoryUsage into a new directory under
// sWorkingPath/oom_reports and then removes the draft directory if it is still
// the one that existed when the call started.
//
// NOTE(review): this listing is a diff with the +/- markers stripped, so a few
// statements appear in both their old and new form (for example the two
// MkdirAll lines and the two nextAvailableReportPath lines) — confirm against
// the real file before relying on the exact statement list.
func (r *oomReporter) WriteReport(memoryUsage uint64) error {
now := time.Now().UTC()
draftPath := filepath.Join(sWorkingPath, "oom_draft")
// Stat the draft before writing so the cleanup at the end can tell whether the
// draft was replaced while the report was being written.
draftInfo, err := os.Stat(draftPath)
if err != nil {
if !os.IsNotExist(err) {
return err
}
// No draft on disk: discardDraftIfCurrent treats a nil info as "nothing to do".
draftInfo = nil
}
reportsDir := filepath.Join(sWorkingPath, "oom_reports")
err := os.MkdirAll(reportsDir, 0o777)
err = os.MkdirAll(reportsDir, 0o777)
if err != nil {
return err
}
chownReport(reportsDir)
destPath, err := nextAvailableReportPath(reportsDir, now)
destPath, err := nextAvailableReportPath(reportsDir, time.Now().UTC())
if err != nil {
return err
}
err = os.MkdirAll(destPath, 0o777)
err = r.writeSnapshot(destPath, memoryUsage)
if err != nil {
return err
}
// Only reached after the snapshot was written without error.
return discardDraftIfCurrent(draftPath, draftInfo)
}
// WriteDraft replaces whatever is stored at sWorkingPath/oom_draft with a fresh
// snapshot for memoryUsage.
//
// It returns the error from removing the previous draft, or otherwise the
// result of writeSnapshot.
func (r *oomReporter) WriteDraft(memoryUsage uint64) error {
	draftPath := filepath.Join(sWorkingPath, "oom_draft")
	// Stop if the old draft cannot be removed: writeSnapshot creates destPath
	// with MkdirAll, which succeeds on an existing directory, so it would
	// otherwise write into the leftover draft. os.RemoveAll returns nil when
	// the path does not exist, so a missing draft is not an error here.
	err := os.RemoveAll(draftPath)
	if err != nil {
		return err
	}
	return r.writeSnapshot(draftPath, memoryUsage)
}
// DiscardDraft removes sWorkingPath/oom_draft and everything inside it,
// returning the result of os.RemoveAll.
func (r *oomReporter) DiscardDraft() error {
	return os.RemoveAll(filepath.Join(sWorkingPath, "oom_draft"))
}
// discardDraftIfCurrent removes draftPath only if it still refers to the same
// file that draftInfo describes.
//
// It is a no-op returning nil when draftInfo is nil, when draftPath no longer
// exists, or when os.SameFile reports that the path now refers to a different
// file. Any other Stat error, and any RemoveAll error, is returned.
func discardDraftIfCurrent(draftPath string, draftInfo os.FileInfo) error {
	if draftInfo == nil {
		return nil
	}
	latest, statErr := os.Stat(draftPath)
	switch {
	case statErr == nil:
		// Path exists; fall through to the identity check below.
	case os.IsNotExist(statErr):
		return nil
	default:
		return statErr
	}
	if os.SameFile(draftInfo, latest) {
		return os.RemoveAll(draftPath)
	}
	// The path was replaced after draftInfo was taken; leave it alone.
	return nil
}
func (r *oomReporter) writeSnapshot(destPath string, memoryUsage uint64) error {
now := time.Now().UTC()
err := os.MkdirAll(destPath, 0o777)
if err != nil {
return err
}
@ -139,3 +183,36 @@ func writeOOMProfile(destPath string, name string) {
}
chownReport(filePath)
}
// promoteOOMDraftAt moves a draft directory found at workingPath/oom_draft into
// workingPath/oom_reports, using the draft's modification time (UTC) to pick
// the destination via nextAvailableReportPath.
//
// It does nothing when the draft cannot be stat'ed or is not a directory. If a
// destination cannot be chosen or the rename fails, the draft is removed
// instead. No error is reported to the caller.
func promoteOOMDraftAt(workingPath string) {
	draftPath := filepath.Join(workingPath, "oom_draft")
	draftStat, statErr := os.Stat(draftPath)
	if statErr != nil {
		return
	}
	if !draftStat.IsDir() {
		return
	}
	reportsDir := filepath.Join(workingPath, "oom_reports")
	initReportDir(reportsDir)
	reportPath, promoteErr := nextAvailableReportPath(reportsDir, draftStat.ModTime().UTC())
	if promoteErr == nil {
		promoteErr = os.Rename(draftPath, reportPath)
	}
	if promoteErr != nil {
		// Either step failed: drop the draft rather than leave it behind.
		os.RemoveAll(draftPath)
		return
	}
	chownReport(reportPath)
}
// promoteOOMDraft runs promoteOOMDraftAt against the package-level sWorkingPath.
func promoteOOMDraft() {
promoteOOMDraftAt(sWorkingPath)
}
// PromoteOOMDraft is the exported entry point for promoteOOMDraft; it takes no
// arguments and returns nothing.
func PromoteOOMDraft() {
promoteOOMDraft()
}
// PromoteOOMDraftAt is the exported entry point for promoteOOMDraftAt;
// workingPath is passed through unchanged.
func PromoteOOMDraftAt(workingPath string) {
promoteOOMDraftAt(workingPath)
}

View file

@ -15,6 +15,8 @@ import (
// OOMReporter is the reporting hook that Service resolves from its context
// (via service.FromContext) when it needs to persist or drop OOM data.
type OOMReporter interface {
// WriteReport persists a report for the given memory usage.
// NOTE(review): the caller is not fully visible here — presumably writeOOMReport; confirm.
WriteReport(memoryUsage uint64) error
// WriteDraft is called by Service.writeOOMDraft with the sampled memory usage.
WriteDraft(memoryUsage uint64) error
// DiscardDraft is called by Service.discardOOMDraft, and by
// Service.writeOOMDraft when drafts were cancelled while one was being written.
DiscardDraft() error
}
func RegisterService(registry *boxService.Registry) {
@ -29,6 +31,7 @@ type Service struct {
timerConfig timerConfig
adaptiveTimer *adaptiveTimer
lastReportTime atomic.Int64
draftCancelled atomic.Bool
}
func NewService(ctx context.Context, logger log.ContextLogger, tag string, options option.OOMKillerServiceOptions) (adapter.Service, error) {
@ -81,3 +84,37 @@ func (s *Service) writeOOMReport(memoryUsage uint64) {
s.logger.Info("OOM report saved")
}
}
// writeOOMDraft asks the OOMReporter registered in s.ctx to write a draft for
// memoryUsage and logs the outcome.
//
// It does nothing once drafts have been cancelled (draftCancelled is set by
// discardOOMDraft) or when no reporter is registered.
func (s *Service) writeOOMDraft(memoryUsage uint64) {
	if s.draftCancelled.Load() {
		return
	}
	reporter := service.FromContext[OOMReporter](s.ctx)
	if reporter == nil {
		return
	}
	err := reporter.WriteDraft(memoryUsage)
	if s.draftCancelled.Load() {
		// Drafts were cancelled while WriteDraft was running, so remove
		// whatever it left behind. The write result is irrelevant here, but a
		// failed cleanup is reported the same way discardOOMDraft reports it.
		discardErr := reporter.DiscardDraft()
		if discardErr != nil {
			s.logger.Warn("failed to discard OOM draft: ", discardErr)
		}
		return
	}
	if err != nil {
		s.logger.Warn("failed to write OOM draft: ", err)
		return
	}
	// NOTE(review): logged at Warn, while the other success messages in this
	// file ("OOM report saved", "OOM draft discarded") use Info — confirm the
	// level is intended.
	s.logger.Warn("OOM draft saved")
}
// discardOOMDraft marks drafts as cancelled and then asks the OOMReporter
// registered in s.ctx to remove the current draft, logging the outcome.
//
// The flag is set before the reporter lookup, so it takes effect even when no
// reporter is registered; writeOOMDraft checks it and returns early.
func (s *Service) discardOOMDraft() {
	s.draftCancelled.Store(true)
	reporter := service.FromContext[OOMReporter](s.ctx)
	if reporter == nil {
		return
	}
	if discardErr := reporter.DiscardDraft(); discardErr != nil {
		s.logger.Warn("failed to discard OOM draft: ", discardErr)
		return
	}
	s.logger.Info("OOM draft discarded")
}

View file

@ -83,6 +83,7 @@ func (s *Service) Close() error {
if isLast {
C.stopMemoryPressureMonitor()
}
s.discardOOMDraft()
}
return nil
}
@ -100,6 +101,7 @@ func goMemoryPressureCallback(status C.ulong) {
sample := readMemorySample(policyModeNetworkExtension)
for _, s := range services {
s.logger.Warn("memory pressure: critical, usage: ", byteformats.FormatMemoryBytes(sample.usage))
s.writeOOMDraft(sample.usage)
s.adaptiveTimer.notifyPressure()
}
}