diff --git a/experimental/libbox/oom_report.go b/experimental/libbox/oom_report.go
index e96c3e875..64afc4b52 100644
--- a/experimental/libbox/oom_report.go
+++ b/experimental/libbox/oom_report.go
@@ -64,19 +64,77 @@ type oomReporter struct{}
 var _ oomkiller.OOMReporter = (*oomReporter)(nil)
 
+// WriteReport writes a snapshot for memoryUsage to the path chosen by
+// nextAvailableReportPath under oom_reports in the working path. On success the
+// draft that existed when the call began is removed, unless it was replaced.
 func (r *oomReporter) WriteReport(memoryUsage uint64) error {
-	now := time.Now().UTC()
+	draftPath := filepath.Join(sWorkingPath, "oom_draft")
+	draftInfo, err := os.Stat(draftPath)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return err
+		}
+		draftInfo = nil
+	}
 	reportsDir := filepath.Join(sWorkingPath, "oom_reports")
-	err := os.MkdirAll(reportsDir, 0o777)
+	err = os.MkdirAll(reportsDir, 0o777)
 	if err != nil {
 		return err
 	}
 	chownReport(reportsDir)
 
-	destPath, err := nextAvailableReportPath(reportsDir, now)
+	destPath, err := nextAvailableReportPath(reportsDir, time.Now().UTC())
 	if err != nil {
 		return err
 	}
-	err = os.MkdirAll(destPath, 0o777)
+	err = r.writeSnapshot(destPath, memoryUsage)
+	if err != nil {
+		return err
+	}
+	return discardDraftIfCurrent(draftPath, draftInfo)
+}
+
+// WriteDraft removes any existing draft and writes a fresh snapshot for
+// memoryUsage in its place. It fails if the old draft cannot be removed.
+func (r *oomReporter) WriteDraft(memoryUsage uint64) error {
+	draftPath := filepath.Join(sWorkingPath, "oom_draft")
+	err := os.RemoveAll(draftPath)
+	if err != nil {
+		return err
+	}
+	return r.writeSnapshot(draftPath, memoryUsage)
+}
+
+// DiscardDraft removes the draft directory; a missing draft is not an error.
+func (r *oomReporter) DiscardDraft() error {
+	draftPath := filepath.Join(sWorkingPath, "oom_draft")
+	return os.RemoveAll(draftPath)
+}
+
+// discardDraftIfCurrent removes draftPath only if it is still the same file
+// that draftInfo describes. A nil draftInfo, a draft that has disappeared, or a
+// draft replaced since draftInfo was taken leaves the path untouched.
+func discardDraftIfCurrent(draftPath string, draftInfo os.FileInfo) error {
+	if draftInfo == nil {
+		return nil
+	}
+	currentInfo, err := os.Stat(draftPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	if !os.SameFile(draftInfo, currentInfo) {
+		return nil
+	}
+	return os.RemoveAll(draftPath)
+}
+
+// writeSnapshot creates destPath if needed and writes the snapshot for
+// memoryUsage into it.
+func (r *oomReporter) writeSnapshot(destPath string, memoryUsage uint64) error {
+	now := time.Now().UTC()
+	err := os.MkdirAll(destPath, 0o777)
 	if err != nil {
 		return err
 	}
@@ -139,3 +197,42 @@ func writeOOMProfile(destPath string, name string) {
 	}
 	chownReport(filePath)
 }
+
+// promoteOOMDraftAt moves a leftover draft directory under workingPath to the
+// path nextAvailableReportPath picks for the draft's modification time. If no
+// path can be chosen or the rename fails, the draft is deleted (best effort).
+func promoteOOMDraftAt(workingPath string) {
+	draftPath := filepath.Join(workingPath, "oom_draft")
+	info, err := os.Stat(draftPath)
+	if err != nil || !info.IsDir() {
+		return
+	}
+	reportsDir := filepath.Join(workingPath, "oom_reports")
+	initReportDir(reportsDir)
+	destPath, err := nextAvailableReportPath(reportsDir, info.ModTime().UTC())
+	if err != nil {
+		os.RemoveAll(draftPath)
+		return
+	}
+	err = os.Rename(draftPath, destPath)
+	if err != nil {
+		os.RemoveAll(draftPath)
+		return
+	}
+	chownReport(destPath)
+}
+
+// promoteOOMDraft promotes the draft found in the package working path.
+func promoteOOMDraft() {
+	promoteOOMDraftAt(sWorkingPath)
+}
+
+// PromoteOOMDraft promotes the draft found in the package working path.
+func PromoteOOMDraft() {
+	promoteOOMDraft()
+}
+
+// PromoteOOMDraftAt promotes the draft found under workingPath.
+func PromoteOOMDraftAt(workingPath string) {
+	promoteOOMDraftAt(workingPath)
+}
diff --git a/service/oomkiller/service.go b/service/oomkiller/service.go
index ec3838d2b..7c19562e3 100644
--- a/service/oomkiller/service.go
+++ b/service/oomkiller/service.go
@@ -15,6 +15,10 @@ import (
 
 type OOMReporter interface {
 	WriteReport(memoryUsage uint64) error
+	// WriteDraft stores a draft snapshot for memoryUsage.
+	WriteDraft(memoryUsage uint64) error
+	// DiscardDraft removes the stored draft.
+	DiscardDraft() error
 }
 
 func RegisterService(registry *boxService.Registry) {
@@ -29,6 +33,8 @@ type Service struct {
 	timerConfig    timerConfig
 	adaptiveTimer  *adaptiveTimer
 	lastReportTime atomic.Int64
+	// draftCancelled is set by discardOOMDraft and stops further draft writes.
+	draftCancelled atomic.Bool
 }
 
 func NewService(ctx context.Context, logger log.ContextLogger, tag string, options option.OOMKillerServiceOptions) (adapter.Service, error) {
@@ -81,3 +87,47 @@ func (s *Service) writeOOMReport(memoryUsage uint64) {
 		s.logger.Info("OOM report saved")
 	}
 }
+
+// writeOOMDraft asks the OOMReporter registered in the service context to
+// store a draft for memoryUsage. It does nothing once discardOOMDraft has run,
+// and removes a draft that was written while the discard was in progress.
+func (s *Service) writeOOMDraft(memoryUsage uint64) {
+	if s.draftCancelled.Load() {
+		return
+	}
+	reporter := service.FromContext[OOMReporter](s.ctx)
+	if reporter == nil {
+		return
+	}
+	err := reporter.WriteDraft(memoryUsage)
+	if s.draftCancelled.Load() {
+		// discardOOMDraft ran while the draft was being written, so its
+		// removal may have happened too early; remove the draft again.
+		discardErr := reporter.DiscardDraft()
+		if discardErr != nil {
+			s.logger.Warn("failed to discard OOM draft: ", discardErr)
+		}
+		return
+	}
+	if err != nil {
+		s.logger.Warn("failed to write OOM draft: ", err)
+	} else {
+		s.logger.Info("OOM draft saved")
+	}
+}
+
+// discardOOMDraft marks drafts as cancelled for this service and asks the
+// registered OOMReporter to remove the current draft.
+func (s *Service) discardOOMDraft() {
+	s.draftCancelled.Store(true)
+	reporter := service.FromContext[OOMReporter](s.ctx)
+	if reporter == nil {
+		return
+	}
+	err := reporter.DiscardDraft()
+	if err != nil {
+		s.logger.Warn("failed to discard OOM draft: ", err)
+	} else {
+		s.logger.Info("OOM draft discarded")
+	}
+}
diff --git a/service/oomkiller/service_darwin.go b/service/oomkiller/service_darwin.go
index 1d51c1b48..a40daea10 100644
--- a/service/oomkiller/service_darwin.go
+++ b/service/oomkiller/service_darwin.go
@@ -83,6 +83,7 @@ func (s *Service) Close() error {
 		if isLast {
 			C.stopMemoryPressureMonitor()
 		}
+		s.discardOOMDraft()
 	}
 	return nil
 }
@@ -100,6 +101,7 @@ func goMemoryPressureCallback(status C.ulong) {
 	sample := readMemorySample(policyModeNetworkExtension)
 	for _, s := range services {
 		s.logger.Warn("memory pressure: critical, usage: ", byteformats.FormatMemoryBytes(sample.usage))
+		s.writeOOMDraft(sample.usage)
 		s.adaptiveTimer.notifyPressure()
 	}
 }