mirror of
https://github.com/MHSanaei/3x-ui.git
synced 2026-06-28 04:00:57 +00:00
fix(jobs): isolate per-node background goroutines from panics (#5397)
A panic in a goroutine without a recover takes the whole panel down. The per-node heartbeat and traffic-sync goroutines run remote network I/O for each node with no panic isolation, so one misbehaving node could crash the master. Add common.GoRecover(name, fn), which runs fn in a goroutine guarded by a recover that logs the panic with a stack trace instead of crashing, and use it for the per-node heartbeat, traffic-sync and global-push goroutines. The deferred WaitGroup/semaphore releases still run during panic unwind, so the group never stalls. Other background goroutines can adopt the same helper.
This commit is contained in:
parent
bedbe04bf1
commit
f63ed9f510
4 changed files with 67 additions and 7 deletions
|
|
@ -4,6 +4,7 @@ package common
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime/debug"
|
||||
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
)
|
||||
|
|
@ -30,3 +31,17 @@ func Recover(msg string) any {
|
|||
}
|
||||
return panicErr
|
||||
}
|
||||
|
||||
// GoRecover runs fn in a new goroutine guarded by a recover, so a panic in a
|
||||
// background goroutine is logged (with name and a stack trace) instead of taking
|
||||
// the whole process down. name identifies the goroutine in the log.
|
||||
func GoRecover(name string, fn func()) {
|
||||
go func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
logger.Error("panic in goroutine", name, ":", r, "\n"+string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}()
|
||||
}
|
||||
|
|
|
|||
41
internal/util/common/gorecover_test.go
Normal file
41
internal/util/common/gorecover_test.go
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
package common
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/op/go-logging"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
logger.InitLogger(logging.ERROR)
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestGoRecover_RunsFn(t *testing.T) {
|
||||
done := make(chan struct{})
|
||||
GoRecover("test-run", func() { close(done) })
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("fn did not run")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoRecover_RecoversPanic(t *testing.T) {
|
||||
done := make(chan struct{})
|
||||
// If GoRecover did not recover, this panic would crash the test binary.
|
||||
GoRecover("test-panic", func() {
|
||||
defer close(done)
|
||||
panic("boom")
|
||||
})
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("goroutine did not complete")
|
||||
}
|
||||
// Let the deferred recover+log run before the test ends.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
|
|
@ -9,6 +9,7 @@ import (
|
|||
"github.com/mhsanaei/3x-ui/v3/internal/database/model"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/eventbus"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/util/common"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web/service"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web/websocket"
|
||||
)
|
||||
|
|
@ -50,11 +51,12 @@ func (j *NodeHeartbeatJob) Run() {
|
|||
}
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(n *model.Node) {
|
||||
n := n
|
||||
common.GoRecover("node-heartbeat:"+n.Name, func() {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
j.probeOne(n)
|
||||
}(n)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@ import (
|
|||
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/database/model"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/util/common"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web/runtime"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web/service"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web/websocket"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/xray"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -96,11 +96,12 @@ func (j *NodeTrafficSyncJob) Run() {
|
|||
}
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(n *model.Node) {
|
||||
n := n
|
||||
common.GoRecover("node-traffic-sync:"+n.Name, func() {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
j.syncOne(mgr, n, doIpSync)
|
||||
}(n)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
|
|
@ -211,7 +212,8 @@ func (j *NodeTrafficSyncJob) maybePushGlobals(mgr *runtime.Manager, nodes []*mod
|
|||
}
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(n *model.Node, remote *runtime.Remote, traffics []*xray.ClientTraffic) {
|
||||
n, remote, traffics := n, remote, traffics
|
||||
common.GoRecover("node-global-push:"+n.Name, func() {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), nodeTrafficSyncRequestTimeout)
|
||||
|
|
@ -225,7 +227,7 @@ func (j *NodeTrafficSyncJob) maybePushGlobals(mgr *runtime.Manager, nodes []*mod
|
|||
logger.Warning("node traffic sync: push globals to", n.Name, "failed:", err)
|
||||
}
|
||||
}
|
||||
}(n, remote, traffics)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue