Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ replace github.com/sagernet/wireguard-go => github.com/getlantern/wireguard-go v

replace github.com/tetratelabs/wazero => github.com/getlantern/wazero v1.11.0-water.1

replace github.com/refraction-networking/water => github.com/getlantern/water v0.7.1-alpha.0.20260309190745-bd547c14b4aa
replace github.com/refraction-networking/water => github.com/getlantern/water v0.7.1-alpha.0.20260619150927-31d1e50fe910

require (
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ github.com/getlantern/sing-box-minimal v1.12.22-lantern.0.20260529221144-4396515
github.com/getlantern/sing-box-minimal v1.12.22-lantern.0.20260529221144-439651539f38/go.mod h1:LzlFRel9E92gX0HXWCdsxgeg+kuAEPzLR+Znixk9EI4=
github.com/getlantern/telemetry v0.0.0-20250606052628-8960164ec1f5 h1:6ITBYqNkLbVZ1tQNXwmH8N80rZtvyW6IZa8L6EAhBQo=
github.com/getlantern/telemetry v0.0.0-20250606052628-8960164ec1f5/go.mod h1:gx3dPUhZtQszPX5C+TshhxPXQHlY5t4gMnOOMA+viPA=
github.com/getlantern/water v0.7.1-alpha.0.20260309190745-bd547c14b4aa h1:goZee1cURbWPiqX1xyjfwLB/zByQ50hEwonnnyLmcJQ=
github.com/getlantern/water v0.7.1-alpha.0.20260309190745-bd547c14b4aa/go.mod h1:Yo6Yk++Q9HRaCT+6/eYk3elNoNtjlJ3V5FufHY6ezto=
github.com/getlantern/water v0.7.1-alpha.0.20260619150927-31d1e50fe910 h1:si3w1ruCFAs5qzQEi2qRUxvNkrzuiv9q4ayiCIvDpLQ=
github.com/getlantern/water v0.7.1-alpha.0.20260619150927-31d1e50fe910/go.mod h1:Yo6Yk++Q9HRaCT+6/eYk3elNoNtjlJ3V5FufHY6ezto=
github.com/getlantern/wazero v1.11.0-water.1 h1:mzUlaOoQKMDd16yL3mBIFrzg2nEK+gw7mdQgff1nOPQ=
github.com/getlantern/wazero v1.11.0-water.1/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU=
github.com/getlantern/wireguard-go v0.0.1-beta.7.0.20251208214020-d78e69f1eff4 h1:j/A6xSUbz78xQfFXyDbnWkg96D+UprbLgKlGjXbodxA=
Expand Down
6 changes: 6 additions & 0 deletions option/water.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ type WATEROutboundOptions struct {
// SkipHandshake is used when the WATER module deals with the handshake
// instead of the sing-box WATER transport
SkipHandshake bool `json:"skip_handshake,omitempty"`
// UseCompiler runs the WASM transport with wazero's optimizing compiler
// instead of the interpreter. Defaults to off: the compiler's per-instance
// JIT arena can push the macOS/iOS NetworkExtension past its jetsam limit and
// get the process killed. Only enable on hosts without that limit (e.g. the
// standalone daemon), where it substantially lowers CPU.
UseCompiler bool `json:"use_compiler,omitempty"`
// Optional configuration for supporting UDP over TCP
UDPOverTCP *option.UDPOverTCPOptions `json:"udp_over_tcp,omitempty"`
}
Expand Down
150 changes: 100 additions & 50 deletions protocol/water/outbound.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"github.com/getlantern/lantern-water/seed"
waterVC "github.com/getlantern/lantern-water/version_control"
"github.com/refraction-networking/water"
_ "github.com/refraction-networking/water/transport/v1"
waterv1 "github.com/refraction-networking/water/transport/v1"
"github.com/sagernet/sing-box/adapter"
"github.com/sagernet/sing-box/adapter/outbound"
"github.com/sagernet/sing-box/common/conntrack"
Expand Down Expand Up @@ -51,6 +51,7 @@ type Outbound struct {
logger logger.ContextLogger
skipHandshake bool
dialerConfig *water.Config
sharedDialer *waterv1.SharedDialer
loadErr error
transportModuleConfig map[string]any
outboundDialer N.Dialer
Expand Down Expand Up @@ -196,11 +197,16 @@ func (o *Outbound) loadConfig(ctx context.Context, logger log.ContextLogger, opt
OverrideLogger: slogLogger,
}

// Use the interpreter, not the JIT. The JIT arena (5-20 MB on arm64) pushes
// the macOS/iOS NetworkExtension past its ~50 MB jetsam limit and the OS
// kills the process silently. Interpreted WASM is slower, but the proxy
// bottleneck is network I/O.
o.dialerConfig.RuntimeConfig().Interpreter()
// Default to the interpreter: the compiler's per-instance JIT arena can push
// the macOS/iOS NetworkExtension past its jetsam limit and get the process
// killed. Hosts without that limit opt into the compiler, which is much
// cheaper on CPU and reuses compiled modules across dials via the on-disk
// cache.
if options.UseCompiler {
o.dialerConfig.RuntimeConfig().Compiler()
} else {
o.dialerConfig.RuntimeConfig().Interpreter()
}

// Don't tie the WASM worker's lifetime to the dial ctx. sing-box cancels
// the dial ctx as soon as the dial phase completes — well before the conn
Expand All @@ -209,18 +215,24 @@ func (o *Outbound) loadConfig(ctx context.Context, logger log.ContextLogger, opt
o.dialerConfig.RuntimeConfig().SetCloseOnContextDone(false)
o.mu.Unlock()

// Validate and warm the interpreter by doing a lightweight parse of the
// WASM module so any module errors surface early.
logger.DebugContext(ctx, "validating WASM module via interpreter",
slog.String("transport", options.Transport))
if preErr := preCompileWASM(ctx, b, o.dialerConfig); preErr != nil {
logger.WarnContext(ctx, "WASM module validation failed (non-fatal)",
slog.String("transport", options.Transport),
slog.Any("error", preErr))
} else {
logger.DebugContext(ctx, "WASM module validation complete",
slog.String("transport", options.Transport))
// Build the shared runtime once: it compiles the WASM module a single time
// and reuses one runtime + compiled module across all dials, instantiating
// only a fresh guest instance per connection. This both surfaces module
// errors early and avoids the per-dial recompile that otherwise leaks the
// interpreter's compiled functions.
sharedDialer, err := waterv1.NewSharedDialer(ctx, o.dialerConfig)
if err != nil {
logger.ErrorContext(ctx, "failed to build shared WATER runtime",
slog.Any("error", err), slog.String("transport", options.Transport))
o.mu.Lock()
o.loadErr = err
o.mu.Unlock()
return
}
o.mu.Lock()
o.sharedDialer = sharedDialer
o.mu.Unlock()
logger.DebugContext(ctx, "WATER shared runtime ready", slog.String("transport", options.Transport))

if options.SeedEnabled {
transportFilepath := filepath.Join(wasmDir, fmt.Sprintf("%s.%s", options.Transport, "wasm"))
Expand Down Expand Up @@ -254,33 +266,20 @@ func (o *Outbound) Close() error {
o.cancelLoad()
o.mu.Lock()
defer o.mu.Unlock()
if o.sharedDialer != nil {
_ = o.sharedDialer.Close(context.Background())
o.sharedDialer = nil
}
if o.seeder != nil {
return o.seeder.Close()
}
return nil
}

// preCompileWASM validates the WASM binary against the provided runtime config
// by compiling it once. In interpreter mode (the default for WATER dials) this
// is a lightweight parse+validate step; in compiler mode it warms the on-disk
// JIT compilation cache. Either way this surfaces module-load errors early.
// It is best-effort; callers must handle a non-nil error gracefully.
func preCompileWASM(ctx context.Context, wasmBin []byte, cfg *water.Config) (err error) {
defer func() {
if r := recover(); r != nil {
slog.Error("panic during WASM pre-compilation",
slog.Any("panic", r),
slog.String("stack", string(debug.Stack())))
err = fmt.Errorf("WASM pre-compilation panicked: %v", r)
}
}()
rt := wazero.NewRuntimeWithConfig(ctx, cfg.RuntimeConfig().GetConfig())
defer rt.Close(ctx)
_, err = rt.CompileModule(ctx, wasmBin)
return
}

func (o *Outbound) newDialer(ctx context.Context, destination M.Socksaddr) (water.Dialer, error) {
// dialConfig returns the per-dial water.Config from the base config plus this
// dial's server dialer and the destination, which the WATM reads from its config
// file at instantiation.
func (o *Outbound) dialConfig(ctx context.Context, destination M.Socksaddr) (*water.Config, error) {
o.mu.Lock()
loadErr := o.loadErr
dialerCfg := o.dialerConfig
Expand All @@ -296,11 +295,23 @@ func (o *Outbound) newDialer(ctx context.Context, destination M.Socksaddr) (wate
return nil, fmt.Errorf("WATER outbound is still loading, not ready to dial")
}

cfg := dialerCfg.Clone()
// Shallow copy so the per-dial fields below don't mutate the base config that
// concurrent dials share. No deep copy is needed: the shared runtime already
// compiled the binary once, and NewCore uses this config as-is.
cfgCopy := *dialerCfg
cfg := &cfgCopy

// NetworkDialerFunc is per-dial so it captures ctx; cancelling the dial also cancels the inner TCP connection.
// Dial the server with the caller's deadline but not its cancellation:
// cancelling the inner dial mid-dial would orphan a half-built core, and the
// connection's lifetime is governed by Close rather than this ctx anyway.
cfg.NetworkDialerFunc = func(network, address string) (net.Conn, error) {
conn, err := outboundDialer.DialContext(ctx, network, serverAddr)
dialCtx := context.WithoutCancel(ctx)
if dl, ok := ctx.Deadline(); ok {
var cancel context.CancelFunc
dialCtx, cancel = context.WithDeadline(dialCtx, dl)
defer cancel()
}
conn, err := outboundDialer.DialContext(dialCtx, network, serverAddr)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -331,7 +342,7 @@ func (o *Outbound) newDialer(ctx context.Context, destination M.Socksaddr) (wate
cfg.TransportModuleConfig = water.TransportModuleConfigFromBytes(transportModuleConfigJSON)
}

return water.NewDialerWithContext(ctx, cfg)
return cfg, nil
}

// DialContext dials a connection to the specified network and destination.
Expand Down Expand Up @@ -363,30 +374,69 @@ func (o *Outbound) dialTCP(ctx context.Context, destination M.Socksaddr) (conn n
}
}()

dialer, err := o.newDialer(ctx, destination)
o.mu.Lock()
sharedDialer := o.sharedDialer
o.mu.Unlock()
if sharedDialer == nil {
return nil, fmt.Errorf("WATER outbound is still loading, not ready to dial")
}

// The dial below detaches from ctx, so an already-canceled caller would
// otherwise pay for a full dial before the post-dial check notices.
if cerr := ctx.Err(); cerr != nil {
return nil, cerr
}

cfg, err := o.dialConfig(ctx, destination)
if err != nil {
o.logger.ErrorContext(ctx, "failed to build new WATER dialer", slog.Any("error", err), slog.String("destination", destination.String()))
o.logger.ErrorContext(ctx, "failed to build WATER dial config", slog.Any("error", err), slog.String("destination", destination.String()))
return nil, err
}

// NetworkDialerFunc (called inside dialer.DialContext during Initialize/_start)
// uses the caller's ctx, so context cancellation unblocks the dial naturally.
rawConn, err = dialer.DialContext(ctx, N.NetworkTCP, "localhost:0")
// Detach from sing-box's dial-ctx cancellation so the dial isn't torn down
// mid-flight; since it then ignores caller cancellation, honor it manually
// below.
rawConn, err = sharedDialer.DialContext(context.WithoutCancel(ctx), cfg, N.NetworkTCP, "localhost:0")
Comment on lines +396 to +399

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect the relevant section
sed -n '340,430p' protocol/water/outbound.go

# Find the shared dialer and related context handling
rg -n "sharedDialer\.DialContext|NetworkDialerFunc|WithoutCancel|WithDeadline|Deadline" protocol -S

# Map the file structure for the relevant file
ast-grep outline protocol/water/outbound.go --view expanded

Repository: getlantern/lantern-box

Length of output: 1967


🌐 Web query:

Go context.WithoutCancel removes deadlines documentation

💡 Result:

Yes, context.WithoutCancel removes both the cancellation signal and the deadline from the parent context [1][2]. According to the official Go documentation, context.WithoutCancel (introduced in Go 1.21) returns a derived context that points to the parent but is not canceled when the parent is canceled [1][3]. As part of this contract, the returned context explicitly returns no deadline (Deadline returns ok=false) and no error (Err returns nil), and its Done channel is nil [1][4]. By stripping both the cancellation and the deadline, context.WithoutCancel allows a task to outlive the scope of its parent request while still retaining access to request-scoped values [2]. If a specific deadline is required for the detached work, it must be added back explicitly using another constructor like context.WithTimeout or context.WithDeadline [2].

Citations:


Preserve the caller deadline when detaching the dial context. context.WithoutCancel(ctx) clears both cancellation and deadline, so this dial can outlive the request timeout unless the deadline is re-applied.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@protocol/water/outbound.go` around lines 396 - 399, The detached dial in
sharedDialer.DialContext currently drops both caller cancellation and the caller
deadline, so reapply the deadline after context.WithoutCancel(ctx) instead of
using it alone. Update the outbound dial logic in the context-setup around
rawConn/sharedDialer.DialContext so the dial still ignores cancellation but
preserves ctx’s timeout/deadline semantics.

if err != nil {
o.logger.ErrorContext(ctx, "WATER failed to dial", slog.Any("error", err))
return nil, err
}
// The detached dial ignores caller cancellation, so honor it here: tear down
// the freshly built conn (and its instance) instead of leaking it. WATER's
// Close can block, so dispatch it off the cancellation path.
if cerr := ctx.Err(); cerr != nil {
go rawConn.Close()
return nil, cerr
}
return &asyncCloseConn{Conn: waterTransport.NewWATERConnection(rawConn, destination, o.skipHandshake)}, nil
}

// asyncCloseConn wraps a net.Conn whose Close can block (e.g. WATER's
// WaitWorker); dispatching to a goroutine prevents stalling callers on the
// synchronous cancellation path.
// asyncCloseConn wraps a WATER net.Conn to manage its instance's lifetime. Close
// is dispatched to a goroutine because WATER's Close can block (e.g. WaitWorker),
// and Read/Write trigger that Close on stream end: water tears the instance down
// only when the WATM worker thread exits, which a TCP stream ending (Read/Write
// returning EOF) does not by itself cause — so without this the instance lingers.
type asyncCloseConn struct {
net.Conn
closeOnce sync.Once
}

func (c *asyncCloseConn) Read(b []byte) (int, error) {
n, err := c.Conn.Read(b)
if err != nil {
c.Close()
}
return n, err
}

func (c *asyncCloseConn) Write(b []byte) (int, error) {
n, err := c.Conn.Write(b)
if err != nil {
c.Close()
}
return n, err
}

func (c *asyncCloseConn) Close() error {
c.closeOnce.Do(func() { go c.Conn.Close() })
return nil
Expand Down
Loading