Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ OPTIONS:
Port:
--port-driver value port driver for non-host network. [none, implicit (for pasta), builtin, slirp4netns, gvisor-tap-vsock(experimental)] (default: "none")
--publish value, -p value [ --publish value, -p value ] publish ports. e.g. "127.0.0.1:8080:80/tcp"
--source-ip-transparent preserve real client source IP using IP_TRANSPARENT (builtin port driver) (default: true)
--source-ip-transparent preserve real client source IP using IP_TRANSPARENT (builtin port driver, TCP only) (default: true)

Process:
--pidns create a PID namespace (default: false)
Expand Down
2 changes: 1 addition & 1 deletion cmd/rootlesskit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ See https://rootlesscontaine.rs/getting-started/common/ .
}, CategoryPort),
Categorize(&cli.BoolFlag{
Name: "source-ip-transparent",
Usage: "preserve real client source IP using IP_TRANSPARENT (builtin port driver)",
Usage: "preserve real client source IP using IP_TRANSPARENT (builtin port driver, TCP only)",
Value: true,
}, CategoryPort),
Categorize(&cli.BoolFlag{
Expand Down
2 changes: 1 addition & 1 deletion docs/port.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The default value is `none` (do not expose ports).
| `--port-driver` | Throughput | Source IP | Notes
|----------------------|-------------|----------|-------
| `slirp4netns` | 8.03 Gbps | Propagated |
| `builtin` | 29.9 Gbps | Propagated (since v3.0) | In the case of Rootless Docker, userland-proxy has to be disabled for propagating the source IP.
| `builtin` | 29.9 Gbps | Propagated for TCP (since v3.0) | Source IP propagation (`--source-ip-transparent`) applies to TCP only; UDP ports are still forwarded, but the client source IP is not propagated. In the case of Rootless Docker, userland-proxy has to be disabled for propagating the source IP.
| `implicit` | 37.6 Gbps | Propagated | Requires `pasta` network
| `gvisor-tap-vsock` (Experimental) | 3.83 Gbps | Not propagated | Throughput is currently limited; see issue link below for improvement ideas.

Expand Down
23 changes: 14 additions & 9 deletions pkg/port/builtin/child/child.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,18 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er

var targetConn net.Conn
var err error
if d.sourceIPTransparent && req.SourceIP != "" && req.SourcePort != 0 && (dialProto == "tcp" || dialProto == "udp") && !net.ParseIP(req.SourceIP).IsLoopback() {
// IP_TRANSPARENT source IP preservation is only supported for TCP.
//
// For UDP it cannot be made to work reliably: the in-netns server replies to
// the real (non-local) client address, and unlike TCP there is no per-flow
// accepted socket to carry the fwmark (no udp_fwmark_accept), so the reply's
// route and source address are selected at send time via the main table. The
// reply is therefore sent out the default route (e.g. the slirp4netns TAP)
// and never reaches the transparent socket, breaking UDP forwarding entirely
// for non-loopback clients (rootless-containers/rootlesskit#592). UDP falls
// back to the non-transparent path below, which works for all clients but
// does not preserve the client source IP.
if d.sourceIPTransparent && req.SourceIP != "" && req.SourcePort != 0 && dialProto == "tcp" && !net.ParseIP(req.SourceIP).IsLoopback() {
d.routingSetup.Do(func() { d.routingReady = d.setupTransparentRouting() })
if !d.routingReady {
d.routingWarn.Do(func() {
Expand Down Expand Up @@ -250,17 +261,11 @@ func (d *childDriver) setupTransparentRouting() bool {

// transparentDial dials targetAddr using IP_TRANSPARENT, binding to the given
// source IP and port so the backend service sees the real client address.
// Only TCP is supported; see the comment in handleConnectRequest.
func transparentDial(dialProto, targetAddr, sourceIP string, sourcePort int) (net.Conn, error) {
var localAddr net.Addr
switch dialProto {
case "tcp":
localAddr = &net.TCPAddr{IP: net.ParseIP(sourceIP), Port: sourcePort}
case "udp":
localAddr = &net.UDPAddr{IP: net.ParseIP(sourceIP), Port: sourcePort}
}
dialer := net.Dialer{
Timeout: time.Second,
LocalAddr: localAddr,
LocalAddr: &net.TCPAddr{IP: net.ParseIP(sourceIP), Port: sourcePort},
Control: func(network, address string, c syscall.RawConn) error {
var sockErr error
if err := c.Control(func(fd uintptr) {
Expand Down
15 changes: 4 additions & 11 deletions pkg/port/builtin/msg/msg.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,10 @@ func ConnectToChild(c *net.UnixConn, spec port.Spec, sourceAddr net.Addr) (int,
ParentIP: spec.ParentIP,
HostGatewayIP: hostGatewayIP(),
}
switch a := sourceAddr.(type) {
case *net.TCPAddr:
if a != nil {
req.SourceIP = a.IP.String()
req.SourcePort = a.Port
}
case *net.UDPAddr:
if a != nil {
req.SourceIP = a.IP.String()
req.SourcePort = a.Port
}
// Source IP preservation (IP_TRANSPARENT) is only supported for TCP.
if tcpAddr, ok := sourceAddr.(*net.TCPAddr); ok && tcpAddr != nil {
req.SourceIP = tcpAddr.IP.String()
req.SourcePort = tcpAddr.Port
}
if _, err := lowlevelmsgutil.MarshalToWriter(c, &req); err != nil {
return 0, err
Expand Down
4 changes: 2 additions & 2 deletions pkg/port/builtin/parent/udp/udp.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh ch
udpp := &udpproxy.UDPProxy{
LogWriter: logWriter,
Listener: c,
BackendDial: func(from *net.UDPAddr) (*net.UDPConn, error) {
BackendDial: func() (*net.UDPConn, error) {
// get fd from the child as an SCM_RIGHTS cmsg
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, from)
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, nil)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type connTrackMap map[connTrackKey]*net.UDPConn
type UDPProxy struct {
LogWriter io.Writer
Listener *net.UDPConn
BackendDial func(from *net.UDPAddr) (*net.UDPConn, error)
BackendDial func() (*net.UDPConn, error)
connTrackTable connTrackMap
connTrackLock sync.Mutex
}
Expand Down Expand Up @@ -108,7 +108,7 @@ func (proxy *UDPProxy) Run() {
proxy.connTrackLock.Lock()
proxyConn, hit := proxy.connTrackTable[*fromKey]
if !hit {
proxyConn, err = proxy.BackendDial(from)
proxyConn, err = proxy.BackendDial()
if err != nil {
fmt.Fprintf(proxy.LogWriter, "Can't proxy a datagram to udp: %v\n", err)
proxy.connTrackLock.Unlock()
Expand Down
44 changes: 39 additions & 5 deletions pkg/port/testsuite/testsuite.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,11 +512,25 @@ func transparentUDPDialAndSend(t *testing.T, parentAddr string) string {
if err != nil {
t.Fatal(err)
}
defer conn.Close()
clientAddr := conn.LocalAddr().String()
if _, err := conn.Write([]byte("hello")); err != nil {
t.Fatal(err)
}
conn.Close()
// Verify the return path: the echoed reply must reach the client. This is
// the regression assertion for rootless-containers/rootlesskit#592, where
// UDP responses were lost for non-loopback clients.
if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
t.Fatal(err)
}
buf := make([]byte, 64)
n, err := conn.Read(buf)
if err != nil {
t.Fatalf("did not receive UDP echo reply (issue #592 return-path regression): %v", err)
}
if got := string(buf[:n]); got != "hello" {
t.Fatalf("unexpected UDP echo reply: %q", got)
}
return clientAddr
}

Expand Down Expand Up @@ -652,8 +666,6 @@ func testTransparentWithPID(t *testing.T, proto string, d port.ParentDriver, chi

echoCmd.Wait()

// Parse and verify: the echo server should see the client's non-loopback IP,
// not 127.0.0.1 or a hard-coded router address.
clientHost, _, err := net.SplitHostPort(clientAddr)
if err != nil {
t.Fatalf("failed to parse client address %q: %v", clientAddr, err)
Expand All @@ -663,8 +675,23 @@ func testTransparentWithPID(t *testing.T, proto string, d port.ParentDriver, chi
t.Fatalf("failed to parse server-seen address %q: %v", serverSawAddr, err)
}

if clientHost != serverHost {
t.Errorf("IP mismatch: client=%s, server saw=%s", clientHost, serverHost)
switch proto {
case "tcp":
// TCP preserves the real client source IP via IP_TRANSPARENT: the echo
// server must see the client's non-loopback IP, not 127.0.0.1 or a
// hard-coded router address.
if clientHost != serverHost {
t.Errorf("IP mismatch: client=%s, server saw=%s", clientHost, serverHost)
}
case "udp":
// UDP does not preserve the source IP: it falls back to the
// non-transparent path (see rootless-containers/rootlesskit#592 and the
// comment in pkg/port/builtin/child). The server therefore sees a
// loopback source, and the reply still reaches the client (verified by
// transparentUDPDialAndSend reading the echo above).
if clientHost == serverHost {
t.Errorf("expected UDP source IP not to be preserved, but server saw client IP %s", serverHost)
}
}

// Cleanup
Expand Down Expand Up @@ -707,6 +734,13 @@ func runUDPEchoServer() {
conn.WriteToUDP(buf[:n], from)
}

// RunUDPTransparent runs TestUDPTransparent as a subtest named
// "TestUDPTransparent", handing it a parent driver obtained from pf.
//
// Although it goes through the source-ip-transparent code path, UDP does not
// actually support IP_TRANSPARENT and falls back to the non-transparent path.
// The subtest therefore doubles as the regression test for
// rootless-containers/rootlesskit#592: the client connects from a non-loopback
// address (the case that previously hit the broken path) and the echo reply
// must be delivered back to it. Preservation of the client source IP is
// intentionally not expected for UDP.
func RunUDPTransparent(t *testing.T, pf func() port.ParentDriver) {
	const subtestName = "TestUDPTransparent"
	t.Run(subtestName, func(sub *testing.T) {
		// The driver is created inside the subtest, exactly when it runs.
		driver := pf()
		TestUDPTransparent(sub, driver)
	})
}
Expand Down