Skip to content

TCPDialer Connection Pool Exhaustion #11

@sdvcrx

Description

@sdvcrx

Description

fastdns/client.go

Lines 39 to 105 in 575808f

// exchange performs the transport-level DNS round trip with the configured dialer.
func (c *Client) exchange(ctx context.Context, req, resp *Message) error {
var err error
var conn net.Conn
if c.Dialer != nil {
conn, err = c.Dialer.DialContext(ctx, "udp", c.Addr)
} else {
conn, err = net.Dial("udp", c.Addr)
}
if err != nil {
return err
}
if c.Timeout > 0 {
err = conn.SetDeadline(time.Now().Add(c.Timeout))
if err != nil && err != errors.ErrUnsupported {
return err
}
defer conn.SetDeadline(time.Time{}) // nolint:errcheck
}
if options, ok := ctx.Value(clientOptionsContextKey).(*clientOptionsContextValue); ok {
roa, err := req.OptionsAppender()
if err != nil {
return err
}
if options.prefix.IsValid() {
roa.AppendSubnet(options.prefix)
}
if options.cookie != "" {
roa.AppendCookie(options.cookie)
}
if options.padding != 0 {
roa.AppendPadding(options.padding)
}
}
_, err = conn.Write(req.Raw)
if err != nil {
return err
}
resp.Raw = resp.Raw[:cap(resp.Raw)]
n, err := conn.Read(resp.Raw)
if err != nil {
return err
}
resp.Raw = resp.Raw[:n]
err = ParseMessage(resp, resp.Raw, false)
if err != nil {
return err
}
if d, _ := c.Dialer.(interface {
Put(c net.Conn)
}); d != nil {
d.Put(conn)
}
if c.Dialer == nil {
_ = conn.Close()
}
return nil
}

When errors occur during Read or Write operations on the client exchange , the conn is not returned to the connection pool.

Since the TCPDialer connection pool is implemented using channels , repeated errors will exhaust the connection pool, ultimately causing a deadlock. (maybe affects UDPDialer too)

Reproduction Case

package main

import (
	"context"
	"crypto/tls"
	"log"
	"net"
	"time"

	"github.com/phuslu/fastdns"
)

func main() {
	maxConns := 1

	client := fastdns.Client{
		Addr: "1.1.1.1:853",
		Dialer: &fastdns.TCPDialer{
			Addr: func() (u *net.TCPAddr) { u, _ = net.ResolveTCPAddr("tcp", "1.1.1.1:853"); return }(),
			TLSConfig: &tls.Config{
				ServerName:         "1.1.1.1",
				ClientSessionCache: tls.NewLRUClientSessionCache(128),
			},
			MaxConns: uint16(maxConns),
		},
	}

	for range maxConns*2 + 1 {
		req, resp := fastdns.AcquireMessage(), fastdns.AcquireMessage()
		req.SetRequestQuestion("x.com", fastdns.TypeA, fastdns.ClassINET)
		err := client.Exchange(context.Background(), req, resp)
		if err != nil {
			log.Println("Query err", err)
		} else {
			log.Printf("Query success, %s: CLASS %s TYPE %s\n", resp.Domain, resp.Question.Class, resp.Question.Type)
		}

		// Wait for server-side idle timeout to trigger an EOF on next use
		time.Sleep(15 * time.Second)
	}
}

Actual Result:

  • Query 1: Success.
  • Query 2: Fails with EOF. Connection is leaked.
  • Query 3: Hangs indefinitely at <-d.conns.
Output logs
2026/03/11 16:33:26 Query success, x.com: CLASS IN TYPE A
2026/03/11 16:33:41 Query err EOF
fatal error: all goroutines are asleep - deadlock!

goroutine 1 [chan receive]:
github.com/phuslu/fastdns.(*TCPDialer).get(0x23f316a79d48?)
        .../fastdns/client_dialer.go:115 +0x58
github.com/phuslu/fastdns.(*TCPDialer).DialContext(0x104102140?, {0x23f316a79d78?, 0x23f316a79de8?}, {0x104216b24?, 0x100?}, {0x10443cbc0?, 0x23f316a79d01?})
        .../fastdns/client_dialer.go:96 +0x1c
github.com/phuslu/fastdns.(*Client).exchange(0x23f316a79e38, {0x1044960b0, 0x10450d7e0}, 0x23f316bc2720, 0x23f316bc2780)
        .../fastdns/client.go:45 +0x64
github.com/phuslu/fastdns.(*Client).Exchange(...)
        .../fastdns/client.go:32
main.main()
        .../fastdns/cmd/fastdoh/main.go:32 +0x238
exit status 2

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions