Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions plugins/inputs/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ func (d *Docker) Init() error {
}

// Start makes a first attempt at connecting to the Docker daemon. A failed
// attempt is only logged as a warning and never returned, so the plugin
// still starts when the daemon is not running; Gather tries to connect
// again while the client is unset.
func (d *Docker) Start(telegraf.Accumulator) error {
	err := d.initClient()
	if err == nil {
		return nil
	}

	// Swallow the error on purpose: returning it would stop Telegraf from
	// starting, which breaks setups that tolerate an absent Docker daemon.
	d.Log.Warnf("Failed to connect to Docker daemon during startup: %v. Will retry on first gather.", err)
	return nil
}

// initClient initializes the Docker client and performs Podman detection.
// Returns an error if the connection fails, but does not prevent Telegraf from starting.
func (d *Docker) initClient() error {
// Get client
c, err := d.getNewClient()
if err != nil {
Expand All @@ -153,6 +165,8 @@ func (d *Docker) Start(telegraf.Accumulator) error {
// Check API version compatibility
version, err := semver.NewVersion(d.client.ClientVersion())
if err != nil {
d.client.Close()
d.client = nil
return fmt.Errorf("failed to parse client version: %w", err)
}

Expand All @@ -170,6 +184,8 @@ func (d *Docker) Start(telegraf.Accumulator) error {

info, err := d.client.Info(ctx)
if err != nil {
d.client.Close()
d.client = nil
return fmt.Errorf("failed to get Docker info: %w", err)
}

Expand Down Expand Up @@ -197,6 +213,13 @@ func (d *Docker) Stop() {
}

func (d *Docker) Gather(acc telegraf.Accumulator) error {
// If client is not initialized, try to connect now
if d.client == nil {
if err := d.initClient(); err != nil {
return fmt.Errorf("failed to connect to Docker daemon: %w", err)
}
}

// Create label filters if not already created
if !d.filtersCreated {
err := d.createLabelFilters()
Expand Down
82 changes: 82 additions & 0 deletions plugins/inputs/docker/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package docker
import (
"context"
"crypto/tls"
"errors"
"io"
"reflect"
"sort"
Expand Down Expand Up @@ -1774,3 +1775,84 @@ func TestPodmanStatsCache(t *testing.T) {
require.NotContains(t, d.statsCache, "old-container")
require.Contains(t, d.statsCache, testID)
}

func TestStartWithUnavailableDocker(t *testing.T) {
// Test that Telegraf starts successfully even when Docker is unavailable
// This is a regression test for https://github.com/influxdata/telegraf/issues/18089
var acc testutil.Accumulator
d := Docker{
Log: testutil.Logger{},
newClient: func(string, *tls.Config) (dockerClient, error) {
return nil, errors.New("cannot connect to the Docker daemon")
},
newEnvClient: func() (dockerClient, error) {
return nil, errors.New("cannot connect to the Docker daemon")
},
}

require.NoError(t, d.Init())
// Start should NOT return an error even when Docker is unavailable
require.NoError(t, d.Start(&acc))
// Client should be nil since connection failed
require.Nil(t, d.client)

// Gather should return an error since Docker is still unavailable
err := d.Gather(&acc)
require.Error(t, err)
require.Contains(t, err.Error(), "failed to connect to Docker daemon")
}

// TestLazyClientInitialization checks that a client which could not be
// created during Start is created by the next Gather call once the
// connection attempt succeeds.
func TestLazyClientInitialization(t *testing.T) {
	var acc testutil.Accumulator

	// Number of times the plugin requested a new client.
	connectionAttempts := 0

	d := Docker{
		Log: testutil.Logger{},
		newClient: func(string, *tls.Config) (dockerClient, error) {
			connectionAttempts++
			// Only the very first attempt fails; every later one succeeds,
			// which simulates the daemon becoming available after startup.
			if connectionAttempts == 1 {
				return nil, errors.New("docker daemon not ready")
			}
			return &mockClient{
				InfoF: func() (system.Info, error) {
					return system.Info{
						Name:          "docker-desktop",
						ServerVersion: "20.10.0",
					}, nil
				},
				ContainerListF: func(container.ListOptions) ([]container.Summary, error) {
					return nil, nil
				},
				ClientVersionF: func() string {
					return "1.24.0"
				},
				CloseF: func() error {
					return nil
				},
			}, nil
		},
		newEnvClient: func() (dockerClient, error) {
			return nil, errors.New("not using env client")
		},
	}

	require.NoError(t, d.Init())

	// Start must succeed even though its connection attempt (the first one)
	// fails, and it must leave the client unset.
	require.NoError(t, d.Start(&acc))
	require.Equal(t, 1, connectionAttempts)
	require.Nil(t, d.client)

	// The first Gather finds no client and connects lazily. This is the
	// second connection attempt, which the mock lets succeed.
	err := d.Gather(&acc)
	require.NoError(t, err)
	require.Equal(t, 2, connectionAttempts)
	require.NotNil(t, d.client)
}
Loading