diff --git a/agent/agent.go b/agent/agent.go index 9085db94e..15beadc83 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2,7 +2,8 @@ package agent import ( "errors" - "log" + + "k8s.io/klog/v2" ) type Agent struct { @@ -35,38 +36,38 @@ func NewAgent() (*Agent, error) { } func (a *Agent) Start() { - log.Println("I! agent starting") + klog.InfoS("agent starting") for _, agent := range a.agents { if agent == nil { continue } if err := agent.Start(); err != nil { - log.Printf("E! start [%T] err: [%+v]", agent, err) + klog.ErrorS(err, "start agent module failed", "module", agent) } else { - log.Printf("I! [%T] started", agent) + klog.InfoS("agent module started", "module", agent) } } - log.Println("I! agent started") + klog.InfoS("agent started") } func (a *Agent) Stop() { - log.Println("I! agent stopping") + klog.InfoS("agent stopping") for _, agent := range a.agents { if agent == nil { continue } if err := agent.Stop(); err != nil { - log.Printf("E! stop [%T] err: [%+v]", agent, err) + klog.ErrorS(err, "stop agent module failed", "module", agent) } else { - log.Printf("I! [%T] stopped", agent) + klog.InfoS("agent module stopped", "module", agent) } } - log.Println("I! agent stopped") + klog.InfoS("agent stopped") } func (a *Agent) Reload() { - log.Println("I! agent reloading") + klog.InfoS("agent reloading") a.Stop() a.Start() - log.Println("I! agent reloaded") + klog.InfoS("agent reloaded") } diff --git a/agent/ibex_agent.go b/agent/ibex_agent.go index 6a2a2619d..5f96e0642 100644 --- a/agent/ibex_agent.go +++ b/agent/ibex_agent.go @@ -3,10 +3,9 @@ package agent import ( - "log" - coreconfig "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/ibex" + "k8s.io/klog/v2" ) type IbexAgent struct { @@ -16,7 +15,7 @@ func NewIbexAgent() AgentModule { if coreconfig.Config == nil || coreconfig.Config.Ibex == nil || !coreconfig.Config.Ibex.Enable { - log.Println("I! 
ibex agent disabled!") + klog.Info("ibex agent disabled") return nil } if coreconfig.Config.Ibex.MetaDir == "" { diff --git a/agent/install/service_linux.go b/agent/install/service_linux.go index 881ea5f3b..29ecd74e3 100644 --- a/agent/install/service_linux.go +++ b/agent/install/service_linux.go @@ -3,7 +3,6 @@ package install import ( "bytes" "fmt" - "log" "os" "os/exec" "path/filepath" @@ -11,6 +10,7 @@ import ( "time" "github.com/kardianos/service" + "k8s.io/klog/v2" "flashcat.cloud/categraf/pkg/cmdx" ) @@ -167,7 +167,7 @@ func isSystemd() bool { err, timeout := cmdx.RunTimeout(cmd, time.Second*2) if timeout { - log.Printf("E! run command: %s timeout", cmd) + klog.Warningf("run command timeout: %s", cmd) return false } @@ -234,7 +234,7 @@ func ServiceConfig(userMode bool) *service.Config { cfg.WorkingDirectory = filepath.Dir(ov) } } else { - log.Println("E! get exeutable path error:", err) + klog.ErrorS(err, "get executable path error") } cfg.Arguments = []string{"-configs", filepath.Dir(ov) + "/conf"} if userMode { diff --git a/agent/logs_agent.go b/agent/logs_agent.go index e141c1eaa..c3a8403ef 100644 --- a/agent/logs_agent.go +++ b/agent/logs_agent.go @@ -11,7 +11,6 @@ import ( "context" "errors" "fmt" - "log" "os" "time" @@ -31,6 +30,7 @@ import ( coreconfig "flashcat.cloud/categraf/config" logsconfig "flashcat.cloud/categraf/config/logs" logService "flashcat.cloud/categraf/logs/service" + "k8s.io/klog/v2" ) const ( @@ -71,20 +71,20 @@ func NewLogsAgent() AgentModule { if err != nil { message := fmt.Sprintf("Invalid endpoints: %v", err) status.AddGlobalError("invalid endpoints", message) - log.Println("E!", errors.New(message)) + klog.ErrorS(errors.New(message), "invalid endpoints") return nil } processingRules, err := GlobalProcessingRules() if err != nil { message := fmt.Sprintf("Invalid processing rules: %v", err) status.AddGlobalError(invalidProcessingRules, message) - log.Println("E!", errors.New(message)) + klog.ErrorS(errors.New(message), "invalid 
processing rules") return nil } sources := logsconfig.NewLogSources() services := logService.NewServices() - log.Println("I! Starting logs-agent...") + klog.Info("starting logs-agent") // setup the auditor // We pass the health handle to the auditor because it's the end of the pipeline and the most @@ -120,7 +120,7 @@ func NewLogsAgent() AgentModule { journald.NewLauncher(sources, pipelineProvider, auditor), } if coreconfig.EnableCollectContainer() { - log.Println("collect docker logs...") + klog.Info("collect docker logs") inputs = append(inputs, container.NewLauncher(containerLaunchables)) } @@ -142,7 +142,7 @@ func (la *LogsAgent) Start() error { if coreconfig.EnableCollectContainer() { // collect container all if util.Debug() { - log.Println("Adding ContainerCollectAll source to the Logs Agent") + klog.V(1).Info("adding ContainerCollectAll source to the Logs Agent") } kubesource := logsconfig.NewLogSource(logsconfig.ContainerCollectAll, &logsconfig.LogsConfig{ @@ -161,7 +161,7 @@ func (la *LogsAgent) Start() error { } source := logsconfig.NewLogSource(c.Name, c) if err := c.Validate(); err != nil { - log.Println("W! Invalid logs configuration:", err) + klog.Warningf("invalid logs configuration: %v", err) source.Status.Error(err) continue } @@ -214,7 +214,7 @@ func (a *LogsAgent) Stop() error { select { case <-c: case <-time.After(timeout): - log.Println("I! Timed out when stopping logs-agent, forcing it to stop now") + klog.Info("timed out when stopping logs-agent, forcing it to stop now") // We force all destinations to read/flush all the messages they get without // trying to write to the network. a.destinationsCtx.Stop() @@ -226,7 +226,7 @@ func (a *LogsAgent) Stop() error { select { case <-c: case <-timeout.C: - log.Println("W! 
Force close of the Logs LogsAgent, dumping the Go routines.") + klog.Warning("force close of the logs agent, dumping the Go routines") } } return nil diff --git a/agent/metrics_agent.go b/agent/metrics_agent.go index 583654e97..4fb62000a 100644 --- a/agent/metrics_agent.go +++ b/agent/metrics_agent.go @@ -2,7 +2,6 @@ package agent import ( "errors" - "log" "strings" "sync" @@ -104,6 +103,7 @@ import ( _ "flashcat.cloud/categraf/inputs/x509_cert" _ "flashcat.cloud/categraf/inputs/xskyapi" _ "flashcat.cloud/categraf/inputs/zookeeper" + "k8s.io/klog/v2" ) type MetricsAgent struct { @@ -167,7 +167,7 @@ func NewMetricsAgent() AgentModule { provider, err := inputs.NewProvider(c, agent) if err != nil { - log.Println("E! init metrics agent error: ", err) + klog.ErrorS(err, "init metrics agent error") return nil } agent.InputProviders = provider @@ -196,7 +196,7 @@ func (ma *MetricsAgent) Start() error { func (ma *MetricsAgent) start(idx int) error { if _, err := ma.InputProviders[idx].LoadConfig(); err != nil { - log.Println("E! input provider load config get err: ", err) + klog.ErrorS(err, "input provider load config get err") } ma.InputProviders[idx].StartReloader() @@ -206,7 +206,7 @@ func (ma *MetricsAgent) start(idx int) error { } if len(names) == 0 { - log.Println("I! no inputs") + klog.InfoS("no inputs") return nil } @@ -218,7 +218,7 @@ func (ma *MetricsAgent) start(idx int) error { configs, err := ma.InputProviders[idx].GetInputConfig(name) if err != nil { - log.Println("E! failed to get configuration of plugin:", name, "error:", err) + klog.ErrorS(err, "failed to get configuration of plugin", "plugin", name) continue } @@ -249,7 +249,7 @@ func (ma *MetricsAgent) RegisterInput(name string, configs []cfg.ConfigWithForma creator, has := inputs.InputCreators[inputKey] if !has { - log.Println("E! 
input:", name, "not supported") + klog.Warningf("input %s not supported", name) return } @@ -260,12 +260,12 @@ func (ma *MetricsAgent) RegisterInput(name string, configs []cfg.ConfigWithForma } } if idx == -1 { - log.Println("E! input provider:", typ, "not found") + klog.Warningf("input provider %s not found", typ) // hint and panic next line } newInputs, err := ma.InputProviders[idx].LoadInputConfig(configs, creator()) if err != nil { - log.Println("E! failed to load configuration of plugin:", name, "error:", err) + klog.ErrorS(err, "failed to load configuration of plugin", "plugin", name) return } @@ -276,19 +276,18 @@ func (ma *MetricsAgent) RegisterInput(name string, configs []cfg.ConfigWithForma func (ma *MetricsAgent) inputGo(name string, sum string, input inputs.Input) { var err error + inputLogger := klog.Background().WithValues(metricsAgentInputLoggerValues(name, sum)...) if err = input.InitInternalConfig(); err != nil { - log.Println("E! failed to init input:", name, "error:", err) + klog.ErrorS(err, "failed to init input", "input", name) return } - if err = inputs.MayInit(input); err != nil { + if err = inputs.MayInit(input, inputLogger); err != nil { if !errors.Is(err, types.ErrInstancesEmpty) { - log.Println("E! failed to init input:", name, "error:", err) + klog.ErrorS(err, "failed to init input", "input", name) } else { - if config.Config.DebugMode { - _, inputKey := inputs.ParseInputName(name) - log.Println("W! no instances for input: ", inputKey) - } + _, inputKey := inputs.ParseInputName(name) + klog.V(1).InfoS("no instances for input", "input", inputKey) } return } @@ -298,13 +297,14 @@ func (ma *MetricsAgent) inputGo(name string, sum string, input inputs.Input) { empty := true for i := 0; i < len(instances); i++ { if err := instances[i].InitInternalConfig(); err != nil { - log.Println("E! 
failed to init input:", name, "error:", err) + klog.ErrorS(err, "failed to init input", "input", name) continue } - if err := inputs.MayInit(instances[i]); err != nil { + instanceLogger := inputLogger.WithValues(metricsAgentInstanceLoggerValues(i, instances[i].GetLabels())...) + if err := inputs.MayInit(instances[i], instanceLogger); err != nil { if !errors.Is(err, types.ErrInstancesEmpty) { - log.Println("E! failed to init input:", name, "error:", err) + klog.ErrorS(err, "failed to init input", "input", name) } continue } @@ -313,10 +313,8 @@ func (ma *MetricsAgent) inputGo(name string, sum string, input inputs.Input) { } if empty { - if config.Config.DebugMode { - _, inputKey := inputs.ParseInputName(name) - log.Printf("W! no instances for input:%s", inputKey) - } + _, inputKey := inputs.ParseInputName(name) + klog.V(1).InfoS("no instances for input", "input", inputKey) return } } @@ -324,7 +322,7 @@ func (ma *MetricsAgent) inputGo(name string, sum string, input inputs.Input) { reader := newInputReader(name, input) go reader.startInput() ma.InputReaders.Add(name, sum, reader) - log.Println("I! input:", name, "started") + klog.InfoS("input started", "input", name) } func (ma *MetricsAgent) DeregisterInput(name string, sum string) { @@ -335,9 +333,9 @@ func (ma *MetricsAgent) DeregisterInput(name string, sum string) { } } ma.InputReaders.Del(name, sum) - log.Printf("I! input: %s[checksum:%s] stopped", name, sum) + klog.InfoS("input stopped", "input", name, "checksum", sum) } else { - log.Printf("W! 
dereigster input name [%s] not found", name) + klog.Warningf("deregister input name [%s] not found", name) } } @@ -352,3 +350,21 @@ func parseFilter(filterStr string) map[string]struct{} { } return filtermap } + +func metricsAgentInputLoggerValues(name, sum string) []interface{} { + _, plugin := inputs.ParseInputName(name) + return []interface{}{ + "component", "inputs", + "input", name, + "plugin", plugin, + "checksum", sum, + } +} + +func metricsAgentInstanceLoggerValues(index int, labels map[string]string) []interface{} { + values := []interface{}{"instance_index", index} + if target, ok := labels["target"]; ok && target != "" { + values = append(values, "instance_target", target) + } + return values +} diff --git a/agent/metrics_agent_test.go b/agent/metrics_agent_test.go new file mode 100644 index 000000000..fecbf2ca4 --- /dev/null +++ b/agent/metrics_agent_test.go @@ -0,0 +1,381 @@ +package agent + +import ( + "reflect" + "testing" + "time" + + "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" + klog "k8s.io/klog/v2" +) + +type testMetricsInput struct { + instances []inputs.Instance + initCalls int + loggerInit int + logger klog.Logger + internalConfig int +} + +func (t *testMetricsInput) Clone() inputs.Input { + return t +} + +func (t *testMetricsInput) Name() string { + return "test" +} + +func (t *testMetricsInput) GetLabels() map[string]string { + return nil +} + +func (t *testMetricsInput) GetInterval() config.Duration { + return 0 +} + +func (t *testMetricsInput) InitInternalConfig() error { + t.internalConfig++ + return nil +} + +func (t *testMetricsInput) Process(slist *types.SampleList) *types.SampleList { + return slist +} + +func (t *testMetricsInput) Init() error { + t.initCalls++ + return nil +} + +func (t *testMetricsInput) InitWithLogger(logger klog.Logger) error { + t.loggerInit++ + t.logger = logger + return nil +} + +func (t *testMetricsInput) GetInstances() []inputs.Instance { + return 
t.instances +} + +type testMetricsInstance struct { + initialized bool + internalConfig int + loggerInit int + logger klog.Logger + labels map[string]string +} + +func (t *testMetricsInstance) Initialized() bool { + return t.initialized +} + +func (t *testMetricsInstance) SetInitialized() { + t.initialized = true +} + +func (t *testMetricsInstance) GetLabels() map[string]string { + return t.labels +} + +func (t *testMetricsInstance) GetIntervalTimes() int64 { + return 0 +} + +func (t *testMetricsInstance) InitInternalConfig() error { + t.internalConfig++ + return nil +} + +func (t *testMetricsInstance) Process(slist *types.SampleList) *types.SampleList { + return slist +} + +func (t *testMetricsInstance) InitWithLogger(logger klog.Logger) error { + t.loggerInit++ + t.logger = logger + return nil +} + +type testLegacyMetricsInstance struct { + initialized bool + internalConfig int + initCalls int +} + +func (t *testLegacyMetricsInstance) Initialized() bool { + return t.initialized +} + +func (t *testLegacyMetricsInstance) SetInitialized() { + t.initialized = true +} + +func (t *testLegacyMetricsInstance) GetLabels() map[string]string { + return nil +} + +func (t *testLegacyMetricsInstance) GetIntervalTimes() int64 { + return 0 +} + +func (t *testLegacyMetricsInstance) InitInternalConfig() error { + t.internalConfig++ + return nil +} + +func (t *testLegacyMetricsInstance) Process(slist *types.SampleList) *types.SampleList { + return slist +} + +func (t *testLegacyMetricsInstance) Init() error { + t.initCalls++ + return nil +} + +type testEmptyMetricsInput struct { + initCalled bool +} + +func (t *testEmptyMetricsInput) Clone() inputs.Input { + return t +} + +func (t *testEmptyMetricsInput) Name() string { + return "empty" +} + +func (t *testEmptyMetricsInput) GetLabels() map[string]string { + return nil +} + +func (t *testEmptyMetricsInput) GetInterval() config.Duration { + return 0 +} + +func (t *testEmptyMetricsInput) InitInternalConfig() error { + return nil +} + 
+func (t *testEmptyMetricsInput) Process(slist *types.SampleList) *types.SampleList { + return slist +} + +func (t *testEmptyMetricsInput) InitWithLogger(klog.Logger) error { + t.initCalled = true + return types.ErrInstancesEmpty +} + +type testLegacyTopLevelMetricsInput struct { + initCalls int +} + +func (t *testLegacyTopLevelMetricsInput) Clone() inputs.Input { + return t +} + +func (t *testLegacyTopLevelMetricsInput) Name() string { + return "legacy-top-level" +} + +func (t *testLegacyTopLevelMetricsInput) GetLabels() map[string]string { + return nil +} + +func (t *testLegacyTopLevelMetricsInput) GetInterval() config.Duration { + return 0 +} + +func (t *testLegacyTopLevelMetricsInput) InitInternalConfig() error { + return nil +} + +func (t *testLegacyTopLevelMetricsInput) Process(slist *types.SampleList) *types.SampleList { + return slist +} + +func (t *testLegacyTopLevelMetricsInput) Init() error { + t.initCalls++ + return nil +} + +func TestMetricsAgentInputGoUsesLoggerInitForInputAndInstances(t *testing.T) { + restore := setupMetricsAgentTestConfig() + defer restore() + + agent := &MetricsAgent{ + InputReaders: NewReaders(), + } + instance := &testMetricsInstance{ + labels: map[string]string{"target": "demo"}, + } + input := &testMetricsInput{ + instances: []inputs.Instance{instance}, + } + + agent.inputGo("provider.demo", "sum", input) + + if input.internalConfig != 1 { + t.Fatalf("expected input internal config once, got %d", input.internalConfig) + } + if input.loggerInit != 1 { + t.Fatalf("expected input logger init once, got %d", input.loggerInit) + } + if input.initCalls != 0 { + t.Fatalf("expected legacy input init to be skipped, got %d", input.initCalls) + } + if instance.internalConfig != 1 { + t.Fatalf("expected instance internal config once, got %d", instance.internalConfig) + } + if instance.loggerInit != 1 { + t.Fatalf("expected instance logger init once, got %d", instance.loggerInit) + } + if !instance.initialized { + t.Fatal("expected instance 
to be marked initialized") + } + readers, ok := agent.InputReaders.GetInput("provider.demo") + if !ok { + t.Fatal("expected input reader to be registered") + } + reader, ok := readers["sum"] + if !ok { + t.Fatal("expected checksum reader to be registered") + } + defer reader.Stop() + var zeroLogger klog.Logger + if input.logger == zeroLogger { + t.Fatal("expected input logger to be set") + } + if instance.logger == zeroLogger { + t.Fatal("expected instance logger to be set") + } +} + +func TestMetricsAgentInputGoInitializesLegacyInput(t *testing.T) { + restore := setupMetricsAgentTestConfig() + defer restore() + + agent := &MetricsAgent{ + InputReaders: NewReaders(), + } + input := &testLegacyTopLevelMetricsInput{} + + agent.inputGo("provider.demo", "legacy-top-level", input) + + if input.initCalls != 1 { + t.Fatalf("expected legacy input Init once, got %d", input.initCalls) + } + readers, ok := agent.InputReaders.GetInput("provider.demo") + if !ok { + t.Fatal("expected input reader to be registered") + } + reader, ok := readers["legacy-top-level"] + if !ok { + t.Fatal("expected legacy input checksum reader to be registered") + } + defer reader.Stop() +} + +func TestMetricsAgentInputGoInitializesLegacyInstance(t *testing.T) { + restore := setupMetricsAgentTestConfig() + defer restore() + + agent := &MetricsAgent{ + InputReaders: NewReaders(), + } + instance := &testLegacyMetricsInstance{} + input := &testMetricsInput{ + instances: []inputs.Instance{instance}, + } + + agent.inputGo("provider.demo", "legacy-sum", input) + + if input.loggerInit != 1 { + t.Fatalf("expected input logger init once, got %d", input.loggerInit) + } + if instance.internalConfig != 1 { + t.Fatalf("expected legacy instance internal config once, got %d", instance.internalConfig) + } + if instance.initCalls != 1 { + t.Fatalf("expected legacy instance Init once, got %d", instance.initCalls) + } + if !instance.initialized { + t.Fatal("expected legacy instance to be marked initialized") + } + 
readers, ok := agent.InputReaders.GetInput("provider.demo") + if !ok { + t.Fatal("expected input reader to be registered") + } + reader, ok := readers["legacy-sum"] + if !ok { + t.Fatal("expected checksum reader to be registered") + } + defer reader.Stop() +} + +func TestMetricsAgentLoggerContextValues(t *testing.T) { + inputContext := metricsAgentInputLoggerValues("provider.demo", "sum") + wantInputContext := []interface{}{ + "component", "inputs", + "input", "provider.demo", + "plugin", "demo", + "checksum", "sum", + } + if !reflect.DeepEqual(inputContext, wantInputContext) { + t.Fatalf("unexpected input logger context: got %#v want %#v", inputContext, wantInputContext) + } + + instanceContext := metricsAgentInstanceLoggerValues(2, map[string]string{"target": "demo"}) + wantInstanceContext := []interface{}{ + "instance_index", 2, + "instance_target", "demo", + } + if !reflect.DeepEqual(instanceContext, wantInstanceContext) { + t.Fatalf("unexpected instance logger context: got %#v want %#v", instanceContext, wantInstanceContext) + } +} + +func TestMetricsAgentInputGoKeepsErrInstancesEmptyBehavior(t *testing.T) { + restore := setupMetricsAgentTestConfig() + defer restore() + + agent := &MetricsAgent{ + InputReaders: NewReaders(), + } + input := &testEmptyMetricsInput{} + + agent.inputGo("provider.empty", "sum", input) + + if !input.initCalled { + t.Fatal("expected input init to be attempted") + } + if _, ok := agent.InputReaders.GetInput("provider.empty"); ok { + t.Fatal("expected no readers to be registered for empty instances") + } +} + +func setupMetricsAgentTestConfig() func() { + prevConfig := config.Config + config.Config = &config.ConfigType{ + TestMode: true, + Global: config.Global{ + Interval: config.Duration(time.Hour), + Concurrency: 1, + Precision: "ms", + }, + } + return func() { + if prevConfig == nil { + config.Config = &config.ConfigType{ + Global: config.Global{ + Interval: config.Duration(time.Hour), + Concurrency: 1, + Precision: "ms", + }, + } 
+ return + } + config.Config = prevConfig + } +} diff --git a/agent/metrics_reader.go b/agent/metrics_reader.go index e77e12761..ce8dcc8d0 100644 --- a/agent/metrics_reader.go +++ b/agent/metrics_reader.go @@ -1,7 +1,7 @@ package agent import ( - "log" + "fmt" "sync" "sync/atomic" "time" @@ -11,6 +11,7 @@ import ( "flashcat.cloud/categraf/pkg/runtimex" "flashcat.cloud/categraf/types" "flashcat.cloud/categraf/writer" + "k8s.io/klog/v2" ) type InputReader struct { @@ -43,7 +44,7 @@ func (r *InputReader) startInput() { slist := types.NewSampleList() err := si.Start(slist) if err != nil { - log.Printf("I! startInput err:%v", err) + klog.ErrorS(err, "start input err", "input", r.inputName) return } } @@ -58,15 +59,11 @@ func (r *InputReader) startInput() { return case <-timer.C: start = time.Now() - if config.Config.DebugMode { - log.Println("D!", r.inputName, ": before gather once") - } + klog.V(1).InfoS("before gather once", "input", r.inputName) r.gatherOnce() - if config.Config.DebugMode { - log.Println("D!", r.inputName, ": after gather once,", "duration:", time.Since(start)) - } + klog.V(1).InfoS("after gather once", "input", r.inputName, "duration", time.Since(start)) next := interval - time.Since(start) if next < 0 { @@ -80,7 +77,7 @@ func (r *InputReader) startInput() { func (r *InputReader) gatherOnce() { defer func() { if rc := recover(); rc != nil { - log.Println("E!", r.inputName, ": gather metrics panic:", r, string(runtimex.Stack(3))) + klog.ErrorS(fmt.Errorf("panic: %v", rc), "gather metrics panic", "input", r.inputName, "stack", string(runtimex.Stack(3))) } }() diff --git a/agent/prometheus_agent.go b/agent/prometheus_agent.go index 453a3d8d2..523a897d4 100644 --- a/agent/prometheus_agent.go +++ b/agent/prometheus_agent.go @@ -3,10 +3,9 @@ package agent import ( - "log" - coreconfig "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/prometheus" + "k8s.io/klog/v2" ) type PrometheusAgent struct { @@ -16,7 +15,7 @@ func NewPrometheusAgent() 
AgentModule { if coreconfig.Config == nil || coreconfig.Config.Prometheus == nil || !coreconfig.Config.Prometheus.Enable { - log.Println("I! prometheus scraping disabled!") + klog.Info("prometheus scraping disabled") return nil } return &PrometheusAgent{} @@ -24,12 +23,12 @@ func NewPrometheusAgent() AgentModule { func (pa *PrometheusAgent) Start() error { go prometheus.Start() - log.Println("I! prometheus scraping started!") + klog.Info("prometheus scraping started") return nil } func (pa *PrometheusAgent) Stop() error { prometheus.Stop() - log.Println("I! prometheus scraping stopped!") + klog.Info("prometheus scraping stopped") return nil } diff --git a/agent/update/update_linux.go b/agent/update/update_linux.go index 5dc58dd44..669255ca3 100644 --- a/agent/update/update_linux.go +++ b/agent/update/update_linux.go @@ -6,16 +6,17 @@ import ( "compress/gzip" "fmt" "io" - "log" "net/http" "os" "path/filepath" "strings" + + "k8s.io/klog/v2" ) func download(file string) (string, error) { fname := filepath.Base(file) - log.Println("downloading file:", file, "save to:", fname) + klog.InfoS("downloading file", "source", file, "dest", fname) res, err := http.Get(file) if err != nil { return fname, fmt.Errorf("cannot download file from %s", file) @@ -67,7 +68,7 @@ func Update(tar string) error { if fi.Mode().IsDir() { return fmt.Errorf("%s is directory", nv) } - log.Printf("I! replace old version:%s with new version:%s", ov, "./"+nv) + klog.InfoS("replace old version with new version", "old_version", ov, "new_version", "./"+nv) // replace err = os.Rename(nv, ov) @@ -76,15 +77,15 @@ func Update(tar string) error { } err = os.RemoveAll("./" + filepath.Dir(nv)) if err != nil { - log.Println("E! clean dir:", "./"+filepath.Dir(nv), "error:", err) + klog.ErrorS(err, "clean dir failed", "path", "./"+filepath.Dir(nv)) } else { - log.Println("I! 
clean dir:", "./"+filepath.Dir(nv), "success") + klog.InfoS("clean dir success", "path", "./"+filepath.Dir(nv)) } err = os.Remove("./" + fname) if err != nil { - log.Println("E! clean file:", "./"+fname, "error:", err) + klog.ErrorS(err, "clean file failed", "path", "./"+fname) } else { - log.Println("I! clean file:", "./"+fname, "success") + klog.InfoS("clean file success", "path", "./"+fname) } return os.Chmod(ov, fm.Mode().Perm()) } @@ -128,7 +129,7 @@ func UnTar(dst, src string) (target string, err error) { case tar.TypeReg: err := os.MkdirAll(filepath.Dir(dstFileDir), 0755) if err != nil { - log.Printf("mdkir:%s, error:%s", filepath.Base(dstFileDir), err) + klog.ErrorS(err, "mkdir failed", "path", filepath.Base(dstFileDir)) return target, err } file, err := os.OpenFile(dstFileDir, os.O_CREATE|os.O_RDWR, os.FileMode(hdr.Mode)) diff --git a/agent/update/update_windows.go b/agent/update/update_windows.go index 5da3cc662..eb8b8cdda 100644 --- a/agent/update/update_windows.go +++ b/agent/update/update_windows.go @@ -5,18 +5,18 @@ import ( "bufio" "fmt" "io" - "log" "net/http" "os" "path/filepath" "strings" "golang.org/x/sys/windows" + "k8s.io/klog/v2" ) func download(file string) (string, error) { fname := filepath.Base(file) - log.Println("downloading file:", file, "save to:", fname) + klog.InfoS("downloading file", "source", file, "dest", fname) res, err := http.Get(file) if err != nil { return fname, fmt.Errorf("cannot download file from %s", file) @@ -68,7 +68,7 @@ func Update(tar string) error { if fi.Mode().IsDir() { return fmt.Errorf("%s is directory", nv) } - log.Printf("I! 
replace old version:%s with new version:%s", ov, "./"+nv) + klog.InfoS("replace old version with new version", "old_version", ov, "new_version", "./"+nv) // rename current -> current.old oldBackup := ov + ".old" @@ -78,7 +78,7 @@ func Update(tar string) error { } err = windows.MoveFileEx(windows.StringToUTF16Ptr(oldBackup), nil, windows.MOVEFILE_DELAY_UNTIL_REBOOT) // optional: delay delete old file if err != nil { - log.Printf("I! cannot auto remove old file for current user. please manual remove %s. cause: %v", oldBackup, err) + klog.Warningf("cannot auto remove old file for current user, please manually remove %s: %v", oldBackup, err) } // replace err = os.Rename(nv, ov) @@ -87,15 +87,15 @@ func Update(tar string) error { } err = os.RemoveAll("./" + filepath.Dir(nv)) if err != nil { - log.Println("E! clean dir:", "./"+filepath.Dir(nv), "error:", err) + klog.ErrorS(err, "clean dir failed", "path", "./"+filepath.Dir(nv)) } else { - log.Println("I! clean dir:", "./"+filepath.Dir(nv), "success") + klog.InfoS("clean dir success", "path", "./"+filepath.Dir(nv)) } err = os.Remove("./" + fname) if err != nil { - log.Println("E! clean file:", "./"+fname, "error:", err) + klog.ErrorS(err, "clean file failed", "path", "./"+fname) } else { - log.Println("I! 
clean file:", "./"+fname, "success") + klog.InfoS("clean file success", "path", "./"+fname) } return os.Chmod(ov, fm.Mode().Perm()) } @@ -132,7 +132,7 @@ func UnTar(dst, src string) (target string, err error) { // now create directory for files err = os.MkdirAll(filepath.Dir(destPath), 0755) if err != nil { - log.Printf("mdkir:%s, error:%s", filepath.Base(destPath), err) + klog.ErrorS(err, "mkdir failed", "path", filepath.Base(destPath)) return "", err } diff --git a/api/router_falcon.go b/api/router_falcon.go index 17f18686c..346616793 100644 --- a/api/router_falcon.go +++ b/api/router_falcon.go @@ -3,7 +3,6 @@ package api import ( "encoding/json" "fmt" - "log" "net/http" "strconv" "strings" @@ -12,6 +11,7 @@ import ( "github.com/gin-gonic/gin" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/prompb" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/writer" @@ -207,7 +207,7 @@ func openFalcon(c *gin.Context) { } if fail > 0 { - log.Println("falcon forwarder error, message:", string(bytes)) + klog.Errorf("falcon forwarder error, message: %s", string(bytes)) } writer.WriteTimeSeries(series) diff --git a/api/router_opentsdb.go b/api/router_opentsdb.go index 2193b134b..664ce6c00 100644 --- a/api/router_opentsdb.go +++ b/api/router_opentsdb.go @@ -3,7 +3,6 @@ package api import ( "encoding/json" "fmt" - "log" "net/http" "strconv" "strings" @@ -12,6 +11,7 @@ import ( "github.com/gin-gonic/gin" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/prompb" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/writer" @@ -156,7 +156,7 @@ func openTSDB(c *gin.Context) { series := make([]prompb.TimeSeries, 0, count) for i := 0; i < len(list); i++ { if err := list[i].Clean(ts); err != nil { - log.Println("clean opentsdb sample:", err) + klog.ErrorS(err, "clean opentsdb sample") if fail == 0 { msg = fmt.Sprintf("%s , Error clean: %s", msg, err.Error()) } @@ -179,7 +179,7 @@ func openTSDB(c 
*gin.Context) { pt, err := list[i].ToProm() if err != nil { - log.Println("convert opentsdb sample:", err) + klog.ErrorS(err, "convert opentsdb sample") if fail == 0 { msg = fmt.Sprintf("%s , Error toprom: %s", msg, err.Error()) } @@ -192,7 +192,7 @@ func openTSDB(c *gin.Context) { } if fail > 0 { - log.Println("opentsdb forwarder error, message:", string(bytes)) + klog.Errorf("opentsdb forwarder error, message: %s", string(bytes)) } writer.WriteTimeSeries(series) diff --git a/api/server.go b/api/server.go index 2b1b30403..b937d5bbc 100644 --- a/api/server.go +++ b/api/server.go @@ -2,12 +2,12 @@ package api import ( "crypto/tls" - "log" "net/http" "strings" "time" "github.com/gin-gonic/gin" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/pkg/aop" @@ -50,7 +50,7 @@ func Start() { IdleTimeout: time.Duration(conf.IdleTimeout) * time.Second, } - log.Println("I! http server listening on:", addr) + klog.InfoS("http server listening", "address", addr) var err error if conf.CertFile != "" && conf.KeyFile != "" { diff --git a/conf/config.toml b/conf/config.toml index 39221909f..e90c87f87 100644 --- a/conf/config.toml +++ b/conf/config.toml @@ -37,6 +37,15 @@ concurrency = -1 # file_name is the file to write logs to file_name = "stdout" +# level controls the default runtime log detail. +# supported values: debug, info, warn, error +level = "info" + +# verbosity controls klog V-level output. +# 0 disables klog.V(n); 1 enables klog.V(1); 10 enables up to klog.V(10). +# when level = "debug" and verbosity = 0, categraf will use verbosity = 1. +verbosity = 0 + # options below will not be work when file_name is stdout or stderr # max_size is the maximum size in megabytes of the log file before it gets rotated. It defaults to 100 megabytes. 
max_size = 100 diff --git a/config/config.go b/config/config.go index 520bd41eb..f050c2c58 100644 --- a/config/config.go +++ b/config/config.go @@ -2,7 +2,6 @@ package config import ( "fmt" - "log" "net" "net/url" "os" @@ -14,6 +13,7 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/toolkits/pkg/file" + "k8s.io/klog/v2" "flashcat.cloud/categraf/pkg/cfg" "flashcat.cloud/categraf/pkg/tls" @@ -46,6 +46,8 @@ type Log struct { MaxBackups int `toml:"max_backups"` LocalTime bool `toml:"local_time"` Compress bool `toml:"compress"` + Level string `toml:"level"` + Verbosity int `toml:"verbosity"` } type WriterOpt struct { @@ -128,7 +130,7 @@ type ConfigType struct { var Config *ConfigType -func InitConfig(configDir string, debugLevel int, debugMode, testMode bool, interval int64, inputFilters string) error { +func InitConfig(configDir string, debugLevel int, debugMode, testMode bool, debugLevelSet, debugModeSet bool, interval int64, inputFilters string) error { configFile := path.Join(configDir, "config.toml") if !file.IsExist(configFile) { return fmt.Errorf("configuration file(%s) not found", configFile) @@ -146,6 +148,13 @@ func InitConfig(configDir string, debugLevel int, debugMode, testMode bool, inte return fmt.Errorf("failed to load configs of dir: %s err:%s", configDir, err) } + resolvedDebugMode, resolvedDebugLevel, err := resolveLogSettings(Config.Log, debugLevel, debugMode, debugLevelSet, debugModeSet) + if err != nil { + return err + } + Config.DebugMode = resolvedDebugMode + Config.DebugLevel = resolvedDebugLevel + if interval > 0 { Config.Global.Interval = Duration(time.Duration(interval) * time.Second) } @@ -186,6 +195,52 @@ func InitConfig(configDir string, debugLevel int, debugMode, testMode bool, inte return nil } +func resolveLogSettings(logCfg Log, cliDebugLevel int, cliDebugMode bool, cliDebugLevelSet, cliDebugModeSet bool) (bool, int, error) { + level := strings.ToLower(strings.TrimSpace(logCfg.Level)) + if level == "" { + level = "info" + } 
+ if logCfg.Verbosity < 0 { + return false, 0, fmt.Errorf("invalid log.verbosity %d: must be >= 0", logCfg.Verbosity) + } + + verbosity := logCfg.Verbosity + switch level { + case "debug": + if verbosity == 0 { + verbosity = 1 + } + case "info", "warn", "warning", "error": + default: + return false, 0, fmt.Errorf("invalid log.level %q: supported values are debug, info, warn, error", logCfg.Level) + } + + debugMode := verbosity > 0 + debugLevel := verbosity + + if cliDebugModeSet { + debugMode = cliDebugMode + if !debugMode { + debugLevel = 0 + } else if debugLevel == 0 { + debugLevel = 1 + } + } + + if cliDebugLevelSet { + debugLevel = cliDebugLevel + if cliDebugLevel > 0 { + debugMode = true + } else if cliDebugModeSet { + debugMode = cliDebugMode + } else { + debugMode = false + } + } + + return debugMode, debugLevel, nil +} + func (c *ConfigType) GetHostname() string { ret := c.Global.Hostname @@ -245,7 +300,7 @@ func getLocalIP() (net.IP, error) { } addrs, err := iface.Addrs() if err != nil { - log.Println("W! iface address error", err) + klog.Warningf("iface address error: %v", err) continue } for _, addr := range addrs { @@ -267,13 +322,13 @@ func getLocalIP() (net.IP, error) { func GetOutboundIP() (net.IP, error) { addr := defaultProbeAddr if len(Config.Writers) == 0 { - log.Printf("E! writers is not configured, use %s as default probe address", defaultProbeAddr) + klog.Warningf("writers is not configured, use %s as default probe address", defaultProbeAddr) } for _, v := range Config.Writers { if len(v.Url) != 0 { u, err := url.Parse(v.Url) if err != nil { - log.Printf("W! 
parse writers url %s error %s", v.Url, err) + klog.Warningf("parse writers url %s error %s", v.Url, err) continue } else { if strings.Contains(u.Host, "localhost") || strings.Contains(u.Host, "127.0.0.1") { diff --git a/config/config_test.go b/config/config_test.go new file mode 100644 index 000000000..8208f6fea --- /dev/null +++ b/config/config_test.go @@ -0,0 +1,69 @@ +package config + +import "testing" + +func TestResolveLogSettingsUsesConfigVerbosity(t *testing.T) { + logCfg := Log{ + Level: "info", + Verbosity: 10, + } + + debugMode, debugLevel, err := resolveLogSettings(logCfg, 0, false, false, false) + if err != nil { + t.Fatalf("resolveLogSettings returned error: %v", err) + } + + if !debugMode { + t.Fatalf("expected debug mode to be enabled when verbosity > 0") + } + + if debugLevel != 10 { + t.Fatalf("expected debug level 10, got %d", debugLevel) + } +} + +func TestResolveLogSettingsMapsDebugLevelToVerbosityOne(t *testing.T) { + logCfg := Log{ + Level: "debug", + } + + debugMode, debugLevel, err := resolveLogSettings(logCfg, 0, false, false, false) + if err != nil { + t.Fatalf("resolveLogSettings returned error: %v", err) + } + + if !debugMode { + t.Fatalf("expected debug mode to be enabled for debug level") + } + + if debugLevel != 1 { + t.Fatalf("expected debug level 1 for debug log level, got %d", debugLevel) + } +} + +func TestResolveLogSettingsCliOverridesConfig(t *testing.T) { + logCfg := Log{ + Level: "debug", + Verbosity: 10, + } + + debugMode, debugLevel, err := resolveLogSettings(logCfg, 2, false, true, false) + if err != nil { + t.Fatalf("resolveLogSettings returned error: %v", err) + } + + if !debugMode { + t.Fatalf("expected debug mode to stay enabled when cli debug level is set") + } + + if debugLevel != 2 { + t.Fatalf("expected cli debug level 2 to win, got %d", debugLevel) + } +} + +func TestResolveLogSettingsRejectsUnknownLevel(t *testing.T) { + _, _, err := resolveLogSettings(Log{Level: "verbose"}, 0, false, false, false) + if err == nil 
{ + t.Fatal("expected invalid log level to return error") + } +} diff --git a/config/hostname.go b/config/hostname.go index 0e094a669..a4a882017 100644 --- a/config/hostname.go +++ b/config/hostname.go @@ -2,10 +2,11 @@ package config import ( "fmt" - "log" "os" "sync" "time" + + "k8s.io/klog/v2" ) type HostInfoCache struct { @@ -101,13 +102,13 @@ func (c *HostInfoCache) update() { time.Sleep(time.Minute) name, err := os.Hostname() if err != nil { - log.Println("E! failed to get hostname:", err) + klog.ErrorS(err, "failed to get hostname") } else { HostInfo.SetHostname(name) } ip, err := GetOutboundIP() if err != nil { - log.Println("E! failed to get ip:", err) + klog.ErrorS(err, "failed to get ip") } else { HostInfo.SetIP(fmt.Sprint(ip)) } diff --git a/config/urllabel.go b/config/urllabel.go index 2f927dfbd..026b95d31 100644 --- a/config/urllabel.go +++ b/config/urllabel.go @@ -2,10 +2,11 @@ package config import ( "bytes" - "log" "net/url" "strings" "text/template" + + "k8s.io/klog/v2" ) type UrlLabel struct { @@ -46,9 +47,7 @@ func (ul *UrlLabel) PrepareUrlTemplate() error { value = k + "=" + v } } - if Config.DebugMode { - log.Printf("D! label pair tpl:%s", value) - } + klog.V(1).InfoS("label pair template", "template", value) ul.LabelPairTpl, err = template.New("pair").Parse(value) if err != nil { return err @@ -99,9 +98,7 @@ func (ul *UrlLabel) GenerateLabel(u *url.URL) (map[string]string, error) { if len(kvs) != 2 { continue } - if Config.DebugMode { - log.Printf("D! 
label pairs after rendering: %s=%s", kvs[0], kvs[1]) - } + klog.V(1).InfoS("label pair rendered", "key", kvs[0], "value", kvs[1]) ret[kvs[0]] = kvs[1] } buffer.Reset() diff --git a/docs/superpowers/plans/2026-04-13-global-klog-logging.md b/docs/superpowers/plans/2026-04-13-global-klog-logging.md new file mode 100644 index 000000000..a04e13062 --- /dev/null +++ b/docs/superpowers/plans/2026-04-13-global-klog-logging.md @@ -0,0 +1,1025 @@ +# Global Klog Logging Standardization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Standardize repository-owned logging on shared `klog`, remove standard-library `log.Printf` / `log.Println` as the normal logging path, and replace `DebugMode`-gated extra logs with verbosity-based `klog` output. + +**Architecture:** Add a shared `pkg/logging` package that owns `klog` setup, output routing, standard-library bridging, and contextual logger helpers. Migrate repository callsites in batches, using repository policy tests plus `go test` and `rg` sweeps to enforce that runtime code no longer relies on standard-library logging or `DebugMode` branches just to emit extra logs. Preserve business behavior and keep `DebugMod` fields only where they control downstream library behavior rather than local log emission. 
+ +**Tech Stack:** Go 1.25, `k8s.io/klog/v2`, standard `flag` package, existing `agent`, `inputs`, `writer`, `heartbeat`, `ibex`, `logs`, and `pkg` packages, `go test`, `rg` + +--- + +## File Map + +- `pkg/logging/logging.go`: shared `klog` registration, configuration, output writer selection, flush lifecycle, and contextual logger helpers +- `pkg/logging/logging_test.go`: focused unit tests for output selection, verbosity mapping, and standard-library bridge behavior +- `pkg/logging/repository_policy_test.go`: repo policy tests that fail when scoped runtime files still use `log.Printf` / `log.Println` or `DebugMode`-only log branches +- `main.go`, `main_posix.go`, `main_windows.go`: process-wide logger registration, early service-path initialization, and shutdown flushing +- `agent/metrics_agent.go`, `agent/metrics_reader.go`, `agent/agent.go`, `agent/prometheus_agent.go`, `agent/ibex_agent.go`: shared logger usage in the main collection loop and lifecycle logs +- `writer/writers.go`, `writer/writer.go`: writer queue and remote write logs converted to `klog`, with debug output moved to `V(level)` +- `config/config.go`, `config/hostname.go`, `config/urllabel.go`: config, hostname, and URL label logs moved off standard `log` +- `heartbeat/heartbeat.go`: debug helper converted from `DebugMode` gate to verbosity-driven `klog` +- `ibex/*.go`, `logs/**/*.go`, `pkg/**/*.go`, `parser/**/*.go`, `api/**/*.go`: runtime packages migrated in batches +- `inputs/**/*.go`: migrated in two waves, first framework/common collectors, then heavyweight subtrees such as `elasticsearch`, `mtail`, `node_exporter`, `ipmi`, and `snmp_zabbix` + +### Task 1: Add Shared Logging Base And Repository Policy Tests + +**Files:** +- Create: `pkg/logging/logging.go` +- Create: `pkg/logging/logging_test.go` +- Create: `pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Write the failing tests for the shared logging package** + +Add `pkg/logging/logging_test.go` with focused tests around output 
routing and the standard-library bridge: + +```go +package logging + +import ( + "bytes" + "flag" + stdlog "log" + "strings" + "testing" + + "k8s.io/klog/v2" +) + +func TestConfigureMapsDebugToVerbosity(t *testing.T) { + state := klog.CaptureState() + defer state.Restore() + + fs := flag.NewFlagSet("logging-test", flag.ContinueOnError) + RegisterFlags(fs) + if err := fs.Parse(nil); err != nil { + t.Fatalf("Parse() error = %v", err) + } + + var buf bytes.Buffer + if err := configureWithWriter(&buf, fs, true, 0); err != nil { + t.Fatalf("configureWithWriter() error = %v", err) + } + + klog.V(1).InfoS("debug enabled") + klog.Flush() + + if !strings.Contains(buf.String(), "debug enabled") { + t.Fatalf("expected verbosity 1 output, got %q", buf.String()) + } +} + +func TestConfigureBridgesStandardLibraryLog(t *testing.T) { + state := klog.CaptureState() + defer state.Restore() + + fs := flag.NewFlagSet("logging-bridge-test", flag.ContinueOnError) + RegisterFlags(fs) + if err := fs.Parse(nil); err != nil { + t.Fatalf("Parse() error = %v", err) + } + + var buf bytes.Buffer + if err := configureWithWriter(&buf, fs, false, 0); err != nil { + t.Fatalf("configureWithWriter() error = %v", err) + } + + stdlog.Println("legacy bridge message") + klog.Flush() + + if !strings.Contains(buf.String(), "legacy bridge message") { + t.Fatalf("expected bridged standard log output, got %q", buf.String()) + } +} +``` + +- [ ] **Step 2: Write the failing repository policy test for the first migration scope** + +Add `pkg/logging/repository_policy_test.go` to enforce the first batch of runtime files: + +```go +package logging + +import ( + "os" + "path/filepath" + "regexp" + "testing" +) + +var forbiddenStdLog = regexp.MustCompile(`\blog\.(Printf|Println|Fatal|Fatalf|Fatalln)\b`) +var forbiddenDebugBranch = regexp.MustCompile(`if\s+(config\.Config\.DebugMode|Config\.DebugMode)\s*\{`) + +func TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches(t *testing.T) { + files := []string{ + 
filepath.Join("..", "..", "main.go"), + filepath.Join("..", "..", "main_posix.go"), + filepath.Join("..", "..", "main_windows.go"), + filepath.Join("..", "..", "agent", "agent.go"), + filepath.Join("..", "..", "agent", "metrics_agent.go"), + filepath.Join("..", "..", "agent", "metrics_reader.go"), + filepath.Join("..", "..", "writer", "writer.go"), + filepath.Join("..", "..", "writer", "writers.go"), + filepath.Join("..", "..", "heartbeat", "heartbeat.go"), + } + + for _, file := range files { + content, err := os.ReadFile(file) + if err != nil { + t.Fatalf("ReadFile(%s) error = %v", file, err) + } + if forbiddenStdLog.Match(content) { + t.Fatalf("forbidden standard log call remains in %s", file) + } + if forbiddenDebugBranch.Match(content) { + t.Fatalf("forbidden DebugMode-only log branch remains in %s", file) + } + } +} +``` + +- [ ] **Step 3: Run the new logging tests to verify RED** + +Run: + +```bash +go test ./pkg/logging -count=1 +``` + +Expected: FAIL. At this point `pkg/logging/logging.go` does not exist yet, so the package fails to compile (`RegisterFlags` and `configureWithWriter` are undefined); once it builds, the policy test will still fail because it points at files that still use standard-library logging. 
+ +- [ ] **Step 4: Implement the shared logging package** + +Create `pkg/logging/logging.go` with explicit flag registration, configurable output, and a bridge for legacy `log`: + +```go +package logging + +import ( + "flag" + "fmt" + "io" + stdlog "log" + "os" + "strconv" + "sync" + "time" + + "gopkg.in/natefinch/lumberjack.v2" + "k8s.io/klog/v2" +) + +var flushOnce sync.Once + +func RegisterFlags(fs *flag.FlagSet) { + klog.InitFlags(fs) +} + +func Configure(output string, maxSize, maxAge, maxBackups int, localTime, compress, debug bool, debugLevel int) error { + return configureWithWriter(newWriter(output, maxSize, maxAge, maxBackups, localTime, compress), flag.CommandLine, debug, debugLevel) +} + +func configureWithWriter(writer io.Writer, fs *flag.FlagSet, debug bool, debugLevel int) error { + level := debugLevel + if debug && level == 0 { + level = 1 + } + + if err := fs.Set("logtostderr", "false"); err != nil { + return err + } + if err := fs.Set("alsologtostderr", "false"); err != nil { + return err + } + if err := fs.Set("stderrthreshold", "FATAL"); err != nil { + return err + } + if err := fs.Set("v", strconv.Itoa(level)); err != nil { + return err + } + + stdlog.SetFlags(0) + klog.SetOutput(writer) + klog.CopyStandardLogTo("INFO") + flushOnce.Do(func() { + klog.StartFlushDaemon(5 * time.Second) + }) + return nil +} + +func newWriter(output string, maxSize, maxAge, maxBackups int, localTime, compress bool) io.Writer { + switch output { + case "", "stdout": + return os.Stdout + case "stderr": + return os.Stderr + default: + return &lumberjack.Logger{ + Filename: output, + MaxSize: maxSize, + MaxAge: maxAge, + MaxBackups: maxBackups, + LocalTime: localTime, + Compress: compress, + } + } +} + +func Sync() { + klog.Flush() +} + +func Component(name string) klog.Logger { + return klog.Background().WithName(name) +} + +func ComponentValues(name string, kv ...interface{}) klog.Logger { + return Component(name).WithValues(kv...) 
+} + +func Verbose(level int, msg string, kv ...interface{}) { + klog.V(klog.Level(level)).InfoS(msg, kv...) +} +``` + +- [ ] **Step 5: Run the shared logging tests again and confirm the policy test still fails for unmigrated runtime files** + +Run: + +```bash +go test ./pkg/logging -count=1 +``` + +Expected: `logging_test.go` passes, while `TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches` still fails because the scoped runtime files are not migrated yet. + +- [ ] **Step 6: Commit the logging base and policy test scaffolding** + +Run: + +```bash +git add pkg/logging/logging.go pkg/logging/logging_test.go pkg/logging/repository_policy_test.go +git commit -m "test: add shared logging base and policy checks" +``` + +Expected: One commit with the shared logging package and failing-policy scaffold. + +### Task 2: Wire Shared Logging Into Process Startup And Inputs Initialization + +**Files:** +- Modify: `main.go` +- Modify: `main_posix.go` +- Modify: `main_windows.go` +- Modify: `agent/metrics_agent.go` +- Modify: `agent/metrics_agent_test.go` +- Modify: `inputs/inputs.go` +- Modify: `inputs/inputs_test.go` + +- [ ] **Step 1: Register `klog` flags before parsing CLI flags** + +In `main.go`, register `klog` flags once before `flag.Parse()` and remove the old `initLog` helper: + +```go +func init() { + logging.RegisterFlags(flag.CommandLine) + + var err error + if appPath, err = winsvc.GetAppPath(); err != nil { + klog.Fatal(err) + } + if err := os.Chdir(filepath.Dir(appPath)); err != nil { + klog.Fatal(err) + } +} +``` + +- [ ] **Step 2: Configure a minimal stderr logger before service-control paths and reconfigure after config load** + +Update `main.go` so both early service flows and the normal runtime use `pkg/logging`: + +```go +func main() { + flag.Parse() + + if err := logging.Configure("stderr", 0, 0, 0, false, false, *debugMode, *debugLevel); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + defer logging.Sync() + + if *install || *remove || 
*start || *stop || *status || *update { + if err := serviceProcess(); err != nil { + klog.ErrorS(err, "service command failed") + } + return + } + + if err := config.InitConfig(*configDir, *debugLevel, *debugMode, *testMode, *interval, *inputFilters); err != nil { + klog.Fatal(err) + } + + if err := logging.Configure( + config.Config.Log.FileName, + config.Config.Log.MaxSize, + config.Config.Log.MaxAge, + config.Config.Log.MaxBackups, + config.Config.Log.LocalTime, + config.Config.Log.Compress, + config.Config.DebugMode, + config.Config.DebugLevel, + ); err != nil { + klog.Fatal(err) + } +} +``` + +- [ ] **Step 3: Update Windows and POSIX entrypoints to use `klog` and shared flushing** + +Replace direct `log.Fatalln` / `log.Printf` usage in `main_posix.go` and `main_windows.go` with `klog`: + +```go +func reapDaemon() { + ... + if err != nil { + klog.ErrorS(err, "failed to reap child processes") + continue + } + klog.InfoS("reaped child process", "pid", e.pid, "status", e.status) +} +``` + +```go +if err := winsvc.RunAsService(*flagWinSvcName, ag.Start, ag.Stop, false); err != nil { + klog.Fatal(err) +} +``` + +- [ ] **Step 4: Fold the current metrics-agent logger derivation into the shared logging helper** + +Update `agent/metrics_agent.go` to derive child loggers from `pkg/logging`: + +```go +func metricsAgentInputLogger(name string, sum string) klog.Logger { + return logging.ComponentValues( + "inputs", + "component", "inputs", + "input", name, + "plugin", parsedInputKey(name), + "checksum", sum, + ) +} + +func metricsAgentInstanceLogger(inputLogger klog.Logger, idx int) klog.Logger { + return inputLogger.WithValues("instance_index", idx) +} +``` + +Keep `inputs.MayInit(t, logger)` and the existing tests, but update assertions to check the logger still comes from the shared path. 
+ +- [ ] **Step 5: Run the focused tests for startup-adjacent code** + +Run: + +```bash +go test ./pkg/logging ./inputs ./agent -run 'Test(MayInit|MetricsAgentInputGo|Configure|CoreRuntime)' -count=1 +``` + +Expected: the `pkg/logging` `TestConfigure*` tests pass, the `inputs` and `agent` tests pass, and the core runtime policy test (`TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches`) still fails until Task 3 migrates the remaining runtime files. + +- [ ] **Step 6: Commit the startup wiring and shared logger integration** + +Run: + +```bash +git add main.go main_posix.go main_windows.go agent/metrics_agent.go agent/metrics_agent_test.go inputs/inputs.go inputs/inputs_test.go +git commit -m "refactor: wire shared klog startup logging" +``` + +Expected: One commit containing only startup wiring plus the shared `inputs` logger integration. + +### Task 3: Migrate Core Runtime Packages And Remove Pure Debug Log Branches + +**Files:** +- Modify: `agent/agent.go` +- Modify: `agent/metrics_agent.go` +- Modify: `agent/metrics_reader.go` +- Modify: `agent/prometheus_agent.go` +- Modify: `agent/ibex_agent.go` +- Modify: `config/config.go` +- Modify: `config/hostname.go` +- Modify: `config/urllabel.go` +- Modify: `heartbeat/heartbeat.go` +- Modify: `writer/writer.go` +- Modify: `writer/writers.go` +- Modify: `parser/influx/parser.go` +- Modify: `parser/prometheus/parser.go` +- Modify: `pkg/aop/logger.go` +- Modify: `pkg/aop/recovery.go` +- Modify: `pkg/httpx/client.go` +- Modify: `pkg/httpx/transport.go` +- Modify: `pkg/kubernetes/pod.go` +- Modify: `pkg/pprof/profile.go` +- Modify: `pkg/snmp/translate.go` +- Modify: `pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Extend the policy test to cover the full core runtime batch** + +Append these files to `TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches` in `pkg/logging/repository_policy_test.go`: + +```go +files = append(files, + filepath.Join("..", "..", "agent", "prometheus_agent.go"), + filepath.Join("..", "..", "agent", "ibex_agent.go"), + filepath.Join("..", "..", "config", "config.go"), + 
filepath.Join("..", "..", "config", "hostname.go"), + filepath.Join("..", "..", "config", "urllabel.go"), + filepath.Join("..", "..", "parser", "influx", "parser.go"), + filepath.Join("..", "..", "parser", "prometheus", "parser.go"), + filepath.Join("..", "..", "pkg", "httpx", "client.go"), + filepath.Join("..", "..", "pkg", "httpx", "transport.go"), +) +``` + +- [ ] **Step 2: Run the policy test and capture the RED failures** + +Run: + +```bash +go test ./pkg/logging -run TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches -count=1 +``` + +Expected: FAIL with the first remaining file that still contains standard-library `log` calls or a `DebugMode` logging branch. + +- [ ] **Step 3: Replace direct runtime logs with structured `klog` calls** + +Apply the same style across the files in this task: + +```go +klog.InfoS("agent started", "agent", fmt.Sprintf("%T", agent)) +klog.ErrorS(err, "failed to start agent", "agent", fmt.Sprintf("%T", agent)) +klog.Warningf("writers queue is full, dropped %d samples (queue_size=%d)", len(items), l) +``` + +For debug-only branches such as `agent/metrics_reader.go`, remove the branch and use verbosity: + +```go +klog.V(1).InfoS("before gather once", "input", r.inputName) +r.gatherOnce() +klog.V(1).InfoS("after gather once", "input", r.inputName, "duration", time.Since(start)) +``` + +For `heartbeat/heartbeat.go`, keep the helper but convert it to verbosity: + +```go +func heartbeatLogger() klog.Logger { + return logging.Component("heartbeat") +} + +func heartbeatDebug() klog.Verbose { + level := klog.V(1) + return level +} +``` + +Then replace `if debug() { log.Printf(...) }` with: + +```go +heartbeatDebug().InfoS("heartbeat request", "body", string(bs)) +``` + +- [ ] **Step 4: Preserve non-logging debug behavior and only remove log-only branches** + +Do not remove `DebugMod` or downstream debug toggles that are passed into other libraries. 
Restrict this task to code where `DebugMode` exists only to guard local logging, for example: + +```go +if config.Config.DebugMode { + printTestMetrics(samples) +} +``` + +Replace this in `writer/writers.go` with: + +```go +if klog.V(1).Enabled() { + printTestMetrics(samples) +} +``` + +- [ ] **Step 5: Run targeted tests plus the core runtime policy test** + +Run: + +```bash +go test ./pkg/logging ./agent ./writer ./heartbeat ./config ./parser/... ./pkg/... -count=1 +``` + +Expected: PASS for the touched packages, including `TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches`. + +- [ ] **Step 6: Commit the core runtime migration** + +Run: + +```bash +git add agent/agent.go agent/metrics_agent.go agent/metrics_reader.go agent/prometheus_agent.go agent/ibex_agent.go config/config.go config/hostname.go config/urllabel.go heartbeat/heartbeat.go writer/writer.go writer/writers.go parser/influx/parser.go parser/prometheus/parser.go pkg/aop/logger.go pkg/aop/recovery.go pkg/httpx/client.go pkg/httpx/transport.go pkg/kubernetes/pod.go pkg/pprof/profile.go pkg/snmp/translate.go pkg/logging/repository_policy_test.go +git commit -m "refactor: migrate core runtime logging to klog" +``` + +Expected: One commit containing the shared runtime-path migration and the updated core policy scope. 
+ +### Task 4: Migrate API, Service Management, Ibex, And Logs Runtime Packages + +**Files:** +- Modify: `api/router_falcon.go` +- Modify: `api/router_opentsdb.go` +- Modify: `api/server.go` +- Modify: `agent/install/service_linux.go` +- Modify: `agent/update/update_linux.go` +- Modify: `agent/update/update_windows.go` +- Modify: `ibex/client/cli.go` +- Modify: `ibex/heartbeat.go` +- Modify: `ibex/task.go` +- Modify: `ibex/tasks.go` +- Modify: `logs/auditor/auditor.go` +- Modify: `logs/client/http/destination.go` +- Modify: `logs/client/kafka/destination.go` +- Modify: `logs/client/kafka/producer.go` +- Modify: `logs/client/tcp/connection_manager.go` +- Modify: `logs/decoder/auto_multiline_handler.go` +- Modify: `logs/decoder/decoder.go` +- Modify: `logs/decoder/line_parser.go` +- Modify: `logs/input/container/launcher.go` +- Modify: `logs/input/file/file_provider.go` +- Modify: `logs/input/file/scanner.go` +- Modify: `logs/input/file/tailer.go` +- Modify: `logs/input/file/tailer_nix.go` +- Modify: `logs/input/file/tailer_windows.go` +- Modify: `logs/input/journald/launcher.go` +- Modify: `logs/input/journald/tailer.go` +- Modify: `logs/input/kubernetes/json_parser.go` +- Modify: `logs/input/kubernetes/launcher.go` +- Modify: `logs/input/kubernetes/scanner.go` +- Modify: `logs/input/listener/tailer.go` +- Modify: `logs/input/listener/tcp.go` +- Modify: `logs/input/listener/udp.go` +- Modify: `logs/message/origin.go` +- Modify: `logs/processor/processor.go` +- Modify: `logs/sender/batch_strategy.go` +- Modify: `logs/sender/stream_strategy.go` +- Modify: `logs/tag/provider.go` +- Modify: `logs/util/containers/filter.go` +- Modify: `logs/util/containers/providers/provider.go` +- Modify: `logs/util/debug.go` +- Modify: `logs/util/docker/containers.go` +- Modify: `logs/util/docker/docker.go` +- Modify: `logs/util/docker/event_pull.go` +- Modify: `logs/util/docker/event_stream.go` +- Modify: `logs/util/docker/global.go` +- Modify: `logs/util/docker/network.go` +- Modify: 
`logs/util/docker/rancher.go` +- Modify: `logs/util/docker/storage.go` +- Modify: `logs/util/kubernetes/kubelet/containers.go` +- Modify: `logs/util/kubernetes/kubelet/kubelet.go` +- Modify: `logs/util/kubernetes/kubelet/kubelet_client.go` +- Modify: `logs/util/kubernetes/kubelet/kubelet_hosts.go` +- Modify: `logs/util/kubernetes/kubelet/podwatcher.go` +- Modify: `logs/util/kubernetes/tags/builder.go` +- Modify: `pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Expand the repository policy test to the API, service, ibex, and logs batch** + +Add a second policy test that walks these directories recursively: + +```go +func TestServiceAndLogsRuntimeDoesNotUseStandardLogOrDebugBranches(t *testing.T) { + roots := []string{ + filepath.Join("..", "..", "api"), + filepath.Join("..", "..", "ibex"), + filepath.Join("..", "..", "logs"), + filepath.Join("..", "..", "agent", "install"), + filepath.Join("..", "..", "agent", "update"), + } + assertNoForbiddenLogging(t, roots) +} +``` + +Use an `assertNoForbiddenLogging` helper that skips `vendor`, `docs`, `README.md`, generated protobufs, and `_test.go`. + +- [ ] **Step 2: Run the new policy test to confirm RED** + +Run: + +```bash +go test ./pkg/logging -run TestServiceAndLogsRuntimeDoesNotUseStandardLogOrDebugBranches -count=1 +``` + +Expected: FAIL on the first file in these directories that still contains `log.Printf`, `log.Println`, or a pure log-only `DebugMode` branch. + +- [ ] **Step 3: Migrate the batch to structured `klog`** + +Use consistent `klog` patterns: + +```go +logger := logging.ComponentValues("ibex", "task_id", t.Id) +logger.Error(err, "failed to write args file", "path", argsFile) +logger.Info("task finished") +``` + +For `logs/util/debug.go`, keep a helper but return verbosity instead of a `DebugMode` boolean: + +```go +func Verbose() klog.Verbose { + return klog.V(1) +} +``` + +Then change callsites from: + +```go +if util.Debug() { + log.Println("D! 
docker event", event) +} +``` + +to: + +```go +util.Verbose().InfoS("docker event", "event", event) +``` + +- [ ] **Step 4: Run package tests for the touched service/runtime code** + +Run: + +```bash +go test ./pkg/logging ./api ./agent/... ./ibex/... ./logs/... -count=1 +``` + +Expected: PASS for the touched packages, including the new service/logs policy test. + +- [ ] **Step 5: Commit the service and logs migration** + +Run: + +```bash +git add api/router_falcon.go api/router_opentsdb.go api/server.go agent/install/service_linux.go agent/update/update_linux.go agent/update/update_windows.go ibex/client/cli.go ibex/heartbeat.go ibex/task.go ibex/tasks.go logs pkg/logging/repository_policy_test.go +git commit -m "refactor: migrate service and logs runtime logging" +``` + +Expected: One commit containing only the API/service, ibex, and logs runtime migration. + +### Task 5: Migrate Inputs Framework And Medium-Complexity Collectors + +**Files:** +- Modify: `inputs/collector.go` +- Modify: `inputs/http_provider.go` +- Modify: `inputs/provider_manager.go` +- Modify: `inputs/aliyun/cloud.go` +- Modify: `inputs/amd_rocm_smi/amd_rocm_smi.go` +- Modify: `inputs/appdynamics/instances.go` +- Modify: `inputs/arp_packet/arp_packet.go` +- Modify: `inputs/bind/bind.go` +- Modify: `inputs/cadvisor/instances.go` +- Modify: `inputs/chrony/chrony.go` +- Modify: `inputs/clickhouse/clickhouse.go` +- Modify: `inputs/cloudwatch/cloudwatch.go` +- Modify: `inputs/conntrack/conntrack.go` +- Modify: `inputs/consul/consul.go` +- Modify: `inputs/cpu/cpu.go` +- Modify: `inputs/dcgm/exporter.go` +- Modify: `inputs/disk/disk.go` +- Modify: `inputs/diskio/diskio.go` +- Modify: `inputs/dmesg/dmesg.go` +- Modify: `inputs/dns_query/dns_query.go` +- Modify: `inputs/docker/docker.go` +- Modify: `inputs/emc_unity/emc_unity.go` +- Modify: `inputs/ethtool/command_linux.go` +- Modify: `inputs/ethtool/ethtool_linux.go` +- Modify: `inputs/ethtool/ethtool_notlinux.go` +- Modify: 
`inputs/ethtool/namespace_linux.go` +- Modify: `inputs/exec/exec.go` +- Modify: `inputs/filecount/filecount.go` +- Modify: `inputs/gnmi/gnmi.go` +- Modify: `inputs/gnmi/handler.go` +- Modify: `inputs/googlecloud/instances.go` +- Modify: `inputs/greenplum/greenplum.go` +- Modify: `inputs/hadoop/hadoop.go` +- Modify: `inputs/haproxy/exporter.go` +- Modify: `inputs/haproxy/haproxy.go` +- Modify: `inputs/http_response/http_response.go` +- Modify: `inputs/huatuo/huatuo.go` +- Modify: `inputs/ipmi/instances.go` +- Modify: `inputs/iptables/iptables.go` +- Modify: `inputs/ipvs/ipvs_linux_amd64.go` +- Modify: `inputs/jenkins/jenkins.go` +- Modify: `inputs/jolokia/gatherer.go` +- Modify: `inputs/jolokia_agent/jolokia_agent.go` +- Modify: `inputs/jolokia_proxy/jolokia_proxy.go` +- Modify: `inputs/kafka/kafka.go` +- Modify: `inputs/kernel/kernel.go` +- Modify: `inputs/kernel_vmstat/kernel_vmstat.go` +- Modify: `inputs/kubernetes/kubernetes.go` +- Modify: `inputs/ldap/ldap.go` +- Modify: `inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go` +- Modify: `inputs/logstash/logstash.go` +- Modify: `inputs/mem/mem.go` +- Modify: `inputs/mongodb/mongodb.go` +- Modify: `inputs/mongodb/mongodb_server.go` +- Modify: `inputs/mysql/binlog.go` +- Modify: `inputs/mysql/custom_queries.go` +- Modify: `inputs/mysql/engine_innodb.go` +- Modify: `inputs/mysql/global_status.go` +- Modify: `inputs/mysql/global_variables.go` +- Modify: `inputs/mysql/mysql.go` +- Modify: `inputs/mysql/processlist.go` +- Modify: `inputs/mysql/processlist_by_user.go` +- Modify: `inputs/mysql/schema_size.go` +- Modify: `inputs/mysql/slave_status.go` +- Modify: `inputs/mysql/table_size.go` +- Modify: `inputs/nats/nats.go` +- Modify: `inputs/net/net.go` +- Modify: `inputs/net_response/net_response.go` +- Modify: `inputs/netstat/netstat.go` +- Modify: `inputs/netstat_filter/netstat_filter.go` +- Modify: `inputs/netstat_filter/netstat_tcp.go` +- Modify: `inputs/nfsclient/nfsclient.go` +- Modify: `inputs/nginx/nginx.go` +- 
Modify: `inputs/nginx_upstream_check/nginx_upstream_check.go` +- Modify: `inputs/nsq/nsq.go` +- Modify: `inputs/ntp/ntp.go` +- Modify: `inputs/nvidia_smi/builder.go` +- Modify: `inputs/nvidia_smi/nvidia_smi.go` +- Modify: `inputs/oracle/oracle.go` +- Modify: `inputs/phpfpm/phpfpm.go` +- Modify: `inputs/ping/ping.go` +- Modify: `inputs/ping/ping_notwindows.go` +- Modify: `inputs/ping/ping_windows.go` +- Modify: `inputs/postgresql/postgresql.go` +- Modify: `inputs/processes/processes_notwindows.go` +- Modify: `inputs/procstat/procstat.go` +- Modify: `inputs/procstat/win_service_windows.go` +- Modify: `inputs/prometheus/consul.go` +- Modify: `inputs/prometheus/prometheus.go` +- Modify: `inputs/rabbitmq/rabbitmq.go` +- Modify: `inputs/redfish/redfish.go` +- Modify: `inputs/redis/redis.go` +- Modify: `inputs/redis_sentinel/redis_sentinel.go` +- Modify: `inputs/rocketmq_offset/rocketmq.go` +- Modify: `inputs/self_metrics/metrics.go` +- Modify: `inputs/smart/instances.go` +- Modify: `inputs/snmp/health_check.go` +- Modify: `inputs/snmp/instances.go` +- Modify: `inputs/snmp/netsnmp.go` +- Modify: `inputs/snmp/table.go` +- Modify: `inputs/snmp/wrapper.go` +- Modify: `inputs/snmp_trap/snmp_trap.go` +- Modify: `inputs/sockstat/sockstat.go` +- Modify: `inputs/sqlserver/sqlserver.go` +- Modify: `inputs/supervisor/supervisor.go` +- Modify: `inputs/switch_legacy/switch_legacy.go` +- Modify: `inputs/system/ps.go` +- Modify: `inputs/system/system.go` +- Modify: `inputs/systemd/systemd_linux.go` +- Modify: `inputs/tengine/tengine.go` +- Modify: `inputs/tomcat/tomcat.go` +- Modify: `inputs/traffic_server/traffic_server.go` +- Modify: `inputs/vsphere/client.go` +- Modify: `inputs/vsphere/endpoint.go` +- Modify: `inputs/vsphere/finder.go` +- Modify: `inputs/vsphere/tscache.go` +- Modify: `inputs/vsphere/vsphere.go` +- Modify: `inputs/whois/whois.go` +- Modify: `inputs/x509_cert/x509_cert.go` +- Modify: `inputs/xskyapi/xskyapi.go` +- Modify: `inputs/zookeeper/zookeeper.go` +- Modify: 
`pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Add a recursive inputs policy test for the medium-complexity batch** + +Add this test to `pkg/logging/repository_policy_test.go`: + +```go +func TestInputsMediumBatchDoesNotUseStandardLog(t *testing.T) { + roots := []string{ + filepath.Join("..", "..", "inputs"), + } + skip := []string{ + filepath.Join("inputs", "elasticsearch"), + filepath.Join("inputs", "ipmi", "exporter"), + filepath.Join("inputs", "mtail"), + filepath.Join("inputs", "node_exporter"), + filepath.Join("inputs", "snmp_zabbix"), + } + assertNoForbiddenLoggingExcept(t, roots, skip) +} +``` + +- [ ] **Step 2: Run the new inputs policy test to verify RED** + +Run: + +```bash +go test ./pkg/logging -run TestInputsMediumBatchDoesNotUseStandardLog -count=1 +``` + +Expected: FAIL because the medium-complexity inputs files still contain standard-library logging or log-only `DebugMode` branches. + +- [ ] **Step 3: Migrate inputs callsites while preserving non-logging debug controls** + +Use `klog` directly for local logs and keep `DebugMod` only where it is passed into another system: + +```go +logger := logging.ComponentValues("inputs", "plugin", ins.Name(), "target", ins.Address) +logger.Error(err, "failed to query target") +klog.V(2).InfoS("collector request", "plugin", ins.Name(), "url", url) +``` + +For cases like `inputs/cloudwatch/cloudwatch.go` and `inputs/vsphere/client.go`, keep `ins.DebugMod` when it is forwarded into SDK or collector configuration, but replace local branches such as: + +```go +if ins.DebugMod { + log.Printf("D! cloudwatch request: %s", req) +} +``` + +with: + +```go +klog.V(1).InfoS("cloudwatch request", "request", req) +``` + +- [ ] **Step 4: Run the medium-batch package tests and policy test** + +Run: + +```bash +go test ./pkg/logging ./inputs/... -count=1 +``` + +Expected: PASS for the touched inputs packages and `TestInputsMediumBatchDoesNotUseStandardLog`. 
+ +- [ ] **Step 5: Commit the medium-batch inputs migration** + +Run: + +```bash +git add inputs pkg/logging/repository_policy_test.go +git commit -m "refactor: migrate collector logging to klog" +``` + +Expected: One commit covering the inputs framework and medium-complexity collector migration. + +### Task 6: Migrate Heavyweight Collector Subtrees And Run The Final Repository Sweep + +**Files:** +- Modify: `inputs/elasticsearch/collector/categraf_utils.go` +- Modify: `inputs/elasticsearch/collector/cluster_health.go` +- Modify: `inputs/elasticsearch/collector/cluster_health_indices.go` +- Modify: `inputs/elasticsearch/collector/cluster_settings.go` +- Modify: `inputs/elasticsearch/collector/cluster_stats.go` +- Modify: `inputs/elasticsearch/collector/collector.go` +- Modify: `inputs/elasticsearch/collector/indices.go` +- Modify: `inputs/elasticsearch/collector/indices_mappings.go` +- Modify: `inputs/elasticsearch/collector/indices_settings.go` +- Modify: `inputs/elasticsearch/collector/nodes.go` +- Modify: `inputs/elasticsearch/collector/shards.go` +- Modify: `inputs/elasticsearch/collector/tasks.go` +- Modify: `inputs/elasticsearch/collector/util.go` +- Modify: `inputs/elasticsearch/elasticsearch.go` +- Modify: `inputs/elasticsearch/pkg/clusterinfo/clusterinfo.go` +- Modify: `inputs/elasticsearch/pkg/roundtripper/roundtripper.go` +- Modify: `inputs/ipmi/exporter/collector_bmc.go` +- Modify: `inputs/ipmi/exporter/collector_bmc_watchdog.go` +- Modify: `inputs/ipmi/exporter/collector_chassis.go` +- Modify: `inputs/ipmi/exporter/collector_dcmi.go` +- Modify: `inputs/ipmi/exporter/collector_ipmi.go` +- Modify: `inputs/ipmi/exporter/collector_notwindows.go` +- Modify: `inputs/ipmi/exporter/collector_sel.go` +- Modify: `inputs/ipmi/exporter/collector_sm_lan_mode.go` +- Modify: `inputs/ipmi/exporter/freeipmi/freeipmi.go` +- Modify: `inputs/mtail/internal/exporter/export.go` +- Modify: `inputs/mtail/internal/exporter/json.go` +- Modify: 
`inputs/mtail/internal/exporter/prometheus.go` +- Modify: `inputs/mtail/internal/metrics/metric.go` +- Modify: `inputs/mtail/internal/metrics/store.go` +- Modify: `inputs/mtail/internal/mtail/golden/reader.go` +- Modify: `inputs/mtail/internal/mtail/httpstatus.go` +- Modify: `inputs/mtail/internal/mtail/mtail.go` +- Modify: `inputs/mtail/internal/runtime/compiler/checker/checker.go` +- Modify: `inputs/mtail/internal/runtime/compiler/codegen/codegen.go` +- Modify: `inputs/mtail/internal/runtime/compiler/compiler.go` +- Modify: `inputs/mtail/internal/runtime/compiler/parser/lexer.go` +- Modify: `inputs/mtail/internal/runtime/compiler/types/types.go` +- Modify: `inputs/mtail/internal/runtime/runtime.go` +- Modify: `inputs/mtail/internal/runtime/vm/vm.go` +- Modify: `inputs/mtail/internal/tailer/logstream/cancel.go` +- Modify: `inputs/mtail/internal/tailer/logstream/dgramstream.go` +- Modify: `inputs/mtail/internal/tailer/logstream/fifostream.go` +- Modify: `inputs/mtail/internal/tailer/logstream/filestream.go` +- Modify: `inputs/mtail/internal/tailer/logstream/logstream.go` +- Modify: `inputs/mtail/internal/tailer/logstream/socketstream.go` +- Modify: `inputs/mtail/internal/tailer/tail.go` +- Modify: `inputs/mtail/internal/waker/testwaker.go` +- Modify: `inputs/mtail/mtail.go` +- Modify: `inputs/node_exporter/collector/buddyinfo.go` +- Modify: `inputs/node_exporter/collector/collector.go` +- Modify: `inputs/node_exporter/collector/cpu_linux.go` +- Modify: `inputs/node_exporter/collector/diskstats_common.go` +- Modify: `inputs/node_exporter/collector/diskstats_linux.go` +- Modify: `inputs/node_exporter/collector/ethtool_linux.go` +- Modify: `inputs/node_exporter/collector/filesystem_common.go` +- Modify: `inputs/node_exporter/collector/netclass_rtnl_linux.go` +- Modify: `inputs/node_exporter/collector/netdev_common.go` +- Modify: `inputs/node_exporter/collector/ntp.go` +- Modify: `inputs/node_exporter/collector/perf_linux.go` +- Modify: 
`inputs/node_exporter/collector/qdisc_linux.go` +- Modify: `inputs/node_exporter/collector/runit.go` +- Modify: `inputs/node_exporter/collector/supervisord.go` +- Modify: `inputs/node_exporter/collector/systemd_linux.go` +- Modify: `inputs/node_exporter/collector/textfile.go` +- Modify: `inputs/node_exporter/exporter.go` +- Modify: `inputs/snmp_zabbix/collector.go` +- Modify: `inputs/snmp_zabbix/discovery.go` +- Modify: `inputs/snmp_zabbix/discovery_scheduler.go` +- Modify: `inputs/snmp_zabbix/preprocessing.go` +- Modify: `inputs/snmp_zabbix/scheduler.go` +- Modify: `inputs/snmp_zabbix/snmp.go` +- Modify: `inputs/snmp_zabbix/snmp_client.go` +- Modify: `inputs/snmp_zabbix/template.go` +- Modify: `pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Expand the inputs policy test to cover the heavyweight subtrees** + +Replace the skip list in `TestInputsMediumBatchDoesNotUseStandardLog` with a full recursive inputs assertion: + +```go +func TestAllInputsDoNotUseStandardLog(t *testing.T) { + roots := []string{ + filepath.Join("..", "..", "inputs"), + } + assertNoForbiddenLogging(t, roots) +} +``` + +Keep explicit skips only for docs, fixtures, generated outputs, `README.md`, and `_test.go`. + +- [ ] **Step 2: Run the full inputs policy test to confirm RED** + +Run: + +```bash +go test ./pkg/logging -run TestAllInputsDoNotUseStandardLog -count=1 +``` + +Expected: FAIL on one of the heavyweight collector files listed in this task. + +- [ ] **Step 3: Migrate the heavyweight subtrees to `klog`** + +Use the same repository conventions throughout: + +```go +logger := logging.ComponentValues("inputs", "plugin", "elasticsearch", "cluster", clusterName) +logger.Error(err, "failed to fetch cluster health") +klog.V(2).InfoS("collector request", "plugin", "node_exporter", "collector", name) +``` + +For collectors that still use `DebugMod`, keep external debug toggles but replace local log-only branches with `klog.V(level)` calls. 
For example: + +```go +if klog.V(2).Enabled() { + klog.V(2).InfoS("received cluster info update", "cluster", ci.ClusterName) +} +``` + +- [ ] **Step 4: Run the final package tests and repository-wide scans** + +Run: + +```bash +go test ./... -count=1 +rg -n '\blog\.(Printf|Println|Fatal|Fatalf|Fatalln)\b' . --glob '!vendor/**' --glob '!docs/**' --glob '!**/*_test.go' +rg -n 'if\s+(config\.Config\.DebugMode|Config\.DebugMode)\s*\{' . --glob '!vendor/**' --glob '!docs/**' +``` + +Expected: + +- `go test ./...` passes +- the first `rg` command returns no repository-owned runtime matches +- the second `rg` command returns no pure `DebugMode` log-branch matches; remaining `DebugMod` uses are limited to downstream behavior or non-logging logic + +- [ ] **Step 5: Commit the final collector migration and policy closure** + +Run: + +```bash +git add inputs pkg/logging/repository_policy_test.go +git commit -m "refactor: complete repository-wide klog migration" +``` + +Expected: One commit containing the heavyweight subtree migration plus the final policy scope. 
+ +## Self-Review + +- Spec coverage: the plan includes shared `klog` setup, startup integration, structured logger reuse for `inputs`, core/runtime migration, service/logs migration, inputs migration, and final repository verification +- Placeholder scan: no `TODO`, `TBD`, or “similar to previous task” references remain; each task names exact files, commands, and concrete code patterns +- Type consistency: the plan consistently uses `pkg/logging.RegisterFlags`, `pkg/logging.Configure`, `pkg/logging.Sync`, `logging.Component`, and `logging.ComponentValues` across tasks + diff --git a/docs/superpowers/plans/2026-04-13-inputs-klog-init.md b/docs/superpowers/plans/2026-04-13-inputs-klog-init.md new file mode 100644 index 000000000..ac25a6ca7 --- /dev/null +++ b/docs/superpowers/plans/2026-04-13-inputs-klog-init.md @@ -0,0 +1,396 @@ +# Inputs Klog Init Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a logger-aware `inputs` initialization path that passes a shared `klog` logger with plugin and instance context during input startup, while keeping legacy `Init() error` plugins working unchanged. + +**Architecture:** Extend the `inputs` framework with a logger-aware initializer interface and route all initialization through `inputs.MayInit(t, logger)`. In `MetricsAgent.inputGo`, build structured child loggers for the input and each instance, use them for framework initialization logs, and pass them into `MayInit`. Keep compatibility by calling `InitWithLogger(klog.Logger)` when available and falling back to legacy `Init() error` otherwise. 
+ +**Tech Stack:** Go 1.25, `k8s.io/klog/v2`, existing `agent` and `inputs` packages, `go test` + +--- + +### Task 1: Create the Branch And Lock Down `inputs` Initializer Behavior With Tests + +**Files:** +- Modify: `inputs/inputs.go` +- Create: `inputs/inputs_test.go` + +- [ ] **Step 1: Create the feature branch from the current local `main`** + +Run: + +```bash +git switch -c feat/inputs-klog-init +``` + +Expected: Git reports a new branch named `feat/inputs-klog-init`. + +- [ ] **Step 2: Write the failing tests for logger-aware and legacy initializer dispatch** + +Add `inputs/inputs_test.go` with focused table-free tests: + +```go +package inputs + +import ( + "errors" + "testing" + + "k8s.io/klog/v2" +) + +type legacyInitializer struct { + called bool + err error +} + +func (i *legacyInitializer) Init() error { + i.called = true + return i.err +} + +type klogInitializer struct { + called bool + logger klog.Logger + err error +} + +func (i *klogInitializer) InitWithLogger(logger klog.Logger) error { + i.called = true + i.logger = logger + return i.err +} + +func TestMayInitPrefersKlogInitializer(t *testing.T) {} +func TestMayInitFallsBackToLegacyInitializer(t *testing.T) {} +func TestMayInitReturnsNilForNonInitializer(t *testing.T) {} +func TestMayInitPropagatesInitializerErrors(t *testing.T) {} +``` + +- [ ] **Step 3: Run the new `inputs` tests to verify RED** + +Run: + +```bash +go test ./inputs -run TestMayInit -count=1 +``` + +Expected: FAIL because `MayInit` does not yet accept a logger or dispatch to the new interface. 
+ +- [ ] **Step 4: Implement the minimal `inputs` framework change** + +Update `inputs/inputs.go` to introduce the new interface and logger-aware dispatch: + +```go +package inputs + +import ( + "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" +) + +type Initializer interface { + Init() error +} + +type KlogInitializer interface { + InitWithLogger(klog.Logger) error +} + +func MayInit(t interface{}, logger klog.Logger) error { + if initializer, ok := t.(KlogInitializer); ok { + return initializer.InitWithLogger(logger) + } + if initializer, ok := t.(Initializer); ok { + return initializer.Init() + } + return nil +} +``` + +- [ ] **Step 5: Update the tests to use the final interface and verify GREEN** + +Complete `inputs/inputs_test.go` so it checks: + +```go +func TestMayInitPrefersKlogInitializer(t *testing.T) { + logger := klog.Background().WithName("inputs-test") + initializer := &klogInitializer{} + + if err := MayInit(initializer, logger); err != nil { + t.Fatalf("MayInit() error = %v", err) + } + + if !initializer.called { + t.Fatal("expected logger-aware initializer to be called") + } + if initializer.logger != logger { + t.Fatal("expected logger to be passed through unchanged") + } +} +``` + +Run: + +```bash +go test ./inputs -run TestMayInit -count=1 +``` + +Expected: PASS. + +- [ ] **Step 6: Commit the isolated framework dispatch change** + +Run: + +```bash +git add inputs/inputs.go inputs/inputs_test.go +git commit -m "test: cover inputs logger-aware init dispatch" +``` + +Expected: A commit containing only the `inputs` dispatch and tests. 
+ +### Task 2: Add Metrics Agent Tests For Input And Instance Logger Context + +**Files:** +- Modify: `agent/metrics_agent.go` +- Create: `agent/metrics_agent_test.go` + +- [ ] **Step 1: Write the failing metrics agent tests around initialization logger propagation** + +Add `agent/metrics_agent_test.go` with a fake input, fake provider-free agent setup, and logger-aware fake input/instance types: + +```go +package agent + +import ( + "testing" + + "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" +) + +type fakeInput struct { + initLogger klog.Logger + instanceLogger klog.Logger + instances []inputs.Instance +} + +func (f *fakeInput) Clone() inputs.Input { return f } +func (f *fakeInput) Name() string { return "fake" } +func (f *fakeInput) GetLabels() map[string]string { return nil } +func (f *fakeInput) GetInterval() config.Duration { return 0 } +func (f *fakeInput) InitInternalConfig() error { return nil } +func (f *fakeInput) Process(s *types.SampleList) *types.SampleList { return s } +func (f *fakeInput) InitWithLogger(logger klog.Logger) error { f.initLogger = logger; return nil } +func (f *fakeInput) GetInstances() []inputs.Instance { return f.instances } + +type fakeInstance struct { + initLogger klog.Logger + initialized bool +} + +func (f *fakeInstance) Initialized() bool { return f.initialized } +func (f *fakeInstance) SetInitialized() { f.initialized = true } +func (f *fakeInstance) GetLabels() map[string]string { return map[string]string{"target": "demo"} } +func (f *fakeInstance) GetIntervalTimes() int64 { return 1 } +func (f *fakeInstance) InitInternalConfig() error { return nil } +func (f *fakeInstance) Process(s *types.SampleList) *types.SampleList { return s } +func (f *fakeInstance) InitWithLogger(logger klog.Logger) error { f.initLogger = logger; return nil } + +func TestMetricsAgentInputGoPassesLoggerToInputAndInstance(t *testing.T) {} +func 
TestMetricsAgentInputGoKeepsErrInstancesEmptyBehavior(t *testing.T) {} +``` + +The final test should assert logger context through production-owned helper functions that return the key-value pairs used to build the logger. + +- [ ] **Step 2: Run the agent tests to verify RED** + +Run: + +```bash +go test ./agent -run TestMetricsAgentInputGo -count=1 +``` + +Expected: FAIL because `MetricsAgent.inputGo` does not yet construct or pass structured `klog` loggers. + +- [ ] **Step 3: Introduce the smallest production helper needed for deterministic logger testing** + +In `agent/metrics_agent.go`, add narrow helpers instead of embedding all logger derivation inline: + +```go +func inputInitLoggerValues(name string, sum string) []interface{} { + _, inputKey := inputs.ParseInputName(name) + return []interface{}{"input", name, "plugin", inputKey, "checksum", sum} +} + +func newInputInitLogger(name string, sum string) klog.Logger { + return klog.Background(). + WithName("inputs"). + WithValues(inputInitLoggerValues(name, sum)...) +} + +func instanceInitLoggerValues(idx int, labels map[string]string) []interface{} { + values := []interface{}{"instance_index", idx} + if target, ok := labels["target"]; ok && target != "" { + values = append(values, "instance_target", target) + } + return values +} + +func newInstanceInitLogger(logger klog.Logger, idx int, labels map[string]string) klog.Logger { + return logger.WithValues(instanceInitLoggerValues(idx, labels)...) +} +``` + +Keep the helper small and stable so tests can verify semantics without poking at unrelated agent state. 
+ +- [ ] **Step 4: Update `inputGo` to use the logger helpers and `inputs.MayInit(..., logger)`** + +Wire the initialization path like this: + +```go +func (ma *MetricsAgent) inputGo(name string, sum string, input inputs.Input) { + inputLogger := newInputInitLogger(name, sum) + + if err := input.InitInternalConfig(); err != nil { + inputLogger.Error(err, "failed to init input internal config") + return + } + + if err := inputs.MayInit(input, inputLogger); err != nil { + // preserve ErrInstancesEmpty behavior + return + } + + instances := inputs.MayGetInstances(input) + for i := 0; i < len(instances); i++ { + instanceLogger := newInstanceInitLogger(inputLogger, i, instances[i].GetLabels()) + if err := inputs.MayInit(instances[i], instanceLogger); err != nil { + // preserve current semantics + continue + } + } +} +``` + +- [ ] **Step 5: Complete the tests and verify GREEN** + +Update `agent/metrics_agent_test.go` to check the following (add `"reflect"` to the test file's imports, since the assertions use `reflect.DeepEqual`): + +```go +func TestMetricsAgentInputGoPassesLoggerToInputAndInstance(t *testing.T) { + instance := &fakeInstance{} + input := &fakeInput{instances: []inputs.Instance{instance}} + agent := &MetricsAgent{InputReaders: NewReaders()} + + agent.inputGo("local.fake", "sum-1", input) + + if !instance.initialized { + t.Fatal("expected instance to be marked initialized") + } + + inputValues := inputInitLoggerValues("local.fake", "sum-1") + if !reflect.DeepEqual([]interface{}{"input", "local.fake", "plugin", "fake", "checksum", "sum-1"}, inputValues) { + t.Fatalf("unexpected input logger values: %#v", inputValues) + } + + instanceValues := instanceInitLoggerValues(0, map[string]string{"target": "demo"}) + if !reflect.DeepEqual([]interface{}{"instance_index", 0, "instance_target", "demo"}, instanceValues) { + t.Fatalf("unexpected instance logger values: %#v", instanceValues) + } +} +``` + +Run: + +```bash +go test ./agent -run TestMetricsAgentInputGo -count=1 +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Run the targeted package tests together** + +Run: + +```bash +go test ./inputs ./agent -count=1 +``` + +Expected: PASS. + +- [ ] **Step 7: Commit the metrics agent logger propagation** + +Run: + +```bash +git add agent/metrics_agent.go agent/metrics_agent_test.go +git commit -m "feat: pass klog through input initialization" +``` + +Expected: A commit containing the agent logger propagation and tests. + +### Task 3: Verify End-To-End Behavior And Clean Up + +**Files:** +- Modify: `agent/metrics_agent.go` +- Modify: `inputs/inputs.go` +- Modify: `agent/metrics_agent_test.go` +- Modify: `inputs/inputs_test.go` + +- [ ] **Step 1: Replace touched initialization log statements with structured `klog` logging** + +Keep this change scoped to the touched initialization path. For example: + +```go +inputLogger.Error(err, "failed to init input") +inputLogger.V(1).Info("no instances for input") +inputLogger.Info("input started") +``` + +Do not rewrite unrelated files or unrelated agent startup logs in this task. + +- [ ] **Step 2: Run focused regression tests** + +Run: + +```bash +go test ./inputs ./agent -run 'TestMayInit|TestMetricsAgentInputGo' -count=1 +``` + +Expected: PASS. + +- [ ] **Step 3: Run broader verification for touched packages** + +Run: + +```bash +go test ./inputs ./agent -count=1 +``` + +Expected: PASS. + +- [ ] **Step 4: Inspect the diff for scope control** + +Run: + +```bash +git diff -- inputs/inputs.go inputs/inputs_test.go agent/metrics_agent.go agent/metrics_agent_test.go +``` + +Expected: The diff is limited to logger-aware init interfaces, structured init logging, and tests. + +- [ ] **Step 5: Commit the final cleanup if needed** + +Run: + +```bash +git add inputs/inputs.go inputs/inputs_test.go agent/metrics_agent.go agent/metrics_agent_test.go +git commit -m "refactor: normalize input init logging" +``` + +Expected: Either no-op if previous commits already captured the exact diff, or a small final cleanup commit. 
diff --git a/docs/superpowers/plans/2026-04-14-global-klog-standardization.md b/docs/superpowers/plans/2026-04-14-global-klog-standardization.md new file mode 100644 index 000000000..ebfa8f86f --- /dev/null +++ b/docs/superpowers/plans/2026-04-14-global-klog-standardization.md @@ -0,0 +1,603 @@ +# Global Klog Standardization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Standardize repository-owned logging on `k8s.io/klog/v2`, remove `log.Println` / `log.Printf` from repository code, tests, and docs, and replace log-only `DebugMode` branches with verbosity-based `klog` output. + +**Architecture:** Introduce one shared logging bootstrap under `pkg/logging`, wire it into process startup, then migrate call sites in batches. Use focused package tests, a repository policy test for core files, and final `rg` sweeps to ensure repository-owned runtime code, tests, and docs no longer preserve the legacy logging path. 
+ +**Tech Stack:** Go 1.25, `k8s.io/klog/v2`, `flag`, `gopkg.in/natefinch/lumberjack.v2`, existing `agent`, `writer`, `inputs`, `api`, `heartbeat`, `ibex`, `pkg`, `parser`, `config` packages, `go test`, `rg` + +--- + +## File Map + +- `pkg/logging/logging.go`: shared `klog` flag registration, output routing, verbosity mapping, stdlib bridge, and flush helper +- `pkg/logging/logging_test.go`: logging bootstrap tests +- `pkg/logging/repository_policy_test.go`: core repository guardrails for runtime logging patterns +- `main.go`, `main_posix.go`, `main_windows.go`: process bootstrap, service command logs, flush on exit +- `agent/*.go`, `writer/*.go`, `heartbeat/*.go`, `config/*.go`: first migration wave for core runtime paths and `DebugMode` cleanup +- `inputs/**/*.go`, `pkg/**/*.go`, `api/**/*.go`, `parser/**/*.go`, `ibex/**/*.go`: second migration wave for repository-owned packages +- `agent/metrics_agent_test.go`, `inputs/inputs_test.go`, any other touched `*_test.go`: tests aligned with the new logger behavior +- `docs/superpowers/plans/*.md`, `docs/superpowers/specs/*.md`, other repo docs that demonstrate old logging: documentation cleanup + +### Task 1: Add Shared Logging Bootstrap And First Policy Tests + +**Files:** +- Create: `pkg/logging/logging.go` +- Create: `pkg/logging/logging_test.go` +- Create: `pkg/logging/repository_policy_test.go` + +- [ ] **Step 1: Write the failing logging bootstrap test** + +Create `pkg/logging/logging_test.go`: + +```go +package logging + +import ( + "bytes" + "flag" + stdlog "log" + "strings" + "testing" + + "k8s.io/klog/v2" +) + +func TestConfigureMapsDebugToVerbosity(t *testing.T) { + state := klog.CaptureState() + defer state.Restore() + + fs := flag.NewFlagSet("logging-test", flag.ContinueOnError) + RegisterFlags(fs) + if err := fs.Parse(nil); err != nil { + t.Fatalf("Parse() error = %v", err) + } + + var buf bytes.Buffer + if err := configureWithWriter(&buf, fs, true, 0); err != nil { + t.Fatalf("configureWithWriter() 
error = %v", err) + } + + klog.V(1).InfoS("debug enabled") + klog.Flush() + + if !strings.Contains(buf.String(), "debug enabled") { + t.Fatalf("expected verbosity output, got %q", buf.String()) + } +} + +func TestConfigureBridgesStandardLibraryLog(t *testing.T) { + state := klog.CaptureState() + defer state.Restore() + + fs := flag.NewFlagSet("logging-bridge-test", flag.ContinueOnError) + RegisterFlags(fs) + if err := fs.Parse(nil); err != nil { + t.Fatalf("Parse() error = %v", err) + } + + var buf bytes.Buffer + if err := configureWithWriter(&buf, fs, false, 0); err != nil { + t.Fatalf("configureWithWriter() error = %v", err) + } + + stdlog.Println("legacy bridge message") + klog.Flush() + + if !strings.Contains(buf.String(), "legacy bridge message") { + t.Fatalf("expected bridged log output, got %q", buf.String()) + } +} +``` + +- [ ] **Step 2: Write the failing core policy test** + +Create `pkg/logging/repository_policy_test.go`: + +```go +package logging + +import ( + "os" + "path/filepath" + "regexp" + "testing" +) + +var forbiddenStdLog = regexp.MustCompile(`\blog\.(Println|Printf|Fatal|Fatalf|Fatalln)\b`) +var forbiddenDebugBranch = regexp.MustCompile(`if\s+config\.Config\.DebugMode\s*\{`) + +func TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches(t *testing.T) { + files := []string{ + filepath.Join("..", "..", "main.go"), + filepath.Join("..", "..", "main_posix.go"), + filepath.Join("..", "..", "main_windows.go"), + filepath.Join("..", "..", "agent", "agent.go"), + filepath.Join("..", "..", "agent", "metrics_agent.go"), + filepath.Join("..", "..", "agent", "metrics_reader.go"), + filepath.Join("..", "..", "writer", "writer.go"), + filepath.Join("..", "..", "writer", "writers.go"), + filepath.Join("..", "..", "heartbeat", "heartbeat.go"), + } + + for _, file := range files { + content, err := os.ReadFile(file) + if err != nil { + t.Fatalf("ReadFile(%s) error = %v", file, err) + } + if forbiddenStdLog.Match(content) { + t.Fatalf("forbidden standard log call 
remains in %s", file) + } + if forbiddenDebugBranch.Match(content) { + t.Fatalf("forbidden DebugMode branch remains in %s", file) + } + } +} +``` + +- [ ] **Step 3: Verify RED** + +Run: + +```bash +go test ./pkg/logging -count=1 +``` + +Expected: FAIL because the package does not exist yet and the policy test references files still using stdlib logging. + +- [ ] **Step 4: Implement the shared logging bootstrap** + +Create `pkg/logging/logging.go`: + +```go +package logging + +import ( + "flag" + "io" + stdlog "log" + "os" + "strconv" + "sync" + "time" + + "gopkg.in/natefinch/lumberjack.v2" + "k8s.io/klog/v2" +) + +var flushOnce sync.Once + +func RegisterFlags(fs *flag.FlagSet) { + klog.InitFlags(fs) +} + +func Configure(output string, maxSize, maxAge, maxBackups int, localTime, compress, debug bool, debugLevel int) error { + return configureWithWriter(newWriter(output, maxSize, maxAge, maxBackups, localTime, compress), flag.CommandLine, debug, debugLevel) +} + +func configureWithWriter(writer io.Writer, fs *flag.FlagSet, debug bool, debugLevel int) error { + level := debugLevel + if debug && level == 0 { + level = 1 + } + + if err := fs.Set("logtostderr", "false"); err != nil { + return err + } + if err := fs.Set("alsologtostderr", "false"); err != nil { + return err + } + if err := fs.Set("stderrthreshold", "FATAL"); err != nil { + return err + } + if err := fs.Set("v", strconv.Itoa(level)); err != nil { + return err + } + + stdlog.SetFlags(0) + klog.SetOutput(writer) + klog.CopyStandardLogTo("INFO") + flushOnce.Do(func() { + klog.StartFlushDaemon(5 * time.Second) + }) + return nil +} + +func newWriter(output string, maxSize, maxAge, maxBackups int, localTime, compress bool) io.Writer { + switch output { + case "", "stdout": + return os.Stdout + case "stderr": + return os.Stderr + default: + return &lumberjack.Logger{ + Filename: output, + MaxSize: maxSize, + MaxAge: maxAge, + MaxBackups: maxBackups, + LocalTime: localTime, + Compress: compress, + } + } +} + +func 
Sync() { + klog.Flush() +} +``` + +- [ ] **Step 5: Verify GREEN for the bootstrap tests** + +Run: + +```bash +go test ./pkg/logging -run 'TestConfigure' -count=1 +``` + +Expected: PASS for the bootstrap tests, while the repository policy test still fails until runtime call sites are migrated. + +- [ ] **Step 6: Commit the bootstrap** + +Run: + +```bash +git add pkg/logging/logging.go pkg/logging/logging_test.go pkg/logging/repository_policy_test.go +git commit -m "feat: add shared klog bootstrap" +``` + +### Task 2: Wire Bootstrap Into Process Startup And Convert Core Runtime Files + +**Files:** +- Modify: `main.go` +- Modify: `main_posix.go` +- Modify: `main_windows.go` +- Modify: `agent/agent.go` +- Modify: `agent/metrics_agent.go` +- Modify: `agent/metrics_reader.go` +- Modify: `writer/writers.go` +- Modify: `writer/writer.go` +- Modify: `heartbeat/heartbeat.go` +- Test: `pkg/logging/repository_policy_test.go` +- Test: `agent/metrics_agent_test.go` + +- [ ] **Step 1: Lock the core policy test scope** + +Keep `pkg/logging/repository_policy_test.go` enforcing the core runtime files before any migration work: + +```go +files := []string{ + filepath.Join("..", "..", "main.go"), + filepath.Join("..", "..", "main_posix.go"), + filepath.Join("..", "..", "main_windows.go"), + filepath.Join("..", "..", "agent", "agent.go"), + filepath.Join("..", "..", "agent", "metrics_agent.go"), + filepath.Join("..", "..", "agent", "metrics_reader.go"), + filepath.Join("..", "..", "writer", "writer.go"), + filepath.Join("..", "..", "writer", "writers.go"), + filepath.Join("..", "..", "heartbeat", "heartbeat.go"), +} +``` + +- [ ] **Step 2: Run the core policy test to verify RED** + +Run: + +```bash +go test ./pkg/logging -run TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches -count=1 +``` + +Expected: FAIL on existing `log.Println` / `DebugMode` usage. 
+ +- [ ] **Step 3: Wire `pkg/logging` into startup and migrate core files** + +Update `main.go` to register `klog` flags and initialize logging after config load: + +```go +import ( + "flag" + "fmt" + "os" + // ... + + "flashcat.cloud/categraf/pkg/logging" + "k8s.io/klog/v2" +) + +func init() { + logging.RegisterFlags(flag.CommandLine) + // existing appPath/chdir logic +} + +func initLog() { + if err := logging.Configure( + config.Config.Log.FileName, + config.Config.Log.MaxSize, + config.Config.Log.MaxAge, + config.Config.Log.MaxBackups, + config.Config.Log.LocalTime, + config.Config.Log.Compress, + config.Config.DebugMode, + config.Config.DebugLevel, + ); err != nil { + fmt.Fprintf(os.Stderr, "failed to configure logging: %v\n", err) + os.Exit(1) + } +} +``` + +Update representative call sites: + +```go +// main.go +klog.InfoS("received signal", "signal", sig.String()) +klog.InfoS("runner env", "binarydir", runner.Cwd, "hostname", runner.Hostname, "fd_limits", runner.FdLimits(), "vm_limits", runner.VMLimits()) + +// main_posix.go +klog.ErrorS(err, "reaping children failed") +klog.InfoS("reaped child process", "pid", e.pid, "status", e.status) + +// agent/metrics_reader.go +klog.V(1).InfoS("before gather once", "input", r.inputName) +klog.V(1).InfoS("after gather once", "input", r.inputName, "duration", time.Since(start)) +klog.ErrorS(fmt.Errorf("panic: %v", rc), "gather metrics panic", "input", r.inputName, "stack", string(runtimex.Stack(3))) + +// agent/metrics_agent.go +klog.Warningf("no instances for input: %s", inputKey) +klog.InfoS("input started", "name", name) + +// writer/writers.go +klog.Errorf("write %d samples failed, please increase queue size(%d)", len(items), l) +klog.V(1).InfoS("write time series", "count", len(timeSeries), "duration_ms", time.Since(now).Milliseconds()) +``` + +- [ ] **Step 4: Verify GREEN on core packages** + +Run: + +```bash +go test ./pkg/logging ./agent ./writer ./heartbeat -count=1 +``` + +Expected: PASS, including the 
repository policy test for the enforced core files. + +- [ ] **Step 5: Commit the core runtime migration** + +Run: + +```bash +git add main.go main_posix.go main_windows.go agent/agent.go agent/metrics_agent.go agent/metrics_reader.go writer/writer.go writer/writers.go heartbeat/heartbeat.go pkg/logging/repository_policy_test.go +git commit -m "refactor: migrate core runtime logs to klog" +``` + +### Task 3: Remove Log-Only `DebugMode` Branches And Align Logger-Aware Tests + +**Files:** +- Modify: `inputs/http_provider.go` +- Modify: `agent/metrics_agent.go` +- Modify: `agent/metrics_reader.go` +- Modify: `writer/writers.go` +- Modify: `agent/metrics_agent_test.go` +- Modify: `inputs/inputs_test.go` + +- [ ] **Step 1: Write or extend a failing test for logger-aware initialization** + +Add or extend `agent/metrics_agent_test.go` to assert logger-aware initialization still happens after the migration: + +```go +func TestMetricsAgentInputGoUsesLoggerInitForInputAndInstances(t *testing.T) { + restore := setupMetricsAgentTestConfig() + defer restore() + + agent := &MetricsAgent{ + InputReaders: NewReaders(), + } + instance := &testMetricsInstance{ + labels: map[string]string{"target": "demo"}, + } + input := &testMetricsInput{ + instances: []inputs.Instance{instance}, + } + + agent.inputGo("provider.demo", "sum", input) + + if input.loggerInit != 1 { + t.Fatalf("expected logger-aware init once, got %d", input.loggerInit) + } + if instance.loggerInit != 1 { + t.Fatalf("expected instance logger-aware init once, got %d", instance.loggerInit) + } +} +``` + +- [ ] **Step 2: Verify RED for the affected packages** + +Run: + +```bash +go test ./agent ./inputs -count=1 +``` + +Expected: FAIL until `inputGo`, `MayInit`, and the log-gated code paths are aligned with the new logger flow. 
+ +- [ ] **Step 3: Replace log-only `DebugMode` branches with verbosity-based logging** + +Representative edits: + +```go +// agent/metrics_reader.go +klog.V(1).InfoS("before gather once", "input", r.inputName) +r.gatherOnce() +klog.V(1).InfoS("after gather once", "input", r.inputName, "duration", time.Since(start)) + +// writer/writers.go +if config.Config.TestMode { + printTestMetrics(samples) + return +} +klog.V(1).InfoS("queued samples", "count", len(samples)) + +// inputs/http_provider.go +klog.V(2).InfoS("collector request", "plugin", ins.Name(), "url", url) +klog.V(2).InfoS("collector response", "plugin", ins.Name(), "status", resp.StatusCode) +``` + +Keep `DebugMode` only where it changes behavior beyond logging. + +- [ ] **Step 4: Align logger-aware helpers** + +Ensure `inputs.MayInit` keeps preferring `InitWithLogger`: + +```go +func MayInit(target interface{}, logger klog.Logger) error { + if in, ok := target.(interface{ InitWithLogger(klog.Logger) error }); ok { + return in.InitWithLogger(logger) + } + if in, ok := target.(interface{ Init() error }); ok { + return in.Init() + } + return nil +} +``` + +- [ ] **Step 5: Verify GREEN** + +Run: + +```bash +go test ./agent ./inputs -count=1 +``` + +Expected: PASS, with no remaining log-only `DebugMode` branches in the touched files. 
+ +- [ ] **Step 6: Commit the debug-branch cleanup** + +Run: + +```bash +git add agent/metrics_agent.go agent/metrics_reader.go writer/writers.go inputs/http_provider.go agent/metrics_agent_test.go inputs/inputs_test.go +git commit -m "refactor: replace debug log branches with klog verbosity" +``` + +### Task 4: Migrate Remaining Repository-Owned Packages In Batches + +**Files:** +- Modify: `api/**/*.go` +- Modify: `config/**/*.go` +- Modify: `ibex/**/*.go` +- Modify: `parser/**/*.go` +- Modify: `pkg/**/*.go` +- Modify: `inputs/**/*.go` + +- [ ] **Step 1: Capture the next failing scope with a search** + +Run: + +```bash +rg -n 'log\.(Println|Printf)\(' api config ibex parser pkg inputs +``` + +Expected: non-empty output listing the next migration batch. + +- [ ] **Step 2: Migrate one batch at a time** + +Apply the same severity mapping throughout the batch: + +```go +// old +log.Println("E! failed to collect metrics:", err) +log.Printf("W! Couldn't stat target %v: %v", target, err) +log.Println("D! http_response... target:", target) + +// new +klog.ErrorS(err, "failed to collect metrics") +klog.Warningf("couldn't stat target %v: %v", target, err) +klog.V(1).InfoS("http_response target", "target", target) +``` + +Prefer `InfoS` / `ErrorS` when there are stable key/value fields to extract. + +- [ ] **Step 3: Verify each batch immediately** + +Run package tests after each edited subtree. Use the smallest package set that matches the edit: + +```bash +go test ./api/... -count=1 +go test ./config/... -count=1 +go test ./ibex/... -count=1 +go test ./parser/... -count=1 +go test ./pkg/... -count=1 +go test ./inputs/... -count=1 +``` + +Expected: PASS for each touched subtree before moving on. 
+ +- [ ] **Step 4: Commit the package migrations** + +Use one or more commits, but keep them scoped by area: + +```bash +git add api config ibex parser pkg inputs +git commit -m "refactor: migrate repository logs to klog" +``` + +### Task 5: Clean Tests And Documentation, Then Run Final Verification + +**Files:** +- Modify: `**/*_test.go` that still demonstrates stdlib logging +- Modify: `docs/**/*.md` +- Modify: `docs/superpowers/specs/2026-04-14-global-klog-standardization-design.md` +- Modify: `docs/superpowers/plans/2026-04-13-global-klog-logging.md` + +- [ ] **Step 1: Find failing cleanup scope** + +Run: + +```bash +rg -n 'log\.(Println|Printf)\(' . --glob '**/*_test.go' --glob 'docs/**/*.md' --glob '!docs/superpowers/plans/2026-04-14-global-klog-standardization.md' +rg -n 'if config\.Config\.DebugMode \{' . --glob '**/*_test.go' --glob 'docs/**/*.md' --glob '!docs/superpowers/plans/2026-04-14-global-klog-standardization.md' +``` + +Expected: matches in tests and documentation only. + +- [ ] **Step 2: Update test and doc examples to canonical `klog` style** + +Representative replacements: + +```go +// tests/docs old +log.Println("E! failed to collect metrics:", err) +if config.Config.DebugMode { + log.Println("D! heartbeat response:", string(bs)) +} + +// tests/docs new +klog.ErrorS(err, "failed to collect metrics") +klog.V(1).InfoS("heartbeat response", "body", string(bs)) +``` + +For prose examples, remove `I!/W!/E!/D!` prefixes unless the doc is explicitly discussing the legacy format. + +- [ ] **Step 3: Run final verification** + +Run: + +```bash +go test ./pkg/logging ./agent ./writer ./heartbeat ./api/... ./config/... ./ibex/... ./parser/... ./pkg/... ./inputs/... -count=1 +rg -n 'log\.(Println|Printf)\(' . --glob '!vendor/**' --glob '!docs/superpowers/plans/2026-04-14-global-klog-standardization.md' +rg -n 'if config\.Config\.DebugMode \{' . 
--glob '!vendor/**' --glob '!docs/superpowers/plans/2026-04-14-global-klog-standardization.md' +``` + +Expected: +- `go test` passes for all touched packages +- `rg` returns no repository-owned runtime, test, or doc matches other than any explicitly accepted exclusions + +- [ ] **Step 4: Commit the cleanup and verification state** + +Run: + +```bash +git add docs/superpowers/plans/2026-04-13-global-klog-logging.md docs/superpowers/specs/2026-04-14-global-klog-standardization-design.md +git commit -m "docs: align tests and docs with klog logging policy" +``` diff --git a/docs/superpowers/specs/2026-04-13-inputs-klog-init-design.md b/docs/superpowers/specs/2026-04-13-inputs-klog-init-design.md new file mode 100644 index 000000000..a472442ff --- /dev/null +++ b/docs/superpowers/specs/2026-04-13-inputs-klog-init-design.md @@ -0,0 +1,106 @@ +# Inputs Klog Initialization Design + +## Goal + +Standardize `inputs` initialization logging by passing a shared `klog` logger into plugin and instance initialization, while preserving compatibility for existing plugins that still implement `Init() error`. + +## Context + +`MetricsAgent.inputGo` currently initializes inputs and instances through `inputs.MayInit`, and both the agent and plugins emit logs through mixed mechanisms. The initialization path does not carry structured logger context, which makes it hard to normalize fields like plugin name, checksum, and instance identity. + +## Approaches Considered + +### 1. Explicit logger injection in `InitWithLogger(logger)` with compatibility fallback + +Add a new logger-aware initializer interface in `inputs`, change `MayInit` to accept a logger, and let it call either `InitWithLogger(klog.Logger) error` or legacy `Init() error`. + +This keeps logger dependency explicit, allows per-plugin and per-instance child loggers, and avoids a flag day migration across all plugins. + +### 2. 
Two-phase injection with `SetLogger(logger)` plus existing `Init()` + +Inject the logger before `Init()` through a secondary interface. + +This reduces signature churn but weakens lifecycle guarantees because logger setup and initialization become separate calls. + +### 3. Global logger only via `klog.SetLogger(...)` + +Rely on package-global logging and let plugins fetch a logger implicitly. + +This is the smallest change, but it does not make initialization dependencies explicit and does not naturally carry per-plugin or per-instance context from the caller. + +## Decision + +Use approach 1. + +Introduce a new logger-aware initializer interface and update the initialization path to pass derived `klog` loggers with stable context fields. Keep legacy `Init() error` support so unchanged plugins continue to work. + +## Design + +### Interface changes + +In `inputs/inputs.go`: + +- Add `type KlogInitializer interface { InitWithLogger(klog.Logger) error }` +- Change `MayInit` to `func MayInit(t interface{}, logger klog.Logger) error` +- `MayInit` should prefer `KlogInitializer`; if unavailable, it should fall back to the current `Initializer` + +This preserves backwards compatibility while creating a single framework entrypoint for initialization logging. + +### Logger derivation + +In `agent/metrics_agent.go`: + +- Create a root logger for the metrics agent initialization flow +- Derive an input logger with fields such as: + - `component=inputs` + - `input=` + - `plugin=` + - `checksum=` +- Derive an instance logger from the input logger for each instance with: + - `instance_index=` + - optional identifying labels when available and cheap to compute + +Framework logs emitted during initialization should use these structured loggers instead of the current plain `log.Println` calls in the touched path. + +### Plugin migration model + +No immediate repo-wide migration is required. 
+ +- Existing plugins that only implement `Init() error` continue to work through the compatibility branch in `MayInit` +- Plugins that need structured initialization logs can opt into `InitWithLogger(klog.Logger) error` +- The framework remains the single place where context is attached + +### Error handling + +- Preserve current initialization behavior and error semantics +- Continue special-casing `types.ErrInstancesEmpty` +- Do not silently swallow logger-aware initializer errors +- Avoid introducing expensive reflection or plugin-specific heuristics for instance identity + +## Testing + +Add focused unit tests for `inputs.MayInit`: + +- logger-aware initializer is preferred when both interfaces are present +- legacy initializer still works +- non-initializer returns `nil` +- errors propagate unchanged + +Add focused tests for `MetricsAgent.inputGo` or the smallest practical extraction around it: + +- input initialization receives a logger with plugin/input context +- instance initialization receives a derived logger +- initialization failure paths still stop startup as before + +## Files Expected To Change + +- `inputs/inputs.go` +- `inputs/inputs_test.go` or equivalent new test file +- `agent/metrics_agent.go` +- `agent/metrics_agent_test.go` or equivalent new test file + +## Non-Goals + +- Converting all plugin runtime logging to `klog` in this change +- Rewriting unrelated non-input logging paths +- Adding a new logging abstraction beyond `klog` diff --git a/docs/superpowers/specs/2026-04-14-global-klog-standardization-design.md b/docs/superpowers/specs/2026-04-14-global-klog-standardization-design.md new file mode 100644 index 000000000..4e28c9e9c --- /dev/null +++ b/docs/superpowers/specs/2026-04-14-global-klog-standardization-design.md @@ -0,0 +1,182 @@ +# Global Klog Standardization Design + +## Summary + +Standardize the `categraf` repository on `k8s.io/klog/v2` as its primary logging surface.
Replace repository-owned `log.Println` / `log.Printf` call sites with leveled `klog` logging, remove `DebugMode` branches whose only purpose is local debug log emission, and update tests plus documentation so the repository no longer teaches or preserves the legacy standard-library logging path. + +## Goals + +- Eliminate repository-owned `log.Println` and `log.Printf` usage from runtime code, tests, and docs. +- Remove `if config.Config.DebugMode { ... }` branches when they only gate local debug logging. +- Route informational, warning, error, and debug output through a shared `klog` policy. +- Keep external behavior stable apart from log formatting and verbosity control. +- Leave non-logging business logic unchanged. + +## Non-Goals + +- No unrelated refactors of collector logic, agent lifecycle, or writer behavior. +- No redesign of config structure beyond what is needed to map existing debug semantics onto `klog` verbosity. +- No changes to third-party code, vendored code, or generated code unless a repository-owned generated artifact clearly needs documentation cleanup. + +## Scope + +The migration applies to the full repository surface requested by the user: + +- Runtime packages such as `agent`, `writer`, `inputs`, `heartbeat`, `config`, `parser`, `api`, `pkg`, and related repository-owned packages. +- Test files that currently demonstrate or depend on legacy logging style. +- Documentation under `docs/`, including prior implementation plans that show the old style. + +If a `DebugMode` field controls downstream library behavior rather than just local logging, preserve that behavioral control and only remove the local log-only branch. + +## Existing Context + +The repository already contains an uncommitted plan under `docs/superpowers/plans/2026-04-13-global-klog-logging.md` plus logging-related test scaffolding. 
Current runtime call sites still include many `log.Println(...)` usages and several `config.Config.DebugMode` branches in files such as: + +- `agent/metrics_agent.go` +- `agent/metrics_reader.go` +- `writer/writers.go` +- `inputs/http_provider.go` + +`go.mod` already carries `k8s.io/klog/v2`, so the migration can standardize on the existing dependency rather than introducing a second logger. + +## Logging Policy + +### Primary API + +Use `k8s.io/klog/v2` directly or through the repository's shared logging package if that package is already being introduced for initialization and policy enforcement. + +### Level Mapping + +- Normal lifecycle and status messages: `klog.InfoS(...)` or `klog.Infof(...)` +- Warnings: `klog.Warningf(...)` +- Errors with structured context: prefer `klog.ErrorS(err, msg, kv...)` +- Errors without a clean error object: `klog.Errorf(...)` +- Debug output previously guarded by `DebugMode`: `klog.V(1).InfoS(...)` +- Very noisy collector or request/response tracing: `klog.V(2).InfoS(...)` + +### Formatting Rule + +Do not preserve legacy `I!`, `W!`, `E!`, `D!` prefixes inside the message text unless a call site is intentionally preserving exact output for compatibility. `klog` level selection should carry the severity. + +### DebugMode Rule + +Replace log-only `DebugMode` gates with verbosity-driven logging. Example: + +```go +klog.V(1).InfoS("before gather once", "input", r.inputName) +``` + +instead of: + +```go +if config.Config.DebugMode { + log.Println("D!", r.inputName, ": before gather once") +} +``` + +Where `DebugMode` controls printing of metrics or other operational behavior beyond logging, keep that behavior and only change the local logging expression. + +## Architecture + +### Shared Initialization + +The repository should have one place that configures `klog`, bridges legacy standard-library logging if transitional support is still needed, and maps existing debug configuration onto `klog` verbosity. 
+ +Responsibilities of the shared logging setup: + +- register `klog` flags +- choose output target consistently +- map existing debug configuration to a minimum verbosity level +- flush logs on shutdown +- optionally bridge `log` package output during migration, but not preserve `log.Println` as an accepted final repository pattern + +### Call Site Migration Strategy + +Apply migration in repository-owned packages in place rather than building a large compatibility wrapper for every old call pattern. The main work is mechanical but should still be reviewed per file for: + +- severity choice +- structured fields worth extracting +- removal of stale `I!/W!/E!/D!` string prefixes +- preservation of non-logging behavior in `DebugMode` branches + +### Policy Enforcement + +Add or extend repository policy tests to fail when forbidden patterns remain in the intended scope. The policy should at minimum detect: + +- `log.Println` / `log.Printf` in repository-owned code paths under migration +- `if config.Config.DebugMode {` branches that exist only to emit logs in the enforced scope + +## File-Level Design + +### Shared Logging Package + +If `pkg/logging` is being introduced, it should own: + +- `klog` flag registration +- configuration and output wiring +- standard-library bridging during transition +- flush lifecycle helpers +- repository policy tests + +If equivalent initialization already exists elsewhere, prefer consolidating there rather than duplicating logging setup. 
+ +### Runtime Packages + +Representative transformations: + +- `agent/metrics_reader.go`: replace pre/post gather debug branches with `klog.V(1)` and panic logging with `klog.ErrorS` or `klog.Errorf` +- `agent/metrics_agent.go`: replace startup, init, unsupported input, and no-instance messages with `klog` and remove log-only debug branches +- `writer/writers.go`: keep `TestMode` behavior, preserve metric printing semantics, but move queue/debug logs to `klog` +- `inputs/http_provider.go`: convert request/response debug branches to verbosity-based logging +- `heartbeat/heartbeat.go` and similar packages: convert legacy prefixed messages to severity-appropriate `klog` + +### Tests + +Tests should validate two things: + +- logging configuration and verbosity behavior work as intended +- forbidden legacy patterns are absent from enforced repository scope + +Existing untracked tests such as `agent/metrics_agent_test.go` and `inputs/inputs_test.go` should be reviewed and aligned with the final logging policy rather than left half-integrated. + +### Documentation + +Update repository docs and plan files that still demonstrate `log.Println`, `log.Printf`, or `DebugMode`-guarded logging. Documentation should reflect the canonical `klog` style so future contributors do not reintroduce the old pattern. + +## Error Handling + +The migration should not silence errors. When replacing `log.Println("E! ...", err)`, preserve the original operational context and include key identifiers as structured fields where practical. If the original code logged and returned, the new code must still log and return. If the original code logged inside a recover block, the new code must preserve that recover path. + +## Testing Strategy + +1. Add or finish focused tests for shared logging configuration. +2. Add or finish repository policy tests covering the first enforced scope. +3. Run package-level `go test` for directly modified packages. +4. 
Run repository-wide `rg` verification for forbidden legacy patterns. +5. Expand policy scope if needed once the first migration batch is stable. + +## Risks And Mitigations + +### Risk: Mechanical replacement changes semantics + +Mitigation: Review each `DebugMode` branch to distinguish pure logging from real behavior control before deleting the branch. + +### Risk: Inconsistent severity mapping + +Mitigation: Use a simple, repo-wide mapping and normalize legacy prefix-based messages into `klog` levels. + +### Risk: Partial migration leaves repo in mixed state + +Mitigation: Use repository policy tests plus final `rg` sweeps on runtime code, tests, and docs. + +### Risk: Untracked local work overlaps with migration + +Mitigation: Avoid reverting existing untracked tests/docs; incorporate them where they match the approved direction. + +## Success Criteria + +- Repository-owned `log.Println` / `log.Printf` call sites targeted by the migration are replaced with `klog`. +- Log-only `DebugMode` branches are removed in favor of verbosity-based `klog` calls. +- Tests and docs no longer demonstrate the legacy logging style. +- Modified packages pass targeted `go test` runs. +- Final repository sweeps confirm the targeted legacy patterns are gone from code, tests, and docs. diff --git a/heartbeat/heartbeat.go b/heartbeat/heartbeat.go index 91594caaf..dc563bbef 100644 --- a/heartbeat/heartbeat.go +++ b/heartbeat/heartbeat.go @@ -6,7 +6,6 @@ import ( "encoding/json" keyset "flashcat.cloud/categraf/set/key" "io" - "log" "net" "net/http" "os" @@ -17,6 +16,7 @@ import ( "time" cpuUtil "github.com/shirou/gopsutil/v3/cpu" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs/system" @@ -52,7 +52,7 @@ func Work() { client, err := newHTTPClient() if err != nil { - log.Println("E! 
failed to create heartbeat client:", err) + klog.ErrorS(err, "failed to create heartbeat client") return } @@ -84,7 +84,7 @@ func newHTTPClient() (*http.Client, error) { if strings.HasPrefix(config.Config.Heartbeat.Url, "https:") { tlsCfg, err := config.Config.Heartbeat.TLSConfig() if err != nil { - log.Println("E! failed to init tls:", err) + klog.ErrorS(err, "failed to init tls") return nil, err } @@ -110,8 +110,8 @@ func version() string { return config.Version } -func debug() bool { - return config.Config.DebugMode && strings.Contains(config.Config.InputFilters, keyset.HeartbeatAgent) +func debugEnabled() bool { + return klog.V(1).Enabled() && strings.Contains(config.Config.InputFilters, keyset.HeartbeatAgent) } func work(ps *system.SystemPS, client *http.Client) { @@ -142,33 +142,33 @@ func work(ps *system.SystemPS, client *http.Client) { } } } else { - log.Println("E! failed to collect system info:", err) + klog.ErrorS(err, "failed to collect system info") } data["unixtime"] = time.Now().UnixMilli() bs, err := json.Marshal(data) if err != nil { - log.Println("E! failed to marshal heartbeat request:", err) + klog.ErrorS(err, "failed to marshal heartbeat request") return } var buf bytes.Buffer g := gzip.NewWriter(&buf) if _, err = g.Write(bs); err != nil { - log.Println("E! failed to write gzip buffer:", err) + klog.ErrorS(err, "failed to write gzip buffer") } if err = g.Close(); err != nil { - log.Println("E! failed to close gzip buffer:", err) + klog.ErrorS(err, "failed to close gzip buffer") return } - if debug() { - log.Printf("D! heartbeat request: %s", string(bs)) + if debugEnabled() { + klog.V(1).InfoS("heartbeat request", "body", string(bs)) } req, err := http.NewRequest("POST", config.Config.Heartbeat.Url, &buf) if err != nil { - log.Println("E! 
failed to new heartbeat request:", err) + klog.ErrorS(err, "failed to create heartbeat request") return } @@ -189,30 +189,30 @@ func work(ps *system.SystemPS, client *http.Client) { res, err := client.Do(req) if err != nil { - log.Println("E! failed to do heartbeat:", err) + klog.ErrorS(err, "failed to do heartbeat") return } defer res.Body.Close() bs, err = io.ReadAll(res.Body) if err != nil { - log.Println("E! failed to read heartbeat response body:", err, " status code:", res.StatusCode) + klog.ErrorS(err, "failed to read heartbeat response body", "status_code", res.StatusCode) return } if res.StatusCode/100 != 2 { - log.Println("E! heartbeat status code:", res.StatusCode, " response:", string(bs)) + klog.Warningf("heartbeat status code: %d response: %s", res.StatusCode, string(bs)) return } - if debug() { - log.Println("D! heartbeat response:", string(bs), "status code:", res.StatusCode) + if debugEnabled() { + klog.V(1).InfoS("heartbeat response", "body", string(bs), "status_code", res.StatusCode) } hr := HeartbeatResponse{} err = json.Unmarshal(bs, &hr) if err != nil { - log.Println("W! failed to unmarshal heartbeat response:", err) + klog.Warningf("failed to unmarshal heartbeat response: %v", err) return } if len(hr.Data.NewVersion) != 0 && len(hr.Data.UpdateURL) != 0 && hr.Data.NewVersion != shortVersion && hr.Data.NewVersion != config.Version { @@ -222,7 +222,7 @@ func work(ps *system.SystemPS, client *http.Client) { ) exe, err := os.Executable() if err != nil { - log.Println("E! failed to get current executable:", err) + klog.ErrorS(err, "failed to get current executable") return } cmd := osExec.Command(exe, "-update", "-update_url", hr.Data.UpdateURL) @@ -230,22 +230,21 @@ func work(ps *system.SystemPS, client *http.Client) { cmd.Stderr = &stderr err, timeout := cmdx.RunTimeout(cmd, time.Second*300) if timeout { - log.Printf("E! exec %s timeout", cmd.String()) + klog.Warningf("exec %s timeout", cmd.String()) return } if err != nil { - log.Println("E! 
failed to update categraf:", err, "stderr:", stderr.String(), "stdout:", - out.String(), "command:", cmd.String()) + klog.ErrorS(err, "failed to update categraf", "stderr", stderr.String(), "stdout", out.String(), "command", cmd.String()) return } - log.Printf("update categraf(%s) from %s success, new version: %s", version(), hr.Data.UpdateURL, hr.Data.NewVersion) + klog.Infof("update categraf(%s) from %s success, new version: %s", version(), hr.Data.UpdateURL, hr.Data.NewVersion) } } func memUsage(ps *system.SystemPS) float64 { vm, err := ps.VMStat() if err != nil { - log.Println("E! failed to get vmstat:", err) + klog.ErrorS(err, "failed to get vmstat") return 0 } @@ -263,7 +262,7 @@ func cpuUsage(ps *system.SystemPS) float64 { // first times, err := ps.CPUTimes(false, true) if err != nil { - log.Println("E! failed to collect cpu_util:", err) + klog.ErrorS(err, "failed to collect cpu_util") return 0 } @@ -279,7 +278,7 @@ func cpuUsage(ps *system.SystemPS) float64 { // sencond times, err = ps.CPUTimes(false, true) if err != nil { - log.Println("E! failed to collect cpu_util:", err) + klog.ErrorS(err, "failed to collect cpu_util") return 0 } @@ -292,7 +291,7 @@ func cpuUsage(ps *system.SystemPS) float64 { // compute totalDelta := total - lastTotal if totalDelta < 0 { - log.Println("W! 
current total CPU time is less than previous total CPU time") + klog.Warningf("current total CPU time is less than previous total CPU time") return 0 } @@ -311,4 +310,4 @@ func totalCPUTime(t cpuUtil.TimesStat) float64 { func activeCPUTime(t cpuUtil.TimesStat) float64 { active := totalCPUTime(t) - t.Idle return active -} \ No newline at end of file +} diff --git a/heartbeat/memory/memory.go b/heartbeat/memory/memory.go index 2500786c2..f74bdb575 100644 --- a/heartbeat/memory/memory.go +++ b/heartbeat/memory/memory.go @@ -7,9 +7,10 @@ package memory import ( "fmt" - "log" "strconv" "strings" + + "k8s.io/klog/v2" ) type Memory struct{} @@ -54,7 +55,7 @@ func (self *Memory) Collect() (result interface{}, err error) { } tv, e := convert(newTotal, times) if e != nil { - log.Printf("W! parse memory total [%s||%s||%s] error: %s", total, t, newTotal, e) + klog.Warningf("parse memory total [%s||%s||%s] error: %v", total, t, newTotal, e) err = e } else { mem["total"] = fmt.Sprintf("%d", int64(tv)) @@ -93,7 +94,7 @@ func (self *Memory) Collect() (result interface{}, err error) { } tv, e := convert(newSwap, times) if e != nil { - log.Printf("W! 
parse memory swap [%s||%s||%s] error: %s", swap, s, newSwap, err) + klog.Warningf("parse memory swap [%s||%s||%s] error: %v", swap, s, newSwap, e) err = e } else { mem["swap_total"] = fmt.Sprintf("%d", int64(tv)) diff --git a/heartbeat/platform/platform_darwin.go b/heartbeat/platform/platform_darwin.go index a5cbe7154..d100dad83 100644 --- a/heartbeat/platform/platform_darwin.go +++ b/heartbeat/platform/platform_darwin.go @@ -6,10 +6,10 @@ package platform import ( - "log" "strings" "golang.org/x/sys/unix" + "k8s.io/klog/v2" ) var unameOptions = []string{"-s", "-n", "-r", "-m", "-p"} @@ -36,10 +36,10 @@ func updateArchInfo(archInfo map[string]interface{}, values []string) { archInfo["os"] = values[0] if isTranslated, err := processIsTranslated(); err == nil && isTranslated { - log.Println("Running under Rosetta translator; overriding architecture values") + klog.Info("Running under Rosetta translator; overriding architecture values") archInfo["processor"] = "arm" archInfo["machine"] = "arm64" } else if err != nil { - log.Printf("Error when detecting Rosetta translator: %s", err) + klog.Warningf("error when detecting Rosetta translator: %v", err) } } diff --git a/ibex/client/cli.go b/ibex/client/cli.go index 81fb867d5..193d64b3f 100644 --- a/ibex/client/cli.go +++ b/ibex/client/cli.go @@ -6,7 +6,6 @@ import ( "bufio" "fmt" "io" - "log" "math" "net" "net/rpc" @@ -15,6 +14,7 @@ import ( "github.com/toolkits/pkg/net/gobrpc" "github.com/ugorji/go/codec" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/ibex/types" @@ -43,7 +43,7 @@ func getCli() *gobrpc.RPCClient { begin := time.Now() conn, err := net.DialTimeout("tcp", addr, time.Second*5) if err != nil { - log.Printf("W! dial %s fail: %s", addr, err) + klog.Warningf("dial %s fail: %s", addr, err) continue } @@ -64,7 +64,7 @@ func getCli() *gobrpc.RPCClient { var out string err = c.Call("Server.Ping", "", &out) if err != nil { - log.Printf("W! 
ping %s fail: %s", addr, err) + klog.Warningf("ping %s fail: %s", addr, err) continue } use := time.Since(begin).Nanoseconds() @@ -77,11 +77,11 @@ func getCli() *gobrpc.RPCClient { } if address == "" { - log.Println("E! no job server found") + klog.ErrorS(nil, "no job server found") return nil } - log.Printf("I! choose server: %s, duration: %dms", address, duration/1000000) + klog.InfoS("choose ibex server", "address", address, "duration_ms", duration/1000000) for addr, c := range acm { if addr == address { @@ -119,13 +119,13 @@ func Meta(id int64) (script string, args string, account string, stdin string, e var resp types.TaskMetaResponse err = GetCli().Call("Server.GetTaskMeta", id, &resp) if err != nil { - log.Println("E! rpc call Server.GetTaskMeta:", err) + klog.ErrorS(err, "rpc call failed", "rpc", "Server.GetTaskMeta", "task_id", id) CloseCli() return } if resp.Message != "" { - log.Println("E! rpc call Server.GetTaskMeta:", resp.Message) + klog.ErrorS(nil, "rpc call failed", "rpc", "Server.GetTaskMeta", "task_id", id, "message", resp.Message) err = fmt.Errorf("%s", resp.Message) return } diff --git a/ibex/heartbeat.go b/ibex/heartbeat.go index 477e9b682..7219d146f 100644 --- a/ibex/heartbeat.go +++ b/ibex/heartbeat.go @@ -4,7 +4,6 @@ package ibex import ( "context" - "log" "os" "os/signal" "syscall" @@ -13,10 +12,11 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/ibex/client" "flashcat.cloud/categraf/ibex/types" + "k8s.io/klog/v2" ) func heartbeatCron(ctx context.Context, ib *config.IbexConfig) { - log.Println("I! ibex agent start rolling request Server.Report.") + klog.InfoS("ibex agent start rolling request", "rpc", "Server.Report") interval := time.Duration(ib.Interval) for { select { @@ -40,13 +40,13 @@ func heartbeat() { err := client.GetCli().Call("Server.Report", req, &resp) if err != nil { - log.Println("E! 
rpc call Server.Report fail:", err) + klog.ErrorS(err, "rpc call failed", "rpc", "Server.Report") client.CloseCli() return } if resp.Message != "" { - log.Println("E! error from server:", resp.Message) + klog.ErrorS(nil, "error from server", "rpc", "Server.Report", "message", resp.Message) return } @@ -62,7 +62,7 @@ func heartbeat() { } if len(assigned) > 0 { - log.Println("I! assigned tasks:", mapKeys(assigned)) + klog.InfoS("assigned tasks", "task_ids", mapKeys(assigned)) } Locals.Clean(assigned) @@ -85,7 +85,7 @@ func Start() { EXIT: for { sig := <-sc - log.Println("I! ibex agent received signal:", sig.String()) + klog.InfoS("ibex agent received signal", "signal", sig.String()) switch sig { case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: break EXIT diff --git a/ibex/task.go b/ibex/task.go index f535b8214..0d1c68b23 100644 --- a/ibex/task.go +++ b/ibex/task.go @@ -7,7 +7,6 @@ import ( "bytes" "fmt" "io" - "log" "os/exec" "os/user" "path" @@ -18,6 +17,7 @@ import ( "github.com/toolkits/pkg/file" "github.com/toolkits/pkg/sys" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/ibex/client" @@ -84,7 +84,7 @@ func (t *Task) GetStdout() string { b := buf.Bytes() decoded, err := ansiToUtf8(b) if err != nil { - log.Printf("E! convert out to windows-ansi fail: %v", err) + klog.ErrorS(err, "convert stdout from windows ansi failed", "task_id", t.Id) out = string(b) } out = decoded @@ -107,7 +107,7 @@ func (t *Task) GetStderr() string { b := buf.Bytes() decoded, err := ansiToUtf8(b) if err != nil { - log.Printf("E! convert out to windows-ansi fail: %v", err) + klog.ErrorS(err, "convert stderr from windows ansi failed", "task_id", t.Id) out = string(b) } out = decoded @@ -141,15 +141,15 @@ func (t *Task) loadResult() { t.Status, err = file.ReadStringTrim(doneFlag) if err != nil { - log.Printf("E! 
read file %s fail %v", doneFlag, err) + klog.ErrorS(err, "read task status file failed", "task_id", t.Id, "path", doneFlag) } stdout, err := file.ReadString(stdoutFile) if err != nil { - log.Printf("E! read file %s fail %v", stdoutFile, err) + klog.ErrorS(err, "read task stdout file failed", "task_id", t.Id, "path", stdoutFile) } stderr, err := file.ReadString(stderrFile) if err != nil { - log.Printf("E! read file %s fail %v", stderrFile, err) + klog.ErrorS(err, "read task stderr file failed", "task_id", t.Id, "path", stderrFile) } t.Stdout = *bytes.NewBufferString(stdout) @@ -166,7 +166,7 @@ func (t *Task) prepare() error { IdDir := filepath.Join(config.Config.Ibex.MetaDir, fmt.Sprint(t.Id)) err := file.EnsureDir(IdDir) if err != nil { - log.Printf("E! mkdir -p %s fail: %v", IdDir, err) + klog.ErrorS(err, "ensure task dir failed", "task_id", t.Id, "path", IdDir) return err } @@ -176,21 +176,21 @@ func (t *Task) prepare() error { argsFile := filepath.Join(IdDir, "args") args, err := file.ReadStringTrim(argsFile) if err != nil { - log.Printf("E! read %s fail %v", argsFile, err) + klog.ErrorS(err, "read task args file failed", "task_id", t.Id, "path", argsFile) return err } accountFile := filepath.Join(IdDir, "account") account, err := file.ReadStringTrim(accountFile) if err != nil { - log.Printf("E! read %s fail %v", accountFile, err) + klog.ErrorS(err, "read task account file failed", "task_id", t.Id, "path", accountFile) return err } stdinFile := path.Join(IdDir, "stdin") stdin, err := file.ReadStringTrim(stdinFile) if err != nil { - log.Printf("E: read %s fail %v", stdinFile, err) + klog.ErrorS(err, "read task stdin file failed", "task_id", t.Id, "path", stdinFile) return err } @@ -202,7 +202,7 @@ func (t *Task) prepare() error { // 从远端读取,再写入磁盘 script, args, account, stdin, err := client.Meta(t.Id) if err != nil { - log.Println("E! 
query task meta fail:", err) + klog.ErrorS(err, "query task meta failed", "task_id", t.Id) return err } @@ -212,14 +212,14 @@ func (t *Task) prepare() error { // if change to powershell , not convert script and stdin to ANSI and CRLF encodedStdin, err := utf8ToAnsi(stdin) if err != nil { - log.Printf("E! convert stdin[%s] to windows-ansi fail: %v", stdin, err) + klog.ErrorS(err, "convert stdin to windows ansi failed", "task_id", t.Id) return err } stdin = encodedStdin encodedArgs, err := utf8ToAnsi(args) if err != nil { - log.Printf("E! convert args[%s] to windows-ansi fail: %v", args, err) + klog.ErrorS(err, "convert args to windows ansi failed", "task_id", t.Id) return err } args = encodedArgs @@ -228,26 +228,26 @@ func (t *Task) prepare() error { script = strings.ReplaceAll(script, "\n", "\r\n") encodedScript, err := utf8ToAnsi(script) if err != nil { - log.Printf("E! convert script to windows-ansi fail: %v", err) + klog.ErrorS(err, "convert script to windows ansi failed", "task_id", t.Id) return err } scriptFile := filepath.Join(IdDir, "script.bat") _, err = file.WriteString(scriptFile, fmt.Sprintf("@echo off\r\n%s", encodedScript)) if err != nil { - log.Printf("E! write script to %s fail: %v", scriptFile, err) + klog.ErrorS(err, "write windows task script failed", "task_id", t.Id, "path", scriptFile) return err } default: scriptFile := filepath.Join(IdDir, "script") _, err = file.WriteString(scriptFile, script) if err != nil { - log.Printf("E! write script to %s fail: %v", scriptFile, err) + klog.ErrorS(err, "write task script failed", "task_id", t.Id, "path", scriptFile) return err } out, err := sys.CmdOutTrim("chmod", "+x", scriptFile) if err != nil { - log.Printf("E! chmod +x %s fail %v. 
output: %s", scriptFile, err, out) + klog.ErrorS(err, "chmod task script failed", "task_id", t.Id, "path", scriptFile, "output", out) return err } } @@ -255,27 +255,27 @@ func (t *Task) prepare() error { argsFile := filepath.Join(IdDir, "args") _, err = file.WriteString(argsFile, args) if err != nil { - log.Printf("E! write args to %s fail: %v", argsFile, err) + klog.ErrorS(err, "write task args failed", "task_id", t.Id, "path", argsFile) return err } accountFile := filepath.Join(IdDir, "account") _, err = file.WriteString(accountFile, account) if err != nil { - log.Printf("E! write account to %s fail: %v", accountFile, err) + klog.ErrorS(err, "write task account failed", "task_id", t.Id, "path", accountFile) return err } stdinFile := path.Join(IdDir, "stdin") _, err = file.WriteString(stdinFile, stdin) if err != nil { - log.Printf("E: write tags to %s fail: %v", stdinFile, err) + klog.ErrorS(err, "write task stdin failed", "task_id", t.Id, "path", stdinFile) return err } _, err = file.WriteString(writeFlag, "") if err != nil { - log.Printf("E! create %s flag file fail: %v", writeFlag, err) + klog.ErrorS(err, "create task write flag failed", "task_id", t.Id, "path", writeFlag) return err } @@ -311,7 +311,7 @@ func (t *Task) start() { scriptFile, err := filepath.Abs(filepath.Join(config.Config.Ibex.MetaDir, fmt.Sprint(t.Id), scriptFileType)) if err != nil { - log.Println("E! cannot get current absolute path:", err) + klog.ErrorS(err, "cannot get current absolute path", "task_id", t.Id) return } @@ -320,7 +320,7 @@ func (t *Task) start() { loginUser, err := user.Current() if err != nil { - log.Println("E! cannot get current login user:", err) + klog.ErrorS(err, "cannot get current login user", "task_id", t.Id) return } @@ -348,19 +348,19 @@ func (t *Task) start() { stdout, err := t.Cmd.StdoutPipe() if err != nil { - log.Printf("E! 
cannot read ouput of task[%d]: %v", t.Id, err) + klog.ErrorS(err, "cannot read stdout pipe", "task_id", t.Id) } stderr, err := t.Cmd.StderrPipe() if err != nil { - log.Printf("E! cannot read err of task[%d]: %v", t.Id, err) + klog.ErrorS(err, "cannot read stderr pipe", "task_id", t.Id) } err = CmdStart(cmd) if err != nil { - log.Printf("E! cannot start cmd of task[%d]: %v", t.Id, err) + klog.ErrorS(err, "cannot start task command", "task_id", t.Id) return } @@ -410,7 +410,7 @@ func runProcessRealtime(stdout io.ReadCloser, stderr io.ReadCloser, t *Task) { } if err2 != nil { if err2 != io.EOF { - log.Println("W! read stdout fail:", err2) + klog.Warningf("read stdout fail for task %d: %v", t.Id, err2) } break } @@ -428,7 +428,7 @@ func runProcessRealtime(stdout io.ReadCloser, stderr io.ReadCloser, t *Task) { } if err2 != nil { if err2 != io.EOF { - log.Println("W! read stdout fail:", err2) + klog.Warningf("read stderr fail for task %d: %v", t.Id, err2) } break } @@ -439,18 +439,18 @@ func runProcessRealtime(stdout io.ReadCloser, stderr io.ReadCloser, t *Task) { if err != nil { if strings.Contains(err.Error(), "signal: killed") { t.SetStatus("killed") - log.Printf("D! process of task[%d] killed", t.Id) + klog.V(1).InfoS("process killed", "task_id", t.Id) } else if strings.Contains(err.Error(), "signal: terminated") { // kill children process manually t.SetStatus("killed") - log.Printf("D! process of task[%d] terminated", t.Id) + klog.V(1).InfoS("process terminated", "task_id", t.Id) } else { t.SetStatus("failed") - log.Printf("D! process of task[%d] return error: %v", t.Id, err) + klog.V(1).InfoS("process returned error", "task_id", t.Id, "error", err) } } else { t.SetStatus("success") - log.Printf("D! process of task[%d] done", t.Id) + klog.V(1).InfoS("process done", "task_id", t.Id) } persistResult(t) @@ -466,15 +466,15 @@ func killProcess(t *Task) { t.SetAlive(true) defer t.SetAlive(false) - log.Printf("D! 
begin kill process of task[%d]", t.Id) + klog.V(1).InfoS("begin kill process", "task_id", t.Id) err := CmdKill(t.Cmd) if err != nil { t.SetStatus("killfailed") - log.Printf("D! kill process of task[%d] fail: %v", t.Id, err) + klog.V(1).InfoS("kill process failed", "task_id", t.Id, "error", err) } else { t.SetStatus("killed") - log.Printf("D! process of task[%d] killed", t.Id) + klog.V(1).InfoS("process killed", "task_id", t.Id) } persistResult(t) diff --git a/ibex/tasks.go b/ibex/tasks.go index 308e079c5..7529a41f2 100644 --- a/ibex/tasks.go +++ b/ibex/tasks.go @@ -3,9 +3,8 @@ package ibex import ( - "log" - "flashcat.cloud/categraf/ibex/types" + "k8s.io/klog/v2" ) type LocalTasksT struct { @@ -89,7 +88,7 @@ func (lt *LocalTasksT) AssignTask(at types.AssignTask) { local.SetStatus("running") local.start() } else { - log.Printf("W! unknown action: %s of task %d", at.Action, at.Id) + klog.Warningf("unknown action: %s of task %d", at.Action, at.Id) } } diff --git a/inputs/aliyun/cloud.go b/inputs/aliyun/cloud.go index 877a5afdd..be2508f1b 100644 --- a/inputs/aliyun/cloud.go +++ b/inputs/aliyun/cloud.go @@ -3,13 +3,13 @@ package aliyun import ( "context" "fmt" - "log" "regexp" "sync" "time" cms20190101 "github.com/alibabacloud-go/cms-20190101/v8/client" "github.com/alibabacloud-go/tea/tea" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -149,7 +149,7 @@ func (ins *Instance) Init() error { err := ins.initialize() if err != nil { - log.Println("E! 
initialize error:", err) + klog.ErrorS(err, "initialize aliyun client error") return err } @@ -193,7 +193,7 @@ func (ins *Instance) initialize() error { if ins.metaCache.Size() == 0 { hosts, err := ins.client.GetEcsHosts() if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to get aliyun ecs hosts") return err } for _, host := range hosts { @@ -258,7 +258,7 @@ func (ins *Instance) getFilteredMetrics(slist *types.SampleList) ([]filteredMetr if ins.DebugMod { for _, m := range metrics { - log.Println("D!", m.Namespace, m.MetricName, m.Dimensions) + klog.V(1).InfoS("aliyun metric selected", "namespace", m.Namespace, "metric_name", m.MetricName, "dimensions", m.Dimensions) } } @@ -288,7 +288,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } else { filteredMetrics, err := ins.getFilteredMetrics(slist) if err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to get filtered aliyun metrics") return } for _, filtered := range filteredMetrics { @@ -328,7 +328,7 @@ func (ins *Instance) sendMetrics(metric internalTypes.Metric, wg *sync.WaitGroup "callee": "DescribeMetricList", }).SetTime(time.Now())) if err != nil { - log.Printf("E! get metrics %s::%s error, %s", metric.Namespace, metric.MetricName, err) + klog.ErrorS(err, "get aliyun metrics error", "namespace", metric.Namespace, "metric_name", metric.MetricName) return } for _, point := range points { @@ -450,7 +450,7 @@ func (ins *Instance) fetchNamespaceMetrics(slist *types.SampleList, namespaces [ }).SetTime(time.Now())) if err != nil { - log.Printf("E! 
failed to list metrics with namespace %s: %v", namespace, err) + klog.ErrorS(err, "failed to list aliyun metrics", "namespace", namespace) // skip problem namespace on error and continue to next namespace return nil, err } diff --git a/inputs/aliyun/internal/manager/cms.go b/inputs/aliyun/internal/manager/cms.go index e2b714ec2..b5462711f 100644 --- a/inputs/aliyun/internal/manager/cms.go +++ b/inputs/aliyun/internal/manager/cms.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "log" "strconv" "strings" "time" @@ -13,6 +12,7 @@ import ( cms2021101 "github.com/alibabacloud-go/cms-export-20211101/v2/client" openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client" "github.com/alibabacloud-go/tea/tea" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/aliyun/internal/types" "flashcat.cloud/categraf/pkg/limiter" @@ -120,8 +120,13 @@ func (m *Manager) requestDebugLog(req *cms20190101.DescribeMetricListRequest, re token = *resp.Body.NextToken } } - log.Printf("cms.DescribeMetricList request took %s, namespace:%s, metric name:%s, page:%d, request id:%s, next token:%s", - cost, *req.Namespace, *req.MetricName, page, reqid, token) + klog.V(1).InfoS("cms.DescribeMetricList request completed", + "cost", cost.String(), + "namespace", *req.Namespace, + "metric_name", *req.MetricName, + "page", page, + "request_id", reqid, + "next_token", token) } } @@ -148,7 +153,7 @@ func (m *Manager) GetMetric(ctx context.Context, req *cms20190101.DescribeMetric resp, err = m.cms.DescribeMetricList(req) m.requestDebugLog(req, resp, count, time.Since(now)) if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to describe aliyun metric list page") continue } points, err := m.dataPointConverter(*req.MetricName, *req.Namespace, *resp.Body.Datapoints) diff --git a/inputs/amd_rocm_smi/amd_rocm_smi.go b/inputs/amd_rocm_smi/amd_rocm_smi.go index 29f92e52d..de66e51e6 100644 --- a/inputs/amd_rocm_smi/amd_rocm_smi.go +++ b/inputs/amd_rocm_smi/amd_rocm_smi.go @@ -3,7 +3,6 @@ 
package amd_rocm_smi import ( "bytes" "encoding/json" - "log" "os" "os/exec" "strconv" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/cmdx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "amd_rocm_smi" @@ -52,12 +52,12 @@ func (rsmi *ROCmSMI) Name() string { func (rsmi *ROCmSMI) Gather(slist *types.SampleList) { if len(rsmi.BinPath) == 0 { if rsmi.DebugMod { - log.Printf("W! empty rocm-smi's bin_path, cannot query GPUs statistics") + klog.V(1).InfoS("empty rocm-smi bin path, skipping gpu query") } return } if _, err := os.Stat(rsmi.BinPath); os.IsNotExist(err) { - log.Printf("E! rocm-smi binary not found in path %s, cannot query GPUs statistics", rsmi.BinPath) + klog.ErrorS(err, "rocm-smi binary not found, cannot query GPUs statistics", "path", rsmi.BinPath) return } @@ -67,7 +67,7 @@ func (rsmi *ROCmSMI) Gather(slist *types.SampleList) { } err := gatherROCmSMI(data, slist) if err != nil { - log.Printf("E! Error gathering metrics from rocm-smi: %s", err) + klog.ErrorS(err, "failed gathering metrics from rocm-smi", "path", rsmi.BinPath) return } } @@ -121,13 +121,12 @@ func (rsmi *ROCmSMI) pollROCmSMI() []byte { cmd.Stderr = &stderr err, timeout := cmdx.RunTimeout(cmd, time.Duration(rsmi.Timeout)) if timeout { - log.Printf("run command: %s timeout", cmd) + klog.ErrorS(nil, "rocm-smi command timed out", "command", cmd.String(), "timeout", time.Duration(rsmi.Timeout)) return nil } if err != nil { - log.Printf("failed to run command: %s | error: %v | stdout: %s | stderr: %s", - cmd, err, stdout.String(), stderr.String()) + klog.ErrorS(err, "failed to run rocm-smi command", "command", cmd.String(), "stdout", stdout.String(), "stderr", stderr.String()) return nil } // ret, _ := internal.StdOutputTimeout(cmd, time.Duration(rsmi.Timeout)) diff --git a/inputs/appdynamics/instances.go b/inputs/appdynamics/instances.go index f2829b92e..ef4cdba2b 100644 --- a/inputs/appdynamics/instances.go +++ 
b/inputs/appdynamics/instances.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -19,6 +18,7 @@ import ( "flashcat.cloud/categraf/pkg/stringx" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) type ( @@ -84,7 +84,7 @@ func (ins *Instance) prepare() error { tmpl, err := template.New("appdynamics").Parse(ins.URLBase) if err != nil { e := fmt.Errorf("failed to parse url template, error: %s", err) - log.Println(e) + klog.ErrorS(e, "failed to parse appdynamics url template", "url_base", ins.URLBase) return e } @@ -94,20 +94,20 @@ func (ins *Instance) prepare() error { err = tmpl.Execute(&buf, vars) if err != nil { e := fmt.Errorf("failed to prepare url template, error: %s", err) - log.Println(e) + klog.ErrorS(e, "failed to prepare appdynamics url template", "url_base", ins.URLBase) return e } target := buf.String() addr, err := url.Parse(target) if err != nil { e := fmt.Errorf("failed to parse http(s) url: %s, error: %v", target, err) - log.Println(e) + klog.ErrorS(e, "failed to parse appdynamics target url", "target", target) return e } if addr.Scheme != "http" && addr.Scheme != "https" { e := fmt.Errorf("only http and https are supported, url: %s", target) - log.Println(e) + klog.ErrorS(e, "unsupported appdynamics url scheme", "target", target) return e } @@ -241,13 +241,13 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str link = strings.Replace(link, "$END_TIME", fmt.Sprintf("%d", e), -1) u, err := url.Parse(link) if err != nil { - log.Println("E! failed to parse url:", link, "error:", err) + klog.ErrorS(err, "failed to parse appdynamics url", "url", link) return } req, err := http.NewRequest("GET", u.String(), nil) if err != nil { - log.Println("E! 
failed to new request for url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to create appdynamics request", "url", u.String()) return } @@ -255,7 +255,7 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str gTags, err := ins.GenerateLabel(u) if err != nil { - log.Println("E! failed to generate url label value:", err) + klog.ErrorS(err, "failed to generate appdynamics url label", "url", u.String()) return } for k, v := range gTags { @@ -265,13 +265,13 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str res, err := ins.client.Do(req) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels).SetTime(tm)) - log.Println("E! failed to query url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to query appdynamics url", "url", u.String()) return } if res.StatusCode != http.StatusOK { slist.PushFront(types.NewSample("", "up", 0, labels).SetTime(tm)) - log.Println("E! failed to query url:", u.String(), "status code:", res.StatusCode) + klog.ErrorS(nil, "failed to query appdynamics url", "url", u.String(), "status_code", res.StatusCode) return } @@ -280,7 +280,7 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str body, err := io.ReadAll(res.Body) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels).SetTime(tm)) - log.Println("E! failed to read response body, url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to read appdynamics response body", "url", u.String()) return } @@ -288,7 +288,7 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str metrics := []Metric{} err = json.Unmarshal(body, &metrics) if err != nil { - log.Printf("E! 
failed to unmarshal response body %s, url:%s, error:%s", body, u.String(), err) + klog.ErrorS(err, "failed to unmarshal appdynamics response body", "url", u.String(), "body", string(body)) } for _, metric := range metrics { name := metric.Path @@ -322,7 +322,7 @@ func (ins *Instance) gather(slist *types.SampleList, link string, labels map[str } } if len(ins.Filters) == 0 { - log.Printf("W! no filter specified, use default filter: current") + klog.Warning("no appdynamics filter specified, using default filter: current") slist.PushFront(types.NewSample(inputName, name+"_current", val.Current, labels).SetTime(tm)) } } diff --git a/inputs/arp_packet/arp_packet.go b/inputs/arp_packet/arp_packet.go index a84077c0c..145a1bb96 100644 --- a/inputs/arp_packet/arp_packet.go +++ b/inputs/arp_packet/arp_packet.go @@ -6,7 +6,6 @@ package arp_packet import ( "errors" "fmt" - "log" "net" "sync" "time" @@ -17,6 +16,7 @@ import ( "github.com/google/gopacket" "github.com/google/gopacket/layers" "github.com/google/gopacket/pcap" + "k8s.io/klog/v2" ) const inputName = "arp_packet" @@ -92,7 +92,7 @@ func (ins *Instance) Init() error { var err error ins.LocalIP, err = ins.GetInterfaceIpv4Addr(ins.Ethdevice) if err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to get ARP packet interface IPv4 address", "device", ins.Ethdevice) return types.ErrInstancesEmpty } ins.snapshot_len = 1024 @@ -101,11 +101,11 @@ func (ins *Instance) Init() error { // Open device ins.EthHandle, err = pcap.OpenLive(ins.Ethdevice, ins.snapshot_len, ins.promiscuous, ins.timeout) if err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to open ARP packet capture handle", "device", ins.Ethdevice) return types.ErrInstancesEmpty } go ins.arpStat() - log.Println("I! 
start arp stat") + klog.InfoS("start ARP packet stat", "device", ins.Ethdevice, "local_ip", ins.LocalIP) return nil } func (ins *Instance) Gather(slist *types.SampleList) { @@ -138,8 +138,7 @@ func (ins *Instance) arpStat() { sourceAddr := sip.String() dip = arp.DstProtAddress if sourceAddr == ins.LocalIP { - log.Println("I! ARPResp: SourceProtAddress:", sourceAddr, " mac:", macs) - log.Println("I! ARPResp: DstProtAddress:", dip.String(), " mac:", macd) + klog.InfoS("ARP response observed", "source_prot_address", sourceAddr, "source_mac", macs.String(), "dst_prot_address", dip.String(), "dst_mac", macd.String()) ins.mutex.Lock() ins.resARP++ ins.mutex.Unlock() @@ -154,8 +153,7 @@ func (ins *Instance) arpStat() { sourceAddr := sip.String() dip = arp.DstProtAddress if sourceAddr == ins.LocalIP { - log.Println("I! ARPReq: SourceProtAddress:", sourceAddr, " mac:", macs) - log.Println("I! ARPReq: DstProtAddress:", dip.String(), " mac:", macd) + klog.InfoS("ARP request observed", "source_prot_address", sourceAddr, "source_mac", macs.String(), "dst_prot_address", dip.String(), "dst_mac", macd.String()) ins.mutex.Lock() ins.reqARP++ ins.mutex.Unlock() diff --git a/inputs/bind/bind.go b/inputs/bind/bind.go index 3da598c11..55463b076 100644 --- a/inputs/bind/bind.go +++ b/inputs/bind/bind.go @@ -2,7 +2,6 @@ package bind import ( "fmt" - "log" "net/http" "net/url" "sync" @@ -11,6 +10,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -79,7 +79,7 @@ func (b *Instance) Gather(slist *types.SampleList) { for _, u := range b.Urls { addr, err := url.Parse(u) if err != nil { - log.Printf("unable to parse address %q: %s", u, err) + klog.Warningf("unable to parse address %q: %v", u, err) continue } @@ -88,7 +88,7 @@ func (b *Instance) Gather(slist *types.SampleList) { defer wg.Done() err = b.gatherURL(addr, slist) if err != nil { - log.Printf("E! 
gather url:%s error:%s", addr, err) + klog.ErrorS(err, "failed to gather bind url", "url", addr.String()) } }(addr) } diff --git a/inputs/cadvisor/instances.go b/inputs/cadvisor/instances.go index a2f848dac..bb6b9c96b 100644 --- a/inputs/cadvisor/instances.go +++ b/inputs/cadvisor/instances.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "os" @@ -20,6 +19,7 @@ import ( util "flashcat.cloud/categraf/pkg/metrics" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -141,7 +141,7 @@ func (ins *Instance) cache() { podUrl.Path = "/pods" req, err := http.NewRequest("GET", podUrl.String(), nil) if err != nil { - log.Println("E! failed to new request for url:", podUrl.String(), "error:", err) + klog.ErrorS(err, "failed to create cadvisor pod request", "url", podUrl.String()) return } ins.setHeaders(req) @@ -152,19 +152,19 @@ func (ins *Instance) cache() { case <-timer.C: resp, err := ins.client.Do(req) if err != nil { - log.Println("E! failed to request for url:", podUrl.String(), "error:", err) + klog.ErrorS(err, "failed to request cadvisor pods", "url", podUrl.String()) continue } body, err := io.ReadAll(resp.Body) if err != nil { - log.Println("E! failed to read body for url:", podUrl.String(), "error:", err) + klog.ErrorS(err, "failed to read cadvisor pod body", "url", podUrl.String()) continue } resp.Body.Close() pods := kubernetes.PodList{} err = json.Unmarshal(body, &pods) if err != nil { - log.Println("E! unmarshal pods info", err) + klog.ErrorS(err, "failed to unmarshal cadvisor pod info", "url", podUrl.String()) continue } for _, pod := range pods.Items { @@ -182,7 +182,7 @@ func cacheKey(ns, pod string) string { } func (ins *Instance) Drop() { - log.Println("I! 
cadvisor instance stop") + klog.InfoS("cadvisor instance stop", "url", ins.URL) close(ins.stop) } @@ -209,7 +209,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { req, err := http.NewRequest("GET", ins.u.String(), nil) if err != nil { - log.Println("E! failed to new request for url:", ins.u.String(), "error:", err) + klog.ErrorS(err, "failed to create cadvisor request", "url", ins.u.String()) return } @@ -217,20 +217,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { labels, err := ins.GenerateLabel(ins.u) if err != nil { - log.Println("E! failed to generate url label value:", err) + klog.ErrorS(err, "failed to generate cadvisor url label", "url", ins.u.String()) return } res, err := ins.client.Do(req) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to query url:", ins.u.String(), "error:", err) + klog.ErrorS(err, "failed to query cadvisor url", "url", ins.u.String()) return } if res.StatusCode != http.StatusOK { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to query url:", ins.u.String(), "status code:", res.StatusCode) + klog.ErrorS(nil, "failed to query cadvisor url", "url", ins.u.String(), "status_code", res.StatusCode) return } @@ -239,7 +239,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { body, err := io.ReadAll(res.Body) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to read response body, url:", ins.u.String(), "error:", err) + klog.ErrorS(err, "failed to read cadvisor response body", "url", ins.u.String()) return } @@ -252,7 +252,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gather(buf []byte, header http.Header, defaultLabels map[string]string, slist *types.SampleList) { metricFamilies, err := util.Parse(buf, header) if err != nil { - log.Println("E! 
failed to parse metrics, url:", ins.u.String(), "error:", err) + klog.ErrorS(err, "failed to parse cadvisor metrics", "url", ins.u.String()) return } @@ -335,7 +335,7 @@ func (ins *Instance) makeLabels(m *dto.Metric, defaultLabels map[string]string) } } else { if ins.DebugMod { - log.Println(cacheKey(namespace, podName), "not in cache") + klog.V(1).InfoS("cadvisor pod cache miss", "cache_key", cacheKey(namespace, podName)) } } } @@ -356,7 +356,7 @@ func (ins *Instance) setHeaders(req *http.Request) { if ins.BearerTokeFile != "" { content, err := os.ReadFile(ins.BearerTokeFile) if err != nil { - log.Println("E! failed to read bearer token file:", ins.BearerTokeFile, "error:", err) + klog.ErrorS(err, "failed to read cadvisor bearer token file", "file", ins.BearerTokeFile) return } diff --git a/inputs/chrony/chrony.go b/inputs/chrony/chrony.go index c302146fe..f989384d0 100644 --- a/inputs/chrony/chrony.go +++ b/inputs/chrony/chrony.go @@ -3,7 +3,6 @@ package chrony import ( "bytes" "fmt" - "log" "os/exec" "strconv" "strings" @@ -13,6 +12,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/cmdx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "chrony" @@ -63,24 +63,24 @@ func (c *Chrony) Gather(slist *types.SampleList) { err, timeout := cmdx.RunTimeout(cmd, time.Second*5) if timeout { - log.Printf("E! run command: %s timeout", strings.Join(cmd.Args, " ")) + klog.Warningf("run command timeout: %s", strings.Join(cmd.Args, " ")) return } if err != nil { - log.Printf("E! failed to run command: %s | error: %v | stdout: %s | stderr: %s", + klog.Errorf("failed to run command: %s | error: %v | stdout: %s | stderr: %s", strings.Join(cmd.Args, " "), err, stdout.String(), stderr.String()) return } fields, tags, err := processChronycOutput(stdout.String()) if err != nil { - log.Println("E! 
failed to gather chrony processOutput: ", err) + klog.ErrorS(err, "failed to gather chrony process output") return } if len(fields) == 0 { - log.Println("E! Chrony input failed to collect metrics") + klog.Error("chrony input failed to collect metrics") } slist.PushSamples("chrony", fields, tags) diff --git a/inputs/clickhouse/clickhouse.go b/inputs/clickhouse/clickhouse.go index 4786d36b6..04f71eb17 100644 --- a/inputs/clickhouse/clickhouse.go +++ b/inputs/clickhouse/clickhouse.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -22,6 +21,7 @@ import ( "flashcat.cloud/categraf/types" "github.com/tidwall/gjson" + "k8s.io/klog/v2" ) const inputName = "clickhouse" @@ -136,14 +136,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, server := range ins.Servers { u, err := url.Parse(server) if err != nil { - log.Println("E! failed to parse server url, error: ", err) + klog.ErrorS(err, "failed to parse server url", "server", server) return } switch { case ins.AutoDiscovery: var conns []connect if err := ins.execQuery(u, "SELECT cluster, shard_num, host_name FROM system.clusters "+ins.clusterIncludeExcludeFilter(), &conns); err != nil { - log.Println("E! failed to exec clickhouse query:", "SELECT cluster, shard_num, host_name FROM system.clusters "+ins.clusterIncludeExcludeFilter()) + klog.ErrorS(err, "failed to exec clickhouse query", "query", "SELECT cluster, shard_num, host_name FROM system.clusters "+ins.clusterIncludeExcludeFilter()) continue } for _, c := range conns { @@ -178,20 +178,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, metricFunc := range metricsFuncs { if err := metricFunc(slist, &connects[i]); err != nil { - log.Println("E! failed to exec metrics Funcs error:", err) + klog.ErrorS(err, "failed to exec metrics func") } } for metric := range commonMetrics { if err := ins.commonMetrics(slist, &connects[i], metric); err != nil { - log.Println("E! 
failed to exec query commonMetrics error:", err) + klog.ErrorS(err, "failed to exec query common metrics", "metric", metric) } } - log.Printf("I! metrics count: %d", len(ins.Metrics)) + klog.InfoS("clickhouse metrics count", "count", len(ins.Metrics)) waitMetrics := new(sync.WaitGroup) if len(connects) == 0 { - log.Println("W! No available connections for custom metrics") + klog.Warning("no available connections for custom metrics") return } @@ -578,7 +578,7 @@ func (ins *Instance) execCustomQuery(conn *connect, waitMetrics *sync.WaitGroup, if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { return err } - log.Println("content:", len(response.Data)) + klog.V(1).InfoS("clickhouse response content", "items", len(response.Data)) for _, item := range response.Data { localTags := tags for _, label := range metricConf.LabelFields { @@ -588,7 +588,7 @@ func (ins *Instance) execCustomQuery(conn *connect, waitMetrics *sync.WaitGroup, for _, column := range metricConf.MetricFields { value, err := conv.ToFloat64(gjson.Get(string(item), column).String()) if err != nil { - log.Println("E! failed to convert field:", column, "value:", value, "error:", err) + klog.ErrorS(err, "failed to convert field", "column", column, "value", value) return err } diff --git a/inputs/cloudwatch/cloudwatch.go b/inputs/cloudwatch/cloudwatch.go index 4d0bd6e96..61ea2baa6 100644 --- a/inputs/cloudwatch/cloudwatch.go +++ b/inputs/cloudwatch/cloudwatch.go @@ -5,7 +5,6 @@ import ( "context" _ "embed" "fmt" - "log" "net" "net/http" "strconv" @@ -28,6 +27,7 @@ import ( "flashcat.cloud/categraf/pkg/stringx" internalTypes "flashcat.cloud/categraf/types" internalMetric "flashcat.cloud/categraf/types/metric" + "k8s.io/klog/v2" ) //go:embed sample.conf @@ -170,7 +170,7 @@ func (cw *CloudWatch) GetInstances() []inputs.Instance { func (ins *Instance) Gather(slist *internalTypes.SampleList) { filteredMetrics, err := getFilteredMetrics(ins) if err != nil { - log.Println("E! 
filter metrics error,", err) + klog.ErrorS(err, "filter cloudwatch metrics error", "namespaces", ins.Namespaces) return } @@ -179,7 +179,7 @@ func (ins *Instance) Gather(slist *internalTypes.SampleList) { // Get all of the possible queries so we can send groups of 100. queries := ins.getDataQueries(filteredMetrics) if len(queries) == 0 { - log.Printf("E! data queries length is 0, namespaces:%+v", ins.Namespaces) + klog.ErrorS(nil, "cloudwatch data queries length is 0", "namespaces", ins.Namespaces) return } @@ -208,7 +208,7 @@ func (ins *Instance) Gather(slist *internalTypes.SampleList) { defer wg.Done() result, err := ins.gatherMetrics(ins.getDataInputs(inm)) if err != nil { - log.Printf("E! gather namespace:%s error:%s", n, err) + klog.ErrorS(err, "gather cloudwatch namespace error", "namespace", n) return } @@ -223,7 +223,7 @@ func (ins *Instance) Gather(slist *internalTypes.SampleList) { err = ins.aggregateMetrics(slist, results) if err != nil { - log.Println("E! aggregate metrics error,", err) + klog.ErrorS(err, "aggregate cloudwatch metrics error", "namespaces", ins.Namespaces) } } @@ -311,7 +311,7 @@ type filteredMetric struct { func getFilteredMetrics(c *Instance) ([]filteredMetric, error) { if c.metricCache != nil && c.metricCache.isValid() { if c.DebugMod { - log.Printf("D! use filtered metrics cache for namespace %+v", c.Namespaces) + klog.V(1).InfoS("use filtered cloudwatch metrics cache", "namespaces", c.Namespaces) } return c.metricCache.metrics, nil } @@ -407,7 +407,7 @@ func (ins *Instance) fetchNamespaceMetrics() []types.Metric { for { resp, err := ins.client.ListMetrics(context.Background(), params) if err != nil { - log.Printf("E! 
failed to list metrics with namespace %s: %v", namespace, err) + klog.ErrorS(err, "failed to list cloudwatch metrics", "namespace", namespace) // skip problem namespace on error and continue to next namespace break } @@ -514,7 +514,7 @@ func (ins *Instance) getDataQueries(filteredMetrics []filteredMetric) map[string if len(dataQueries) == 0 { if ins.DebugMod { - log.Printf("D! no metrics found to collect for namespace:%+v", ins.Namespaces) + klog.V(1).InfoS("no cloudwatch metrics found to collect", "namespaces", ins.Namespaces) } return nil } diff --git a/inputs/collector.go b/inputs/collector.go index 63c9c6f10..13f6ec069 100644 --- a/inputs/collector.go +++ b/inputs/collector.go @@ -3,12 +3,12 @@ package inputs import ( "errors" "fmt" - "log" "regexp" "strings" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "k8s.io/klog/v2" util "flashcat.cloud/categraf/pkg/metrics" "flashcat.cloud/categraf/types" @@ -35,19 +35,19 @@ func Collect(e prometheus.Collector, slist *types.SampleList, constLabels ...map desc := metric.Desc().String() descName, err := DescName(desc) if err != nil { - log.Printf("error getting metric name: %s", err) + klog.ErrorS(err, "failed to get metric name", "desc", desc) continue } ls, err := DescConstLabels(desc) if err != nil { - log.Println("E! failed to read labels:", desc) + klog.ErrorS(err, "failed to read metric labels", "desc", desc) continue } dtoMetric := &dto.Metric{} err = metric.Write(dtoMetric) if err != nil { - log.Println("E! 
failed to write metric:", desc) + klog.ErrorS(err, "failed to write metric", "desc", desc) continue } diff --git a/inputs/conntrack/conntrack.go b/inputs/conntrack/conntrack.go index 56779a4b7..165025287 100644 --- a/inputs/conntrack/conntrack.go +++ b/inputs/conntrack/conntrack.go @@ -4,7 +4,6 @@ package conntrack import ( - "log" "os" "path/filepath" "strconv" @@ -13,6 +12,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "conntrack" @@ -86,20 +86,20 @@ func (c *Conntrack) Gather(slist *types.SampleList) { contents, err := os.ReadFile(fName) if err != nil { - log.Println("E! failed to read file:", fName, "error:", err) + klog.ErrorS(err, "failed to read conntrack file", "path", fName) continue } v := strings.TrimSpace(string(contents)) fields[metricKey], err = strconv.ParseFloat(v, 64) if err != nil { - log.Println("E! failed to parse metric, expected number but found:", v, "error:", err) + klog.ErrorS(err, "failed to parse conntrack metric", "value", v, "path", fName) } } } if len(fields) == 0 && !c.Quiet { - log.Println("E! Conntrack input failed to collect metrics. Is the conntrack kernel module loaded?") + klog.Error("conntrack input failed to collect metrics. Is the conntrack kernel module loaded?") } slist.PushSamples("conntrack", fields) diff --git a/inputs/consul/consul.go b/inputs/consul/consul.go index 49257286d..58e33bd8c 100644 --- a/inputs/consul/consul.go +++ b/inputs/consul/consul.go @@ -1,7 +1,6 @@ package consul import ( - "log" "net/http" "regexp" "strconv" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/types" "github.com/hashicorp/consul/api" + "k8s.io/klog/v2" ) const inputName = "consul" @@ -149,7 +149,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, fn := range fns { if err := fn(slist); err != nil { up = 0 - log.Println("E! 
failed to gather http target:", ins.Address, "error:", err) + klog.ErrorS(err, "failed to gather consul target", "address", ins.Address) } } slist.PushFront(types.NewSample(inputName, "up", up, tag)) diff --git a/inputs/cpu/cpu.go b/inputs/cpu/cpu.go index 0fdd9e4ae..7eecc3bcf 100644 --- a/inputs/cpu/cpu.go +++ b/inputs/cpu/cpu.go @@ -1,14 +1,13 @@ package cpu import ( - "log" - cpuUtil "github.com/shirou/gopsutil/v3/cpu" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "cpu" @@ -42,7 +41,7 @@ func (c *CPUStats) Name() string { func (c *CPUStats) Gather(slist *types.SampleList) { times, err := c.ps.CPUTimes(c.CollectPerCPU, true) if err != nil { - log.Println("E! failed to get cpu metrics:", err) + klog.ErrorS(err, "failed to get cpu metrics") return } @@ -70,7 +69,7 @@ func (c *CPUStats) Gather(slist *types.SampleList) { totalDelta := total - lastTotal if totalDelta < 0 { - log.Println("W! current total CPU time is less than previous total CPU time") + klog.Warning("current total CPU time is less than previous total CPU time") break } diff --git a/inputs/dcgm/exporter.go b/inputs/dcgm/exporter.go index d19ac1f45..618416d7b 100644 --- a/inputs/dcgm/exporter.go +++ b/inputs/dcgm/exporter.go @@ -5,7 +5,6 @@ package dcgm import ( "bytes" "fmt" - "log" "net/http" "runtime/debug" "strconv" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs/dcgm/dcgmexporter" "flashcat.cloud/categraf/parser/prometheus" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -92,7 +92,7 @@ func (e *Exporter) Drop() { } } -func (ins *Instance) Init() error { +func (ins *Instance) Init() (err error) { if len(ins.CollectorsFile) == 0 { return types.ErrInstancesEmpty @@ -141,33 +141,34 @@ func (ins *Instance) Init() error { // during initialization and return an error. 
defer func() { if r := recover(); r != nil { - log.Println(string(debug.Stack())) - log.Printf("E! encountered a failure; err: %v", r) + panicErr := fmt.Errorf("panic: %v", r) + klog.ErrorS(panicErr, "encountered a dcgm initialization failure", "stack", string(debug.Stack())) + err = panicErr } }() if ins.DebugMod { // enable debug logging - log.Println("Starting dcgm-exporter") + klog.V(1).InfoS("starting dcgm-exporter") } if ins.DebugMod { - log.Printf("%+v", cfg) + klog.V(1).InfoS("dcgm exporter config", "config", fmt.Sprintf("%+v", cfg)) } if cfg.UseRemoteHE { - log.Printf("Attemping to connect to remote hostengine at ", cfg.RemoteHEInfo) + klog.InfoS("attempting to connect to remote hostengine", "remote_hostengine", cfg.RemoteHEInfo) ins.dcgmCleanup, err = dcgm.Init(dcgm.Standalone, cfg.RemoteHEInfo, "0") if err != nil { - log.Fatal(err) + return err } } else { ins.dcgmCleanup, err = dcgm.Init(dcgm.Embedded) if err != nil { - log.Fatal(err) + return err } } - log.Println("DCGM successfully initialized!") + klog.InfoS("DCGM successfully initialized") dcgm.FieldsInit() defer dcgm.FieldsTerm() @@ -176,16 +177,16 @@ func (ins *Instance) Init() error { groups, err = dcgm.GetSupportedMetricGroups(0) if err != nil { cfg.CollectDCP = false - log.Println("Not collecting DCP metrics: ", err) + klog.Warningf("not collecting DCP metrics: %v", err) } else { - log.Println("Collecting DCP Metrics") + klog.InfoS("collecting DCP metrics") cfg.MetricGroups = groups } cs, err := dcgmexporter.GetCounterSet(cfg) if err != nil { - log.Fatalln(err) + return err } // Copy labels from DCGM Counters to ExporterCounters @@ -212,7 +213,7 @@ func (ins *Instance) Init() error { for _, egt := range dcgmexporter.FieldEntityGroupTypeToMonitor { err := fieldEntityGroupTypeSystemInfo.Load(egt) if err != nil { - log.Printf("Not collecting %s metrics; %s", egt.String(), err) + klog.Warningf("not collecting %s metrics; %v", egt.String(), err) } } @@ -227,7 +228,7 @@ func (ins *Instance) Init() 
error { ins.pipeline = pipeline ins.plCleanup = cleanup if err != nil { - log.Fatal(err) + return err } ins.registry = dcgmexporter.NewRegistry() @@ -235,33 +236,33 @@ func (ins *Instance) Init() error { if dcgmexporter.IsDCGMExpXIDErrorsCountEnabled(cs.ExporterCounters) { item, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU) if !exists { - log.Fatalf("%s collector cannot be initialized", dcgmexporter.DCGMXIDErrorsCount.String()) + return fmt.Errorf("%s collector cannot be initialized", dcgmexporter.DCGMXIDErrorsCount.String()) } xidCollector, err := dcgmexporter.NewXIDCollector(cs.ExporterCounters, hostname, cfg, item) if err != nil { - log.Fatal(err) + return err } ins.registry.Register(xidCollector) - log.Printf("%s collector initialized", dcgmexporter.DCGMXIDErrorsCount.String()) + klog.InfoS("dcgm collector initialized", "collector", dcgmexporter.DCGMXIDErrorsCount.String()) } if dcgmexporter.IsDCGMExpClockEventsCountEnabled(cs.ExporterCounters) { item, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU) if !exists { - log.Fatalf("%s collector cannot be initialized", dcgmexporter.DCGMClockEventsCount.String()) + return fmt.Errorf("%s collector cannot be initialized", dcgmexporter.DCGMClockEventsCount.String()) } clocksThrottleReasonsCollector, err := dcgmexporter.NewClockEventsCollector( cs.ExporterCounters, hostname, cfg, item) if err != nil { - log.Fatal(err) + return err } ins.registry.Register(clocksThrottleReasonsCollector) - log.Printf("%s collector initialized", dcgmexporter.DCGMClockEventsCount.String()) + klog.InfoS("dcgm collector initialized", "collector", dcgmexporter.DCGMClockEventsCount.String()) } return nil } @@ -335,25 +336,25 @@ func (ins *Instance) Gather(slist *types.SampleList) { labels := ins.GetLabels() out, err := ins.pipeline.Run() if err != nil { - log.Println("E! 
dcgm exporter collects error:", err) + klog.ErrorS(err, "dcgm exporter pipeline run failed") return } buf.WriteString(out) metrics, err := ins.registry.Gather() if err != nil { - log.Println("E! dcgm exporter collects error:", err) + klog.ErrorS(err, "dcgm exporter registry gather failed") return } err = dcgmexporter.EncodeExpMetrics(buf, metrics) if err != nil { - log.Println("E! dcgm exporter collects error:", err) + klog.ErrorS(err, "dcgm exporter encode metrics failed") return } parser := prometheus.NewParser("", labels, http.Header{}, false, nil, nil) err = parser.Parse(buf.Bytes(), slist) if err != nil { - log.Println("E! dcgm exporter collects error:", err) + klog.ErrorS(err, "dcgm exporter parser parse failed") return } } diff --git a/inputs/disk/disk.go b/inputs/disk/disk.go index 3463b9571..044ac9a4b 100644 --- a/inputs/disk/disk.go +++ b/inputs/disk/disk.go @@ -1,7 +1,6 @@ package disk import ( - "log" "strings" "flashcat.cloud/categraf/config" @@ -9,6 +8,7 @@ import ( "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/choice" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "disk" @@ -43,7 +43,7 @@ func (s *DiskStats) Name() string { func (s *DiskStats) Gather(slist *types.SampleList) { disks, partitions, err := s.ps.DiskUsage(s.MountPoints, s.IgnoreFS) if err != nil { - log.Println("E! 
failed to get disk usage:", err) + klog.ErrorS(err, "failed to get disk usage") return } diff --git a/inputs/diskio/diskio.go b/inputs/diskio/diskio.go index 24fc94b35..0edccb108 100644 --- a/inputs/diskio/diskio.go +++ b/inputs/diskio/diskio.go @@ -2,7 +2,6 @@ package diskio import ( "fmt" - "log" "time" "github.com/shirou/gopsutil/v3/disk" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "diskio" @@ -66,7 +66,7 @@ func (d *DiskIO) Gather(slist *types.SampleList) { diskio, err := d.ps.DiskIO(devices) if err != nil { - log.Println("E! failed to get disk io:", err) + klog.ErrorS(err, "failed to get disk io") return } diff --git a/inputs/dmesg/dmesg.go b/inputs/dmesg/dmesg.go index e1ac4f31a..28107d473 100644 --- a/inputs/dmesg/dmesg.go +++ b/inputs/dmesg/dmesg.go @@ -6,7 +6,6 @@ package dmesg import ( "bytes" "errors" - "log" "os" "strconv" "strings" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "dmesg" @@ -67,14 +67,14 @@ func (ins *Instance) Init() error { f, err := os.OpenFile("/dev/kmsg", syscall.O_RDONLY|syscall.O_NONBLOCK, 0) if err != nil { - log.Println("Error opening /dev/kmsg:", err) + klog.ErrorS(err, "error opening /dev/kmsg") return err } ins.conn, err = f.SyscallConn() if err != nil { f.Close() - log.Println("Error getting raw connection:", err) + klog.ErrorS(err, "error getting raw connection") return err } @@ -160,7 +160,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } if err != nil { - log.Println("Error reading from /dev/kmsg:", err) + klog.ErrorS(err, "error reading from /dev/kmsg") slist.PushFront(types.NewSample(inputName, "up", 0, nil)) return } @@ -241,4 +241,4 @@ func parseData(data []byte) *Msg { } return &msg -} \ No newline at end of file +} diff --git a/inputs/dns_query/dns_query.go 
b/inputs/dns_query/dns_query.go index 6926c1c70..7c1a5a43e 100644 --- a/inputs/dns_query/dns_query.go +++ b/inputs/dns_query/dns_query.go @@ -2,7 +2,6 @@ package dns_query import ( "fmt" - "log" "net" "os" "strconv" @@ -13,6 +12,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" "github.com/miekg/dns" + "k8s.io/klog/v2" ) const inputName = "dns_query" @@ -84,7 +84,7 @@ func (ins *Instance) Init() error { config, err := dns.ClientConfigFromFile(resolvPath) if err != nil { - log.Println("E! failed to detect local dns server:", err) + klog.ErrorS(err, "failed to detect local dns server", "path", resolvPath) return types.ErrInstancesEmpty } @@ -145,7 +145,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { setResult(Timeout, fields) } else if err != nil { setResult(Error, fields) - log.Println("E!", err) + klog.ErrorS(err, "dns query failed", "domain", domain, "server", server, "record_type", ins.RecordType) } slist.PushSamples("dns_query", fields, tags) diff --git a/inputs/docker/docker.go b/inputs/docker/docker.go index f280ec922..d8b8fef55 100644 --- a/inputs/docker/docker.go +++ b/inputs/docker/docker.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "log" "strings" "sync" "time" @@ -21,6 +20,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/swarm" + "k8s.io/klog/v2" tlsx "flashcat.cloud/categraf/pkg/tls" itypes "flashcat.cloud/categraf/types" @@ -145,7 +145,7 @@ func (ins *Instance) Gather(slist *itypes.SampleList) { c, err := ins.getNewClient() if err != nil { slist.PushSample("docker", "up", 0) - log.Println("E! failed to new docker client:", err) + klog.ErrorS(err, "failed to create docker client", "endpoint", ins.Endpoint) return } ins.client = c @@ -155,7 +155,7 @@ func (ins *Instance) Gather(slist *itypes.SampleList) { if err := ins.gatherInfo(slist); err != nil { slist.PushSample("docker", "up", 0) - log.Println("E! 
failed to gather docker info:", err) + klog.ErrorS(err, "failed to gather docker info", "endpoint", ins.Endpoint) return } @@ -186,11 +186,11 @@ func (ins *Instance) Gather(slist *itypes.SampleList) { containers, err := ins.client.ContainerList(ctx, opts) if err == context.DeadlineExceeded { - log.Println("E! failed to gather container list: timeout") + klog.ErrorS(err, "failed to gather docker container list: timeout", "endpoint", ins.Endpoint) return } if err != nil { - log.Println("E! failed to gather container list:", err) + klog.ErrorS(err, "failed to gather docker container list", "endpoint", ins.Endpoint) return } @@ -247,11 +247,11 @@ func (ins *Instance) gatherContainer(container types.Container, slist *itypes.Sa r, err := ins.client.ContainerStats(ctx, container.ID, false) if err == context.DeadlineExceeded { - log.Println("E! failed to get container stats: timeout") + klog.ErrorS(err, "failed to get docker container stats: timeout", "container_id", container.ID) return } if err != nil { - log.Println("E! failed to get container stats:", err) + klog.ErrorS(err, "failed to get docker container stats", "container_id", container.ID) return } @@ -263,7 +263,7 @@ func (ins *Instance) gatherContainer(container types.Container, slist *itypes.Sa if err = dec.Decode(&v); err != nil { if err != io.EOF { - log.Println("E! failed to decode output of container stats:", err) + klog.ErrorS(err, "failed to decode output of docker container stats", "container_id", container.ID) } return } @@ -277,7 +277,7 @@ func (ins *Instance) gatherContainer(container types.Container, slist *itypes.Sa err = ins.gatherContainerInspect(container, slist, tags, r.OSType, v) if err != nil { - log.Println("E! 
failed to gather container inspect:", err) + klog.ErrorS(err, "failed to gather docker container inspect", "container_id", container.ID, "container_name", cname) } } @@ -630,11 +630,11 @@ func (ins *Instance) gatherSwarmInfo(slist *itypes.SampleList) { services, err := ins.client.ServiceList(ctx, types.ServiceListOptions{}) if err == context.DeadlineExceeded { - log.Println("E! failed to gather swarm info: timeout") + klog.ErrorS(err, "failed to gather docker swarm info: timeout", "endpoint", ins.Endpoint) return } if err != nil { - log.Println("E! failed to gather swarm info:", err) + klog.ErrorS(err, "failed to gather docker swarm services", "endpoint", ins.Endpoint) return } @@ -644,13 +644,13 @@ func (ins *Instance) gatherSwarmInfo(slist *itypes.SampleList) { tasks, err := ins.client.TaskList(ctx, types.TaskListOptions{}) if err != nil { - log.Println("E! failed to gather swarm info:", err) + klog.ErrorS(err, "failed to gather docker swarm tasks", "endpoint", ins.Endpoint) return } nodes, err := ins.client.NodeList(ctx, types.NodeListOptions{}) if err != nil { - log.Println("E! failed to gather swarm info:", err) + klog.ErrorS(err, "failed to gather docker swarm nodes", "endpoint", ins.Endpoint) return } @@ -690,7 +690,7 @@ func (ins *Instance) gatherSwarmInfo(slist *itypes.SampleList) { fields["tasks_running"] = running[service.ID] fields["tasks_desired"] = tasksNoShutdown[service.ID] } else { - log.Println("E! 
Unknown replica mode") + klog.ErrorS(nil, "unknown docker service replica mode", "service_id", service.ID, "service_name", service.Spec.Name) } slist.PushSamples("docker_swarm", fields, tags) @@ -809,4 +809,4 @@ func hostnameFromID(id string) string { // } // return int64(size), nil -// } \ No newline at end of file +// } diff --git a/inputs/elasticsearch/collector/categraf_utils.go b/inputs/elasticsearch/collector/categraf_utils.go index 40235024f..32e8770bc 100644 --- a/inputs/elasticsearch/collector/categraf_utils.go +++ b/inputs/elasticsearch/collector/categraf_utils.go @@ -6,11 +6,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" "strings" + + "k8s.io/klog/v2" ) func GetNodeID(client *http.Client, user, password, s string) (string, error) { @@ -30,7 +31,7 @@ func GetNodeID(client *http.Client, user, password, s string) (string, error) { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -70,7 +71,7 @@ func GetClusterName(client *http.Client, user, password, s string) (string, erro defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close response body, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() if res.StatusCode != http.StatusOK { @@ -102,7 +103,7 @@ func GetCatMaster(client *http.Client, user, password, s string) (string, error) defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -133,7 +134,7 @@ func queryURL(client *http.Client, u *url.URL) ([]byte, error) { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() diff --git a/inputs/elasticsearch/collector/cluster_health.go 
b/inputs/elasticsearch/collector/cluster_health.go index ba7e78333..de83dd9ee 100644 --- a/inputs/elasticsearch/collector/cluster_health.go +++ b/inputs/elasticsearch/collector/cluster_health.go @@ -17,12 +17,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var ( @@ -233,7 +233,7 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -257,7 +257,7 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er func (c *ClusterHealth) Collect(ch chan<- prometheus.Metric) { clusterHealthResp, err := c.fetchAndDecodeClusterHealth() if err != nil { - log.Println("failed to fetch and decode cluster health, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch cluster health") return } diff --git a/inputs/elasticsearch/collector/cluster_health_indices.go b/inputs/elasticsearch/collector/cluster_health_indices.go index fb6ae3a15..f1818cc3d 100644 --- a/inputs/elasticsearch/collector/cluster_health_indices.go +++ b/inputs/elasticsearch/collector/cluster_health_indices.go @@ -4,12 +4,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) // ClusterHealthIndices type defines the collector struct @@ -150,7 +150,7 @@ func (c *ClusterHealthIndices) fetchAndDecodeClusterHealthIndices() (clusterHeal defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -174,7 +174,7 @@ func (c *ClusterHealthIndices) fetchAndDecodeClusterHealthIndices() (clusterHeal func (c *ClusterHealthIndices) Collect(ch chan<- 
prometheus.Metric) { clusterHealthResp, err := c.fetchAndDecodeClusterHealthIndices() if err != nil { - log.Println("failed to fetch and decode cluster health, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch cluster health indices") return } diff --git a/inputs/elasticsearch/collector/cluster_settings.go b/inputs/elasticsearch/collector/cluster_settings.go index daff45ce7..7814dbddc 100644 --- a/inputs/elasticsearch/collector/cluster_settings.go +++ b/inputs/elasticsearch/collector/cluster_settings.go @@ -18,7 +18,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strconv" @@ -26,6 +25,7 @@ import ( "github.com/imdario/mergo" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) func init() { @@ -222,12 +222,12 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh // Watermark bytes or ratio metrics watermarkFlood, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.FloodStage) if err != nil { - log.Println("failed to parse flood stage watermark, err: ", err) + klog.ErrorS(err, "failed to parse flood stage watermark") } else { if strings.HasSuffix(watermarkFlood, "b") { floodStageBytes, err := getValueInBytes(watermarkFlood) if err != nil { - log.Println("failed to parse flood_stage bytes , err: ", err) + klog.ErrorS(err, "failed to parse flood stage bytes") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["floodStageBytes"], @@ -238,7 +238,7 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh } else { floodStageRatio, err := getValueAsRatio(watermarkFlood) if err != nil { - log.Println("failed to parse flood_stage ratio, err: ", err) + klog.ErrorS(err, "failed to parse flood stage ratio") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["floodStageRatio"], @@ -251,12 +251,12 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh watermarkHigh, err := 
parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.High) if err != nil { - log.Println("failed to parse high watermark, err: ", err) + klog.ErrorS(err, "failed to parse high watermark") } else { if strings.HasSuffix(watermarkHigh, "b") { highBytes, err := getValueInBytes(watermarkHigh) if err != nil { - log.Println("failed to parse high bytes, err: ", err) + klog.ErrorS(err, "failed to parse high bytes") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["highBytes"], @@ -267,7 +267,7 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh } else { highRatio, err := getValueAsRatio(watermarkHigh) if err != nil { - log.Println("failed to parse high ratio, err: ", err) + klog.ErrorS(err, "failed to parse high ratio") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["highRatio"], @@ -280,12 +280,12 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh watermarkLow, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.Low) if err != nil { - log.Println("failed to parse low watermark, err: ", err) + klog.ErrorS(err, "failed to parse low watermark") } else { if strings.HasSuffix(watermarkLow, "b") { lowBytes, err := getValueInBytes(watermarkLow) if err != nil { - log.Println("failed to parse low bytes, err: ", err) + klog.ErrorS(err, "failed to parse low bytes") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["lowBytes"], @@ -296,7 +296,7 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh } else { lowRatio, err := getValueAsRatio(watermarkLow) if err != nil { - log.Println("failed to parse low ratio, err: ", err) + klog.ErrorS(err, "failed to parse low ratio") } else { ch <- prometheus.MustNewConstMetric( clusterSettingsDesc["lowRatio"], diff --git a/inputs/elasticsearch/collector/cluster_stats.go b/inputs/elasticsearch/collector/cluster_stats.go index bd7892fc6..5164bfd94 100644 --- 
a/inputs/elasticsearch/collector/cluster_stats.go +++ b/inputs/elasticsearch/collector/cluster_stats.go @@ -4,12 +4,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) type clusterStatsMetric struct { @@ -850,7 +850,7 @@ func (c *ClusterStats) fetchAndDecodeClusterStats() (ClusterStatsResponse, error defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -874,7 +874,7 @@ func (c *ClusterStats) fetchAndDecodeClusterStats() (ClusterStatsResponse, error func (c *ClusterStats) Collect(ch chan<- prometheus.Metric) { clusterStatsResp, err := c.fetchAndDecodeClusterStats() if err != nil { - log.Println("failed to fetch and decode cluster health, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch cluster stats") return } diff --git a/inputs/elasticsearch/collector/collector.go b/inputs/elasticsearch/collector/collector.go index 37f444236..b421f7b3c 100644 --- a/inputs/elasticsearch/collector/collector.go +++ b/inputs/elasticsearch/collector/collector.go @@ -18,7 +18,6 @@ import ( "context" "errors" "fmt" - "log" "net/http" "net/url" "sync" @@ -26,6 +25,7 @@ import ( "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) const ( @@ -182,13 +182,13 @@ func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus if err != nil { if IsNoDataError(err) { - log.Printf("collector returned no data, name: %s, duration_seconds: %f, err: %s\n", name, duration.Seconds(), err) + klog.Warningf("elasticsearch collector returned no data, name: %s, duration_seconds: %f, err: %v", name, duration.Seconds(), err) } else { - log.Printf("collector failed, name: %s, duration_seconds: %f, err: %s\n", name, duration.Seconds(), err) + klog.ErrorS(err, "elasticsearch collector failed", 
"name", name, "duration_seconds", duration.Seconds()) } success = 0 } else { - log.Printf("collector succeeded, name: %s, duration_seconds: %f, err: %s\n", name, duration.Seconds(), err) + klog.V(1).InfoS("elasticsearch collector succeeded", "name", name, "duration_seconds", duration.Seconds()) success = 1 } ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name) diff --git a/inputs/elasticsearch/collector/indices.go b/inputs/elasticsearch/collector/indices.go index 2715a0f08..a7d078924 100644 --- a/inputs/elasticsearch/collector/indices.go +++ b/inputs/elasticsearch/collector/indices.go @@ -16,7 +16,6 @@ package collector import ( "context" "encoding/json" - "log" "net/http" "net/url" "path" @@ -29,6 +28,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "flashcat.cloud/categraf/inputs/elasticsearch/pkg/clusterinfo" + "k8s.io/klog/v2" ) var ( @@ -499,7 +499,7 @@ func NewIndices(client *http.Client, url *url.URL, shards bool, includeAliases b select { case ci := <-indices.clusterInfoCh: if ci != nil { - log.Println("received cluster info update, cluster: ", ci.ClusterName) + klog.V(1).InfoS("received elasticsearch cluster info update", "cluster", ci.ClusterName) indices.lastClusterInfo = ci } case <-timer.C: @@ -689,7 +689,7 @@ func (i *Indices) fetchAndDecodeIndexStats(ctx context.Context) (indexStatsRespo u := i.url.ResolveReference(&url.URL{Path: "_alias"}) resp, err := getURL(ctx, i.client, u.String()) if err != nil { - log.Println("error getting alias information, err: ", err) + klog.ErrorS(err, "error getting elasticsearch alias information") return isr, err } @@ -745,7 +745,7 @@ func (i *Indices) Collect(ch chan<- prometheus.Metric) { ctx := context.TODO() indexStatsResp, err := i.fetchAndDecodeIndexStats(ctx) if err != nil { - log.Println("failed to fetch and decode index stats, err", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch index stats") return } diff --git 
a/inputs/elasticsearch/collector/indices_mappings.go b/inputs/elasticsearch/collector/indices_mappings.go index da8baa4f1..73cd4ee22 100644 --- a/inputs/elasticsearch/collector/indices_mappings.go +++ b/inputs/elasticsearch/collector/indices_mappings.go @@ -17,7 +17,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" @@ -26,6 +25,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var defaultIndicesMappingsLabels = []string{"index"} @@ -121,13 +121,13 @@ func (im *IndicesMappings) getAndParseURL(u *url.URL) (*IndicesMappingsResponse, body, err := io.ReadAll(res.Body) if err != nil { - log.Println("failed to read response body, err: ", err) + klog.ErrorS(err, "failed to read elasticsearch response body") return nil, err } err = res.Body.Close() if err != nil { - log.Println("failed to close response body, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") return nil, err } @@ -155,7 +155,7 @@ func (im *IndicesMappings) fetchAndDecodeIndicesMappings() (*IndicesMappingsResp func (im *IndicesMappings) Collect(ch chan<- prometheus.Metric) { indicesMappingsResponse, err := im.fetchAndDecodeIndicesMappings() if err != nil { - log.Println("failed to fetch and decode cluster mappings stats, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch indices mappings") return } diff --git a/inputs/elasticsearch/collector/indices_settings.go b/inputs/elasticsearch/collector/indices_settings.go index 8ab8c33e1..ff72c6995 100644 --- a/inputs/elasticsearch/collector/indices_settings.go +++ b/inputs/elasticsearch/collector/indices_settings.go @@ -17,7 +17,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" @@ -27,6 +26,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) // IndicesSettings information struct @@ -139,7 +139,7 @@ func (cs *IndicesSettings) 
getAndParseURL(u *url.URL, data interface{}) error { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err :", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -186,7 +186,7 @@ func (cs *IndicesSettings) Collect(ch chan<- prometheus.Metric) { asr, err := cs.fetchAndDecodeIndicesSettings() if err != nil { cs.readOnlyIndices.Set(0) - log.Println("failed to fetch and decode cluster settings stats, err :", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch indices settings") return } diff --git a/inputs/elasticsearch/collector/nodes.go b/inputs/elasticsearch/collector/nodes.go index 24bf39b92..a984d0f2b 100644 --- a/inputs/elasticsearch/collector/nodes.go +++ b/inputs/elasticsearch/collector/nodes.go @@ -17,13 +17,13 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "path" "strings" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) func getRoles(node NodeStatsNodeResponse) map[string]bool { @@ -1888,7 +1888,7 @@ func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -1911,7 +1911,7 @@ func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) { func (c *Nodes) Collect(ch chan<- prometheus.Metric) { nodeStatsResp, err := c.fetchAndDecodeNodeStats() if err != nil { - log.Println("failed to fetch and decode node stats, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch node stats") return } diff --git a/inputs/elasticsearch/collector/shards.go b/inputs/elasticsearch/collector/shards.go index d0331ee90..ca617bcdb 100644 --- a/inputs/elasticsearch/collector/shards.go +++ b/inputs/elasticsearch/collector/shards.go @@ -16,7 +16,6 @@ package collector import ( "encoding/json" "fmt" - "log" "net/http" "net/url" "path" 
@@ -25,6 +24,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "flashcat.cloud/categraf/inputs/elasticsearch/pkg/clusterinfo" + "k8s.io/klog/v2" ) // ShardResponse has shard's node and index info @@ -139,17 +139,17 @@ func NewShards(client *http.Client, url *url.URL) *Shards { // start go routine to fetch clusterinfo updates and save them to lastClusterinfo go func() { timer := time.NewTimer(2 * time.Minute) - log.Println("starting cluster info receive loop") + klog.V(1).InfoS("starting elasticsearch cluster info receive loop") for { select { case ci := <-shards.clusterInfoCh: if ci != nil { - log.Println("received cluster info update, cluster ", ci.ClusterName) + klog.V(1).InfoS("received elasticsearch cluster info update", "cluster", ci.ClusterName) shards.lastClusterInfo = ci } case <-timer.C: close(shards.clusterInfoCh) - log.Println("exiting cluster info receive loop") + klog.V(1).InfoS("exiting elasticsearch cluster info receive loop") return } } @@ -178,7 +178,7 @@ func (s *Shards) getAndParseURL(u *url.URL) ([]ShardResponse, error) { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() @@ -214,7 +214,7 @@ func (s *Shards) Collect(ch chan<- prometheus.Metric) { sr, err := s.fetchAndDecodeShards() if err != nil { - log.Println("failed to fetch and decode node shards stats, err: ", err) + klog.ErrorS(err, "failed to fetch and decode elasticsearch node shards stats") return } diff --git a/inputs/elasticsearch/collector/tasks.go b/inputs/elasticsearch/collector/tasks.go index 1f979646e..3707dfba7 100644 --- a/inputs/elasticsearch/collector/tasks.go +++ b/inputs/elasticsearch/collector/tasks.go @@ -18,12 +18,12 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) // filterByTask global required because 
collector interface doesn't expose any way to take @@ -91,7 +91,7 @@ func (t *TaskCollector) fetchTasks(_ context.Context) (tasksResponse, error) { defer func() { err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() diff --git a/inputs/elasticsearch/collector/util.go b/inputs/elasticsearch/collector/util.go index 4506c1c99..03727200b 100644 --- a/inputs/elasticsearch/collector/util.go +++ b/inputs/elasticsearch/collector/util.go @@ -17,8 +17,9 @@ import ( "context" "fmt" "io" - "log" "net/http" + + "k8s.io/klog/v2" ) func getURL(ctx context.Context, hc *http.Client, u string) ([]byte, error) { @@ -35,7 +36,7 @@ func getURL(ctx context.Context, hc *http.Client, u string) ([]byte, error) { defer func() { err = resp.Body.Close() if err != nil { - log.Println("E! failed to close response body:", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() diff --git a/inputs/elasticsearch/elasticsearch.go b/inputs/elasticsearch/elasticsearch.go index 0500cb20b..4cd8af038 100644 --- a/inputs/elasticsearch/elasticsearch.go +++ b/inputs/elasticsearch/elasticsearch.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "log" "net/http" "net/url" "os" @@ -30,6 +29,7 @@ import ( "flashcat.cloud/categraf/types" "github.com/prometheus/common/version" + "k8s.io/klog/v2" ) const inputName = "elasticsearch" @@ -161,7 +161,7 @@ func (ins *Instance) Init() error { return err } if ins.ExportIndexAliases { - log.Println("export_index_aliases is deprecated, use export_indices_aliases instead") + klog.Warning("export_index_aliases is deprecated, use export_indices_aliases instead") ins.ExportIndicesAliases = true } @@ -177,7 +177,7 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { // version metric if err := inputs.Collect(NewCollector(inputName), slist); err != nil { - log.Println("E! 
failed to collect version metric:", err) + klog.ErrorS(err, "failed to collect elasticsearch version metric") } if ins.ClusterStats || len(ins.IndicesInclude) > 0 { var wgC sync.WaitGroup @@ -193,7 +193,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // Gather node ID if info.nodeID, err = collector.GetNodeID(ins.Client, ins.UserName, ins.Password, s); err != nil { slist.PushSample("elasticsearch", "up", 0, map[string]string{"address": s}) - log.Println("E! failed to gather node id:", err) + klog.ErrorS(err, "failed to gather elasticsearch node id", "address", s) return } @@ -201,13 +201,13 @@ func (ins *Instance) Gather(slist *types.SampleList) { // whether this node is the Master if info.masterID, err = collector.GetCatMaster(ins.Client, ins.UserName, ins.Password, s); err != nil { slist.PushSample("elasticsearch", "up", 0, map[string]string{"address": s}) - log.Println("E! failed to get cat master:", err) + klog.ErrorS(err, "failed to get elasticsearch cat master", "address", s) return } if info.clusterName, err = collector.GetClusterName(ins.Client, ins.UserName, ins.Password, s); err != nil { slist.PushSample("elasticsearch", "up", 0, map[string]string{"address": s}) - log.Println("E! failed to get cluster name:", err) + klog.ErrorS(err, "failed to get elasticsearch cluster name", "address", s) return } @@ -232,7 +232,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { defer wg.Done() EsUrl, err := url.Parse(s) if err != nil { - log.Println("failed to parse es_uri, err: ", err) + klog.ErrorS(err, "failed to parse elasticsearch url", "address", s) return } if ins.UserName != "" && ins.Password != "" { @@ -249,11 +249,11 @@ func (ins *Instance) Gather(slist *types.SampleList) { collector.EnableExportClusterSettings(ins.ExportClusterSettings), ) if err != nil { - log.Println("E! 
failed to create Elasticsearch collector, err: ", err) + klog.ErrorS(err, "failed to create Elasticsearch collector", "address", s) return } if err := inputs.Collect(exporter, slist); err != nil { - log.Println("E! failed to collect metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch exporter metrics", "address", s) } if ins.NumMostRecentIndices != 0 { @@ -269,11 +269,11 @@ func (ins *Instance) Gather(slist *types.SampleList) { uu.RawQuery = "format=json&s=index:desc&h=index" indices_bts, err := ins.queryURL(&uu) if err != nil { - log.Println("E! failed to query all indices:", err) + klog.ErrorS(err, "failed to query all elasticsearch indices", "address", s) } var indices []IndicesInfo if err := json.Unmarshal(indices_bts, &indices); err != nil { - log.Println("E! json unmarshal to query all indices:", err) + klog.ErrorS(err, "failed to unmarshal elasticsearch indices", "address", s) } var indexList []string @@ -288,7 +288,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // Always gather node stats if err := inputs.Collect(collector.NewNodes(ins.Client, EsUrl, ins.AllNodes, ins.Node, ins.Local, ins.NodeStats), slist); err != nil { - log.Println("E! failed to collect nodes metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch nodes metrics", "address", s) } clusterInfoRetriever := clusterinfo.New(ins.Client, EsUrl, time.Duration(ins.ClusterInfoInterval)) @@ -296,47 +296,47 @@ func (ins *Instance) Gather(slist *types.SampleList) { if ins.ClusterHealth { if ins.ClusterHealthLevel == "indices" { if err := inputs.Collect(collector.NewClusterHealthIndices(ins.Client, EsUrl), slist); err != nil { - log.Println("E! failed to collect cluster health indices metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch cluster health indices metrics", "address", s) } } else { if err := inputs.Collect(collector.NewClusterHealth(ins.Client, EsUrl), slist); err != nil { - log.Println("E! 
failed to collect cluster health metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch cluster health metrics", "address", s) } } } if ins.ClusterStats && (ins.serverInfo[s].isMaster() || !ins.Local) { if err := inputs.Collect(collector.NewClusterStats(ins.Client, EsUrl), slist); err != nil { - log.Println("E! failed to collect cluster stats metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch cluster stats metrics", "address", s) } } if (ins.ExportIndices || ins.ExportShards) && (ins.serverInfo[s].isMaster() || !ins.Local) { sC := collector.NewShards(ins.Client, EsUrl) if err := inputs.Collect(sC, slist); err != nil { - log.Println("E! failed to collect shards metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch shards metrics", "address", s) } iC := collector.NewIndices(ins.Client, EsUrl, ins.ExportShards, ins.ExportIndicesAliases, ins.NewIndicesInclude, ins.MaxIndicesIncludeCount) if err := inputs.Collect(iC, slist); err != nil { - log.Println("E! failed to collect indices metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch indices metrics", "address", s) } if registerErr := clusterInfoRetriever.RegisterConsumer(iC); registerErr != nil { - log.Println("failed to register indices collector in cluster info") + klog.ErrorS(registerErr, "failed to register indices collector in cluster info", "address", s) } if registerErr := clusterInfoRetriever.RegisterConsumer(sC); registerErr != nil { - log.Println("failed to register shards collector in cluster info") + klog.ErrorS(registerErr, "failed to register shards collector in cluster info", "address", s) } } if ins.ExportIndicesSettings { if err := inputs.Collect(collector.NewIndicesSettings(ins.Client, EsUrl, ins.NewIndicesInclude, ins.MaxIndicesIncludeCount), slist); err != nil { - log.Println("E! 
failed to collect indices settings metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch indices settings metrics", "address", s) } } if ins.ExportIndicesMappings { if err := inputs.Collect(collector.NewIndicesMappings(ins.Client, EsUrl, ins.NewIndicesInclude, ins.MaxIndicesIncludeCount), slist); err != nil { - log.Println("E! failed to collect indices mappings metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch indices mappings metrics", "address", s) } } @@ -348,20 +348,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { switch runErr := clusterInfoRetriever.Run(ctx); { case runErr == nil: if ins.DebugMod { - log.Println("started cluster info retriever, interval: ", ins.ClusterInfoInterval) + klog.V(1).InfoS("started cluster info retriever", "interval", ins.ClusterInfoInterval) } case errors.Is(runErr, clusterinfo.ErrInitialCallTimeout): if ins.DebugMod { - log.Println("initial cluster info call timed out") + klog.V(1).InfoS("initial cluster info call timed out") } default: - log.Println("failed to run cluster info retriever, err: ", err) + klog.ErrorS(runErr, "failed to run cluster info retriever", "address", s) return } // register cluster info retriever as prometheus collector if err := inputs.Collect(clusterInfoRetriever, slist); err != nil { - log.Println("E! failed to collect cluster info metrics:", err) + klog.ErrorS(err, "failed to collect elasticsearch cluster info metrics", "address", s) } ins.serverInfoMutex.Lock() ins.hasRunBefore = true @@ -408,7 +408,7 @@ func (ins *Instance) createHTTPClient() (*http.Client, error) { if ins.AwsRegion != "" { ins.Client.Transport, err = roundtripper.NewAWSSigningTransport(httpTransport, ins.AwsRegion, ins.AwsRoleArn) if err != nil { - log.Println("E! 
failed to create AWS transport, err: ", err) + klog.ErrorS(err, "failed to create AWS transport") } } @@ -441,7 +441,7 @@ func (ins *Instance) queryURL(u *url.URL) ([]byte, error) { defer func() { err := res.Body.Close() if err != nil { - log.Println("E! failed to close response body:", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() diff --git a/inputs/elasticsearch/pkg/clusterinfo/clusterinfo.go b/inputs/elasticsearch/pkg/clusterinfo/clusterinfo.go index f6b851fa9..65f85ed1a 100644 --- a/inputs/elasticsearch/pkg/clusterinfo/clusterinfo.go +++ b/inputs/elasticsearch/pkg/clusterinfo/clusterinfo.go @@ -19,13 +19,13 @@ import ( "errors" "fmt" "io" - "log" "net/http" "net/url" "path" "time" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) const ( @@ -170,24 +170,24 @@ func (r *Retriever) Run(ctx context.Context) error { for { select { case <-ctx.Done(): - log.Println("context cancelled, exiting cluster info update loop, err: ", ctx.Err()) + klog.V(1).InfoS("context cancelled, exiting cluster info update loop", "error", ctx.Err()) return case <-r.sync: - log.Println("providing consumers with updated cluster info label") + klog.V(1).InfoS("providing consumers with updated cluster info label") res, err := r.fetchAndDecodeClusterInfo() if err != nil { - log.Println("failed to retrieve cluster info from ES, err: ", err) + klog.ErrorS(err, "failed to retrieve cluster info from elasticsearch") r.updateMetrics(nil) continue } r.updateMetrics(res) for name, consumerCh := range r.consumerChannels { - log.Println("sending update, consumer: ", name, "res: ", fmt.Sprintf("%+v", res)) + klog.V(1).InfoS("sending cluster info update", "consumer", name, "response", fmt.Sprintf("%+v", res)) // 使用 recover 防止向已关闭 channel 发送导致 panic func() { defer func() { if err := recover(); err != nil { - log.Printf("panic caught while sending to consumer %s: %v, removing consumer", name, err) + klog.ErrorS(fmt.Errorf("panic: %v", err), "panic caught 
while sending to cluster info consumer", "consumer", name) // 可选:在这里从 r.consumerChannels 中移除该消费者,避免后续继续尝试发送 // delete(r.consumerChannels, name) } @@ -197,7 +197,7 @@ func (r *Retriever) Run(ctx context.Context) error { // successfully sent default: // channel is full, skip this iteration - log.Println("consumer channel full, skipping: ", name) + klog.Warningf("cluster info consumer channel full, skipping: %s", name) } }() } @@ -211,13 +211,13 @@ func (r *Retriever) Run(ctx context.Context) error { } }(ctx) // trigger initial cluster info call - log.Println("triggering initial cluster info call") + klog.V(1).InfoS("triggering initial cluster info call") r.sync <- struct{}{} // start a ticker routine go func(ctx context.Context) { if r.interval <= 0 { - log.Println("no periodic cluster info label update requested") + klog.V(1).InfoS("no periodic cluster info label update requested") return } ticker := time.NewTicker(r.interval) @@ -225,10 +225,10 @@ func (r *Retriever) Run(ctx context.Context) error { for { select { case <-ctx.Done(): - log.Println("context cancelled, exiting cluster info trigger loop, err: ", ctx.Err()) + klog.V(1).InfoS("context cancelled, exiting cluster info trigger loop", "error", ctx.Err()) return case <-ticker.C: - log.Println("triggering periodic update") + klog.V(1).InfoS("triggering periodic cluster info update") r.sync <- struct{}{} } } @@ -238,7 +238,7 @@ func (r *Retriever) Run(ctx context.Context) error { select { case <-startupComplete: // first sync has been successful - log.Println("initial clusterinfo sync succeeded") + klog.V(1).InfoS("initial clusterinfo sync succeeded") return nil case <-time.After(initialTimeout): // initial call timed out @@ -256,14 +256,14 @@ func (r *Retriever) fetchAndDecodeClusterInfo() (*Response, error) { res, err := r.client.Get(u.String()) if err != nil { - log.Println("failed to get cluster info, err: ", err) + klog.ErrorS(err, "failed to get elasticsearch cluster info") return nil, err } defer func() 
{ err = res.Body.Close() if err != nil { - log.Println("failed to close http.Client, err: ", err) + klog.ErrorS(err, "failed to close elasticsearch response body") } }() diff --git a/inputs/elasticsearch/pkg/roundtripper/roundtripper.go b/inputs/elasticsearch/pkg/roundtripper/roundtripper.go index 89b2bd49d..4b5dede79 100644 --- a/inputs/elasticsearch/pkg/roundtripper/roundtripper.go +++ b/inputs/elasticsearch/pkg/roundtripper/roundtripper.go @@ -19,7 +19,6 @@ import ( "crypto/sha256" "encoding/hex" "io" - "log" "net/http" "time" @@ -28,6 +27,7 @@ import ( "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials/stscreds" "github.com/aws/aws-sdk-go-v2/service/sts" + "k8s.io/klog/v2" ) const ( @@ -48,7 +48,7 @@ func NewAWSSigningTransport(transport http.RoundTripper, region string, roleArn } cfg, err := config.LoadDefaultConfig(context.Background(), opts...) if err != nil { - log.Println("failed to load aws default config , err: ", err) + klog.ErrorS(err, "failed to load aws default config") return nil, err } @@ -61,7 +61,7 @@ func NewAWSSigningTransport(transport http.RoundTripper, region string, roleArn // are valid before returning the transport. 
_, err = cfg.Credentials.Retrieve(context.Background()) if err != nil { - log.Println("failed to retrive aws credentials, err: ", err) + klog.ErrorS(err, "failed to retrieve aws credentials") return nil, err } @@ -76,20 +76,20 @@ func (a *AWSSigningTransport) RoundTrip(req *http.Request) (*http.Response, erro signer := v4.NewSigner() payloadHash, newReader, err := hashPayload(req.Body) if err != nil { - log.Println("failed to hash request body, err: ", err) + klog.ErrorS(err, "failed to hash elasticsearch aws request body") return nil, err } req.Body = newReader creds, err := a.creds.Retrieve(context.Background()) if err != nil { - log.Println("failed to retrieve aws credentials, err: ", err) + klog.ErrorS(err, "failed to retrieve aws credentials") return nil, err } err = signer.SignHTTP(context.Background(), creds, req, payloadHash, service, a.region, time.Now()) if err != nil { - log.Println("failed to sign request body, err: ", err) + klog.ErrorS(err, "failed to sign elasticsearch aws request body") return nil, err } return a.t.RoundTrip(req) diff --git a/inputs/emc_unity/emc_unity.go b/inputs/emc_unity/emc_unity.go index bff60a65e..ce337e155 100644 --- a/inputs/emc_unity/emc_unity.go +++ b/inputs/emc_unity/emc_unity.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "log" "net/http" "net/http/cookiejar" "net/url" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -97,22 +97,26 @@ func (i *Instance) Init() error { func (i *Instance) Gather(sList *types.SampleList) { for _, a := range i.Addresses { if err := i.collectLun(a, sList); err != nil { - log.Println("E! error collectLun:", err) + klog.ErrorS(err, "failed to collect emc unity lun metrics", "host", a.baseURL.Host) continue } if err := i.collectCpu(a, sList); err != nil { - log.Println("E! 
error collectCpu:", err) + klog.ErrorS(err, "failed to collect emc unity cpu metrics", "host", a.baseURL.Host) continue } if err := i.collectFibreChannel(a, sList); err != nil { - log.Println("E! error collectFibreChannel:", err) + klog.ErrorS(err, "failed to collect emc unity fibre channel metrics", "host", a.baseURL.Host) continue } } } +func logUnexpectedPath(path string) { + klog.ErrorS(nil, "unexpected emc unity metric path", "path", path) +} + type KpiResp struct { Base string `json:"@base"` Updated time.Time `json:"updated"` @@ -182,8 +186,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error switch { case strings.Contains(entry.Content.Path, "rw.+.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -198,8 +202,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error case strings.Contains(entry.Content.Path, "read.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -214,8 +218,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error case strings.Contains(entry.Content.Path, "write.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -230,8 +234,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error case strings.Contains(entry.Content.Path, "+.bandwidth"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! 
error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -246,8 +250,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error case strings.Contains(entry.Content.Path, "read.bandwidth"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -262,8 +266,8 @@ func (i *Instance) collectFibreChannel(a Address, sList *types.SampleList) error case strings.Contains(entry.Content.Path, "write.bandwidth"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -328,8 +332,8 @@ func (i *Instance) collectCpu(a Address, sList *types.SampleList) error { switch { case strings.Contains(entry.Content.Path, "spa.utilization"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -344,8 +348,8 @@ func (i *Instance) collectCpu(a Address, sList *types.SampleList) error { case strings.Contains(entry.Content.Path, "spb.utilization"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -416,8 +420,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { switch { case strings.HasSuffix(entry.Content.Path, "rw.+.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! 
error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -432,8 +436,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "rw.read.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -448,8 +452,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "rw.write.throughput"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -464,8 +468,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "rw.+.ioSize"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -480,8 +484,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "rw.read.ioSize"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -496,8 +500,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "rw.write.ioSize"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! 
error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } @@ -512,8 +516,8 @@ func (i *Instance) collectLun(a Address, sList *types.SampleList) error { case strings.HasSuffix(entry.Content.Path, "sp.+.responseTime"): spl := strings.Split(entry.Content.Path, ".") - if len(spl) < 2 { - log.Println("E! error parse path:", "unexpected path") + if len(spl) < 3 { + logUnexpectedPath(entry.Content.Path) continue } diff --git a/inputs/ethtool/command_linux.go b/inputs/ethtool/command_linux.go index 60b528b4f..81ca85789 100755 --- a/inputs/ethtool/command_linux.go +++ b/inputs/ethtool/command_linux.go @@ -3,11 +3,11 @@ package ethtool import ( - "log" "os" "path/filepath" "github.com/vishvananda/netns" + "k8s.io/klog/v2" ) type Command interface { @@ -37,7 +37,7 @@ func (c *CommandEthtool) Init() error { handle: initialNamespace, } if err := namespaceGoroutine.Start(); err != nil { - log.Println("E! Failed to start goroutine for the initial namespace: ", err) + klog.ErrorS(err, "failed to start goroutine for initial namespace") return err } c.namespaceGoroutines = map[string]*NamespaceGoroutine{ @@ -63,7 +63,7 @@ func (c *CommandEthtool) Interfaces(includeNamespaces bool) ([]NamespacedInterfa initialNamespace, err := netns.Get() if err != nil { - log.Println("E! Could not get initial namespace: ", err) + klog.ErrorS(err, "could not get initial namespace") return nil, err } defer initialNamespace.Close() @@ -79,7 +79,7 @@ func (c *CommandEthtool) Interfaces(includeNamespaces bool) ([]NamespacedInterfa if includeNamespaces { namespaces, err := os.ReadDir(namespaceDirectory) if err != nil { - log.Println("W! 
Could not find namespace directory: ", err) + klog.Warningf("could not find namespace directory: path=%s err=%v", namespaceDirectory, err) } // We'll always have at least the initial namespace, so add one to ensure @@ -91,7 +91,7 @@ func (c *CommandEthtool) Interfaces(includeNamespaces bool) ([]NamespacedInterfa handle, err := netns.GetFromPath(filepath.Join(namespaceDirectory, name)) if err != nil { - log.Printf("W! Could not get handle for namespace [%q]: [%s]", name, err.Error()) + klog.Warningf("could not get handle for namespace: namespace=%q err=%v", name, err) continue } handles[name] = handle @@ -116,7 +116,7 @@ func (c *CommandEthtool) Interfaces(includeNamespaces bool) ([]NamespacedInterfa handle: handles[namespace], } if err := c.namespaceGoroutines[namespace].Start(); err != nil { - log.Printf("E! Failed to start goroutine for namespace [%q]: [%s]", namespace, err.Error()) + klog.ErrorS(err, "failed to start goroutine for namespace", "namespace", namespace) delete(c.namespaceGoroutines, namespace) continue } @@ -124,7 +124,7 @@ func (c *CommandEthtool) Interfaces(includeNamespaces bool) ([]NamespacedInterfa interfaces, err := c.namespaceGoroutines[namespace].Interfaces() if err != nil { - log.Printf("W! Could not get interfaces from namespace [%q]: [%s]", namespace, err.Error()) + klog.Warningf("could not get interfaces from namespace: namespace=%q err=%v", namespace, err) continue } allInterfaces = append(allInterfaces, interfaces...) 
diff --git a/inputs/ethtool/ethtool_linux.go b/inputs/ethtool/ethtool_linux.go index 39281982a..ec10feeb9 100755 --- a/inputs/ethtool/ethtool_linux.go +++ b/inputs/ethtool/ethtool_linux.go @@ -4,7 +4,6 @@ package ethtool import ( "fmt" - "log" "net" "regexp" "strings" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/pkg/choice" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "ethtool" @@ -115,7 +115,7 @@ func (ins *Instance) Init() error { ins.command = NewCommandEthtool() if _, ok := ins.command.(*CommandEthtool); !ok { errMsg := "Conversion failed" - log.Println("E! ", errMsg) + klog.ErrorS(nil, "ethtool command conversion failed") return fmt.Errorf("%v", errMsg) } @@ -126,7 +126,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // Get the list of interfaces interfaces, err := ins.command.Interfaces(ins.includeNamespaces) if err != nil { - log.Printf("E! gather interfaces:[%q] error:[%s]", interfaces, err) + klog.ErrorS(err, "failed to gather interfaces", "interfaces", interfaces) return } @@ -177,7 +177,7 @@ func (ins *Instance) gatherEthtoolStats(iface NamespacedInterface, slist *types. driverName, err := ins.command.DriverName(iface) if err != nil { - log.Printf("E! [%q] driver: [%s]", iface.Name, err) + klog.ErrorS(err, "failed to get interface driver", "interface", iface.Name) return } @@ -186,7 +186,7 @@ func (ins *Instance) gatherEthtoolStats(iface NamespacedInterface, slist *types. fields := make(map[string]interface{}) stats, err := ins.command.Stats(iface) if err != nil { - log.Printf("E! [%q] stats: [%s]", iface.Name, err) + klog.ErrorS(err, "failed to get interface stats", "interface", iface.Name) return } @@ -198,7 +198,7 @@ func (ins *Instance) gatherEthtoolStats(iface NamespacedInterface, slist *types. cmdget, err := ins.command.Get(iface) // error text is directly from running ethtool and syscalls if err != nil && err.Error() != "operation not supported" { - log.Printf("E! 
[%q] get: [%s]", iface.Name, err) + klog.ErrorS(err, "failed to get ethtool settings", "interface", iface.Name) return } for k, v := range cmdget { diff --git a/inputs/ethtool/ethtool_notlinux.go b/inputs/ethtool/ethtool_notlinux.go index 7b0fc3ca9..1ad36231d 100644 --- a/inputs/ethtool/ethtool_notlinux.go +++ b/inputs/ethtool/ethtool_notlinux.go @@ -3,11 +3,10 @@ package ethtool import ( - "log" - "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "ethtool" @@ -62,7 +61,7 @@ type Instance struct { } func (ins *Instance) Init() error { - log.Println("E! Current platform is not supported") + klog.Error("current platform is not supported") return nil } diff --git a/inputs/ethtool/namespace_linux.go b/inputs/ethtool/namespace_linux.go index 407fa7732..4f77f7393 100755 --- a/inputs/ethtool/namespace_linux.go +++ b/inputs/ethtool/namespace_linux.go @@ -3,13 +3,13 @@ package ethtool import ( - "log" "math" "net" "runtime" ethtoolLib "github.com/safchain/ethtool" "github.com/vishvananda/netns" + "k8s.io/klog/v2" ) type Namespace interface { @@ -138,13 +138,13 @@ func (n *NamespaceGoroutine) Start() error { // current one. initialNamespace, err := netns.Get() if err != nil { - log.Println("E! Could not get initial namespace: ", err) + klog.ErrorS(err, "could not get initial namespace") started <- err return } if !initialNamespace.Equal(n.handle) { if err := netns.Set(n.handle); err != nil { - log.Printf("E! Could not switch to namespace [%q]: [%s]", n.name, err.Error()) + klog.ErrorS(err, "could not switch to namespace", "namespace", n.name) started <- err return } @@ -153,7 +153,7 @@ func (n *NamespaceGoroutine) Start() error { // Every namespace needs its own connection to ethtool e, err := ethtoolLib.NewEthtool() if err != nil { - log.Printf("E! 
Could not create ethtool client for namespace [%q]: [%s]", n.name, err.Error()) + klog.ErrorS(err, "could not create ethtool client for namespace", "namespace", n.name) started <- err return } diff --git a/inputs/exec/exec.go b/inputs/exec/exec.go index c8b2925da..e02ccf99f 100644 --- a/inputs/exec/exec.go +++ b/inputs/exec/exec.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "log" "os" osExec "os/exec" "path/filepath" @@ -21,6 +20,7 @@ import ( "flashcat.cloud/categraf/parser/prometheus" "flashcat.cloud/categraf/pkg/cmdx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "exec" @@ -111,7 +111,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { matches, err := filepath.Glob(cmdAndArgs[0]) if err != nil { - log.Println("E! failed to get filepath glob of commands:", err) + klog.ErrorS(err, "failed to glob exec commands", "pattern", cmdAndArgs[0]) continue } @@ -134,7 +134,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } if len(commands) == 0 { - log.Println("W! no commands after parse") + klog.Warning("no exec commands available after parsing") return } @@ -152,17 +152,17 @@ func (ins *Instance) ProcessCommand(slist *types.SampleList, command string, wg out, errbuf, runErr := commandRun(command, time.Duration(ins.Timeout)) if runErr != nil || len(errbuf) > 0 { - log.Println("E! exec_command:", command, "error:", runErr, "stderr:", string(errbuf)) + klog.ErrorS(runErr, "exec command failed", "command", command, "stderr", string(errbuf)) return } if len(out) == 0 { - log.Println("E! exec_command:", command, "output is empty?, please check your command:", string(out)) + klog.ErrorS(nil, "exec command output is empty", "command", command) return } err := ins.parser.Parse(out, slist) if err != nil { - log.Println("E! 
failed to parse command stdout:", err) + klog.ErrorS(err, "failed to parse exec command stdout", "command", command) } } diff --git a/inputs/filecount/filecount.go b/inputs/filecount/filecount.go index f1e2e53df..f71d6a937 100644 --- a/inputs/filecount/filecount.go +++ b/inputs/filecount/filecount.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "io/fs" - "log" "os" "path/filepath" "sync" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/pkg/globpath" "flashcat.cloud/categraf/types" "github.com/karrick/godirwalk" + "k8s.io/klog/v2" ) const inputName = "filecount" @@ -247,7 +247,7 @@ func (ins *Instance) count(slist *types.SampleList, basedir string, glob globpat } match, err := ins.filter(file) if err != nil { - log.Println("E! filter file fail:", err) + klog.ErrorS(err, "filter file failed", "path", path) return nil } if match { @@ -309,14 +309,14 @@ func (ins *Instance) count(slist *types.SampleList, basedir string, glob globpat FollowSymbolicLinks: ins.FollowSymlinks, ErrorCallback: func(osPathname string, err error) godirwalk.ErrorAction { if errors.Is(err, fs.ErrPermission) { - log.Println("E! no permission to walk dir:", err) + klog.ErrorS(err, "no permission to walk dir", "path", osPathname) return godirwalk.SkipNode } return godirwalk.Halt }, }) if err != nil { - log.Println("E! count dir error:", err) + klog.ErrorS(err, "count dir error", "directory", basedir) } } diff --git a/inputs/gnmi/gnmi.go b/inputs/gnmi/gnmi.go index 8480eed80..b7eecc20b 100644 --- a/inputs/gnmi/gnmi.go +++ b/inputs/gnmi/gnmi.go @@ -4,7 +4,6 @@ package gnmi import ( "context" "fmt" - "log" "strings" "sync" "time" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/pkg/choice" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) // Define the warning to show if we cannot get a metric name. @@ -119,7 +119,7 @@ func (c *Instance) Init() error { // Check options if time.Duration(c.Redial) <= 0 { c.Redial = config.Duration(10 * time.Second) - log.Println("W! 
redial duration must be positive") + klog.Warning("redial duration must be positive") } // Check vendor_specific options configured by user @@ -200,7 +200,7 @@ func (c *Instance) Init() error { for alias, encodingPath := range c.Aliases { c.internalAliases[newInfoFromString(encodingPath)] = alias } - log.Printf("D! Internal alias mapping: %+v", c.internalAliases) + klog.V(1).InfoS("internal alias mapping", "aliases", c.internalAliases) go c.Start() return nil @@ -248,7 +248,7 @@ func (c *Instance) Start() error { } for ctx.Err() == nil { if err := h.subscribeGNMI(ctx, c.slist, tlscfg, request); err != nil && ctx.Err() == nil { - log.Println("W! Error in gNMI subscription:", err) + klog.Warningf("error in gNMI subscription: %v", err) } select { diff --git a/inputs/gnmi/handler.go b/inputs/gnmi/handler.go index 2c9fdf439..eabd9e57a 100644 --- a/inputs/gnmi/handler.go +++ b/inputs/gnmi/handler.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "log" "net" "path" "sort" @@ -25,6 +24,7 @@ import ( jnprHeader "flashcat.cloud/categraf/inputs/gnmi/extensions/jnpr_gnmi_extention" "flashcat.cloud/categraf/pkg/choice" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const eidJuniperTelemetryHeader = 1 @@ -81,7 +81,7 @@ func (h *handler) subscribeGNMI(ctx context.Context, slist *types.SampleList, tl return fmt.Errorf("failed to send subscription request: %w", err) } - log.Printf("Connection to gNMI device %s established", h.address) + klog.InfoS("connection to gNMI device established", "address", h.address) // Used to report the status of the TCP connection to the device. 
If the // GNMI connection goes down, but TCP is still up this will still report @@ -89,7 +89,7 @@ func (h *handler) subscribeGNMI(ctx context.Context, slist *types.SampleList, tl // connectStat := selfstat.Register("gnmi", "grpc_connection_status", map[string]string{"source": h.address}) // connectStat.Set(1) - defer log.Printf("Connection to gNMI device %s closed", h.address) + defer klog.InfoS("connection to gNMI device closed", "address", h.address) for ctx.Err() == nil { var reply *gnmiLib.SubscribeResponse if reply, err = subscribeClient.Recv(); err != nil { @@ -103,10 +103,10 @@ func (h *handler) subscribeGNMI(ctx context.Context, slist *types.SampleList, tl if h.trace { buf, err := protojson.Marshal(reply) if err != nil { - log.Printf("Marshal failed: %v", err) + klog.Warningf("marshal failed: %v", err) } else { t := reply.GetUpdate().GetTimestamp() - log.Printf("Got update_%v: %s", t, string(buf)) + klog.V(1).InfoS("got gNMI update", "timestamp", t, "payload", string(buf)) } } if response, ok := reply.Response.(*gnmiLib.SubscribeResponse_Update); ok { @@ -137,7 +137,7 @@ func (h *handler) handleSubscribeResponseUpdate(slist *types.SampleList, respons if choice.Contains("juniper_header", h.vendorExt) { juniperHeader := &jnprHeader.GnmiJuniperTelemetryHeaderExtension{} if err := proto.Unmarshal(currentExt, juniperHeader); err != nil { - log.Printf("unmarshal gnmi Juniper Header extension failed: %v", err) + klog.Warningf("unmarshal gnmi Juniper Header extension failed: %v", err) } else { // Add only relevant Tags from the Juniper Header extension. 
// These are required for aggregation @@ -167,7 +167,7 @@ func (h *handler) handleSubscribeResponseUpdate(slist *types.SampleList, respons fullPath := prefix.append(update.Path) fields, err := newFieldsFromUpdate(fullPath, update) if err != nil { - log.Printf("Processing update %v failed: %v", update, err) + klog.Warningf("processing update %v failed: %v", update, err) } // Prepare tags from prefix @@ -185,9 +185,9 @@ func (h *handler) handleSubscribeResponseUpdate(slist *types.SampleList, respons if !fullPath.equalsPathNoKeys(tagSub.fullPath) { continue } - log.Printf("Tag-subscription update for %q: %+v", tagSub.Name, update) + klog.V(1).InfoS("tag-subscription update", "subscription", tagSub.Name, "update", update) if err := h.tagStore.insert(tagSub, fullPath, fields, tags); err != nil { - log.Printf("E! Inserting tag failed: %v", err) + klog.ErrorS(err, "inserting tag failed", "subscription", tagSub.Name) } tagUpdate = true break @@ -225,9 +225,9 @@ func (h *handler) handleSubscribeResponseUpdate(slist *types.SampleList, respons // Lookup alias for the metric aliasPath, name := h.lookupAlias(field.path) if name == "" { - log.Printf("No measurement alias for gNMI path: %s", field.path) + klog.Warningf("no measurement alias for gNMI path: %s", field.path) if !h.emptyNameWarnShown { - log.Printf(emptyNameWarning, response.Update) + klog.Warningf(emptyNameWarning, response.Update) h.emptyNameWarnShown = true } } @@ -254,7 +254,7 @@ func (h *handler) handleSubscribeResponseUpdate(slist *types.SampleList, respons key = strings.TrimLeft(key, "/.") } if key == "" { - log.Printf("E! 
Invalid empty path %q with alias %q", fieldPath, aliasPath) + klog.ErrorS(nil, "invalid empty path", "path", fieldPath, "alias", aliasPath) continue } prefix := inputName diff --git a/inputs/googlecloud/instances.go b/inputs/googlecloud/instances.go index 07a7f2aa3..db1685f77 100644 --- a/inputs/googlecloud/instances.go +++ b/inputs/googlecloud/instances.go @@ -3,12 +3,12 @@ package googlecloud import ( "context" "fmt" - "log" "time" apiv3 "cloud.google.com/go/monitoring/apiv3" "google.golang.org/api/option" "google.golang.org/genproto/googleapis/monitoring/v3" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -66,7 +66,7 @@ func (f *metricCache) isValid() bool { func (ins *Instance) Drop() { err := ins.v3client.Close() if err != nil { - log.Println("W! close gcp client error:", err) + klog.Warningf("close gcp client error: %v", err) } } @@ -123,20 +123,20 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { if ins == nil || ins.v3client == nil { - log.Println("E! googlecloud client is nil") + klog.Error("googlecloud client is nil") return } if len(ins.Filter) != 0 { err := ins.readTimeSeriesValue(slist, ins.Filter) if err != nil { - log.Println("E! read time series value error:", err) + klog.ErrorS(err, "read googlecloud time series value error", "project_id", ins.ProjectID, "filter", ins.Filter) } } else { if ins.metricCache == nil || !ins.metricCache.isValid() { metrics, err := ins.ListMetrics() if err != nil { - log.Println("E! 
list metrics error:", err) + klog.ErrorS(err, "list googlecloud metrics error", "project_id", ins.ProjectID) return } ins.metricCache = &metricCache{ diff --git a/inputs/greenplum/greenplum.go b/inputs/greenplum/greenplum.go index db04dcf16..1cbf4db53 100644 --- a/inputs/greenplum/greenplum.go +++ b/inputs/greenplum/greenplum.go @@ -1,13 +1,13 @@ package greenplum import ( - "log" "os/exec" "strings" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "greenplum" @@ -48,7 +48,7 @@ func (ins *Greenplum) Gather(slist *types.SampleList) { stateValue = strings.TrimSpace(stateValue) gpstate := strings.Fields(stateValue) if len(gpstate)%7 != 0 { - log.Printf("E! failed to parse gpstate -m output: %v", gpstate) + klog.ErrorS(nil, "failed to parse gpstate -m output", "fields", gpstate) return } line := len(gpstate) / 7 diff --git a/inputs/hadoop/hadoop.go b/inputs/hadoop/hadoop.go index da456882f..313754415 100644 --- a/inputs/hadoop/hadoop.go +++ b/inputs/hadoop/hadoop.go @@ -4,12 +4,12 @@ import ( "errors" "fmt" "github.com/emirpasic/gods/lists/singlylinkedlist" - "log" "sync" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "hadoop" @@ -79,13 +79,13 @@ func (ins *Hadoop) Gather(slist *types.SampleList) { data, getDataErr := component.GetData(component.ComposeMetricUrl()) if getDataErr != nil { - log.Printf("E! Failed to get data from %s: %v", component.Name, getDataErr) + klog.ErrorS(getDataErr, "failed to get hadoop component data", "component", component.Name) return } res, fetchDataErr := component.FetchData(data) if fetchDataErr != nil { - log.Printf("E! 
Failed to fetch data from %s: %v", component.Name, fetchDataErr) + klog.ErrorS(fetchDataErr, "failed to fetch hadoop component data", "component", component.Name) return } diff --git a/inputs/haproxy/exporter.go b/inputs/haproxy/exporter.go index f7b09e6d8..116cc836f 100644 --- a/inputs/haproxy/exporter.go +++ b/inputs/haproxy/exporter.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "io" - "log" "net" "net/http" _ "net/http/pprof" @@ -32,6 +31,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) const ( @@ -375,14 +375,14 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) (up float64) { if e.fetchInfo != nil { infoReader, err := e.fetchInfo() if err != nil { - log.Println("E! failed to fetch haproxy info:", err) + klog.ErrorS(err, "failed to fetch haproxy info") return 0 } defer infoReader.Close() info, err := e.parseInfo(infoReader) if err != nil { - log.Println("E! failed to parse haproxy info:", err) + klog.ErrorS(err, "failed to parse haproxy info") } else { ch <- prometheus.MustNewConstMetric(haproxyInfo, prometheus.GaugeValue, 1, info.ReleaseDate, info.Version) } @@ -390,7 +390,7 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) (up float64) { body, err := e.fetchStat() if err != nil { - log.Println("E! failed to fetch haproxy stat:", err) + klog.ErrorS(err, "failed to fetch haproxy stat") return 0 } defer body.Close() @@ -408,11 +408,11 @@ loop: break loop default: if _, ok := err.(*csv.ParseError); ok { - log.Println("E! failed to parse csv:", err) + klog.ErrorS(err, "failed to parse haproxy csv") e.csvParseFailures.Inc() continue loop } - log.Println("E! failed to read csv:", err) + klog.ErrorS(err, "failed to read haproxy csv") return 0 } e.parseRow(row, ch) @@ -447,7 +447,7 @@ func (e *Exporter) parseInfo(i io.Reader) (versionInfo, error) { func (e *Exporter) parseRow(csvRow []string, ch chan<- prometheus.Metric) { if len(csvRow) < minimumCsvFieldCount { - log.Println("E! 
Parser received unexpected number of CSV fields", "min", minimumCsvFieldCount, "received", len(csvRow)) + klog.ErrorS(nil, "parser received unexpected number of CSV fields", "min", minimumCsvFieldCount, "received", len(csvRow)) e.csvParseFailures.Inc() return } @@ -511,7 +511,7 @@ func (e *Exporter) exportCsvFields(metrics map[int]metricInfo, csvRow []string, value = float64(valueInt) } if err != nil { - log.Println("E! Can't parse CSV field value", "value", valueStr, "err", err) + klog.ErrorS(err, "can't parse CSV field value", "value", valueStr) e.csvParseFailures.Inc() continue } diff --git a/inputs/haproxy/haproxy.go b/inputs/haproxy/haproxy.go index 642aaf0e1..9697b00f1 100644 --- a/inputs/haproxy/haproxy.go +++ b/inputs/haproxy/haproxy.go @@ -2,12 +2,12 @@ package haproxy import ( "fmt" - "log" "time" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "haproxy" @@ -109,6 +109,6 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := inputs.Collect(ins.e, slist) if err != nil { - log.Println("E! failed to collect metrics:", err) + klog.ErrorS(err, "failed to collect metrics") } } diff --git a/inputs/http_provider.go b/inputs/http_provider.go index 07b872763..0e905c6db 100644 --- a/inputs/http_provider.go +++ b/inputs/http_provider.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "strings" "sync" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/pkg/cfg" "flashcat.cloud/categraf/pkg/set" "flashcat.cloud/categraf/pkg/tls" + klog "k8s.io/klog/v2" ) // HTTPProvider provider a mechanism to get config from remote http server at a fixed interval @@ -164,7 +164,7 @@ func (hrp *HTTPProvider) check() error { func (hrp *HTTPProvider) doReq() (*httpProviderResponse, error) { req, err := http.NewRequest("GET", hrp.RemoteUrl, nil) if err != nil { - log.Println("E! 
http provider: build reload config request error:", err) + klog.ErrorS(err, "http provider build reload config request error", "remote_url", hrp.RemoteUrl) return nil, err } @@ -191,20 +191,20 @@ func (hrp *HTTPProvider) doReq() (*httpProviderResponse, error) { resp, err := hrp.client.Do(req) if err != nil { - log.Println("E! http provider: request reload config error:", err) + klog.ErrorS(err, "http provider request reload config error", "remote_url", hrp.RemoteUrl) return nil, err } defer resp.Body.Close() respData, err := io.ReadAll(resp.Body) if err != nil { - log.Println("E! http provider: request reload config error:", err) + klog.ErrorS(err, "http provider read reload config response error", "remote_url", hrp.RemoteUrl) return nil, err } confResp := &httpProviderResponse{} err = json.Unmarshal(respData, confResp) if err != nil { - log.Println("E! http provider: unmarshal result error:", err) + klog.ErrorS(err, "http provider unmarshal result error", "remote_url", hrp.RemoteUrl) return nil, err } @@ -226,11 +226,11 @@ func (hrp *HTTPProvider) doReq() (*httpProviderResponse, error) { } func (hrp *HTTPProvider) LoadConfig() (bool, error) { - log.Println("I! http provider: start reload config from remote:", hrp.RemoteUrl) + klog.InfoS("http provider start reload config from remote", "remote_url", hrp.RemoteUrl) confResp, err := hrp.doReq() if err != nil { - log.Printf("W! http provider: request remote err: [%+v]", err) + klog.Warningf("http provider request remote error: %+v", err) return false, err } @@ -238,7 +238,7 @@ func (hrp *HTTPProvider) LoadConfig() (bool, error) { if confResp.Version == hrp.version || confResp.Version == "" { return false, nil } - log.Printf("I! 
remote version:%s, current version:%s", confResp.Version, hrp.version) + klog.InfoS("http provider version changed", "remote_version", confResp.Version, "current_version", hrp.version) // delete empty entries for k, v := range confResp.Configs { @@ -290,7 +290,7 @@ func (hrp *HTTPProvider) StartReloader() { } if changed { if hrp.add.len() > 0 { - log.Println("I! http provider: new or updated inputs:", hrp.add) + klog.InfoS("http provider new or updated inputs", "cache", hrp.add) for inputKey, cm := range hrp.add.iter() { hrp.preStop(inputKey) for _, conf := range cm { @@ -300,7 +300,7 @@ func (hrp *HTTPProvider) StartReloader() { } if hrp.del.len() > 0 { - log.Println("I! http provider: deleted inputs:", hrp.del) + klog.InfoS("http provider deleted inputs", "cache", hrp.del) for inputKey, cm := range hrp.del.iter() { if hrp.serviceInput(inputKey) { continue @@ -356,9 +356,7 @@ func (hrp *HTTPProvider) caculateDiff(newConfigs map[string]map[string]*cfg.Conf cache := newInnerCache() for inputKey, configs := range newConfigs { for _, inputConfig := range configs { - if config.Config.DebugMode { - log.Println("D!: inputKey:", inputKey, "config sum:", inputConfig.CheckSum()) - } + klog.V(2).InfoS("http provider config", "input_key", inputKey, "checksum", inputConfig.CheckSum()) cache.put(inputKey, *inputConfig) } } @@ -371,22 +369,16 @@ func (hrp *HTTPProvider) caculateDiff(newConfigs map[string]map[string]*cfg.Conf oldConfig := set.NewWithLoad[string, cfg.ConfigWithFormat](oldConfigMap) add, _, del := newConfig.Diff(oldConfig) for sum := range add { - if config.Config.DebugMode { - log.Println("D!: add config:", inputKey, "config sum:", sum) - } + klog.V(1).InfoS("http provider add config", "input_key", inputKey, "checksum", sum) hrp.add.put(inputKey, configMap[sum]) } for sum := range del { - if config.Config.DebugMode { - log.Println("D!: delete config:", inputKey, "config sum:", sum) - } + klog.V(1).InfoS("http provider delete config", "input_key", inputKey, 
"checksum", sum) hrp.del.put(inputKey, oldConfigMap[sum]) } } else { for _, inputConfig := range configMap { - if config.Config.DebugMode { - log.Println("D!: add config:", inputKey, "config sum:", inputConfig.CheckSum()) - } + klog.V(1).InfoS("http provider add config", "input_key", inputKey, "checksum", inputConfig.CheckSum()) hrp.add.put(inputKey, inputConfig) } } @@ -395,9 +387,7 @@ func (hrp *HTTPProvider) caculateDiff(newConfigs map[string]map[string]*cfg.Conf for inputKey, configMap := range hrp.cache.iter() { if _, has := cache.get(inputKey); !has { for _, inputConfig := range configMap { - if config.Config.DebugMode { - log.Println("D!: delete config:", inputKey, "config sum:", inputConfig.CheckSum()) - } + klog.V(1).InfoS("http provider delete config", "input_key", inputKey, "checksum", inputConfig.CheckSum()) hrp.del.put(inputKey, inputConfig) } } @@ -416,10 +406,7 @@ func (hrp *HTTPProvider) LoadInputConfig(configs []cfg.ConfigWithFormat, input I nInput := input.Clone() err := cfg.LoadSingleConfig(c, nInput) if err != nil { - log.Println("E! load http config error:", err) - if config.Config.DebugMode { - log.Printf("D! config:%+v load error:%s", c, err) - } + klog.ErrorS(err, "load http config error", "config", c) continue } inputs[c.CheckSum()] = nInput diff --git a/inputs/http_response/http_response.go b/inputs/http_response/http_response.go index 0563732c1..e002968ef 100644 --- a/inputs/http_response/http_response.go +++ b/inputs/http_response/http_response.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "io" - "log" "net" "net/http" "net/http/httptrace" @@ -20,6 +19,7 @@ import ( "flashcat.cloud/categraf/pkg/httpx" "flashcat.cloud/categraf/pkg/netx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -192,7 +192,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! http_response... 
target:", target) + klog.V(1).InfoS("http_response gather", "target", target) } labels := map[string]string{"target": target} @@ -225,7 +225,7 @@ func (ins *Instance) gather(slist *types.SampleList, target string) { returnTags, fields, err = ins.httpGather(target) if err != nil { - log.Println("E! failed to gather http target:", target, "error:", err) + klog.ErrorS(err, "failed to gather http target", "target", target) } for k, v := range returnTags { @@ -293,7 +293,7 @@ func (ins *Instance) httpGather(target string) (map[string]string, map[string]in // If an error in returned, it means we are dealing with a network error, as // HTTP error codes do not generate errors in the net/http library if err != nil { - log.Println("E! network error while polling:", target, "error:", err) + klog.ErrorS(err, "network error while polling", "target", target) // metric: result_code fields["result_code"] = ConnectionFailed @@ -337,23 +337,23 @@ func (ins *Instance) httpGather(target string) (map[string]string, map[string]in bs, err := io.ReadAll(resp.Body) if err != nil { - log.Println("E! failed to read response body:", err, "target:", target) + klog.ErrorS(err, "failed to read response body", "target", target) return tags, fields, nil } if len(ins.ExpectResponseSubstring) > 0 && !strings.Contains(string(bs), ins.ExpectResponseSubstring) { - log.Println("E! body mismatch, response body:", string(bs)) + klog.ErrorS(nil, "body mismatch", "target", target, "body", string(bs)) fields["result_code"] = BodyMismatch } if ins.regularExpression != nil && !ins.regularExpression.Match(bs) { - log.Println("E! 
body mismatch, response body:", string(bs)) + klog.ErrorS(nil, "body mismatch", "target", target, "body", string(bs)) fields["result_code"] = BodyMismatch } if ins.ExpectResponseStatusCode != nil && *ins.ExpectResponseStatusCode != resp.StatusCode || len(ins.ExpectResponseStatusCodes) > 0 && !strings.Contains(ins.ExpectResponseStatusCodes, fmt.Sprintf("%d", resp.StatusCode)) { - log.Println("E! status code mismatch, response stats code:", resp.StatusCode) + klog.ErrorS(nil, "status code mismatch", "target", target, "status_code", resp.StatusCode) fields["result_code"] = CodeMismatch } diff --git a/inputs/huatuo/huatuo.go b/inputs/huatuo/huatuo.go index ddc873660..d7625cf32 100644 --- a/inputs/huatuo/huatuo.go +++ b/inputs/huatuo/huatuo.go @@ -6,7 +6,6 @@ import ( "context" "fmt" "io" - "log" "net/http" "os" "os/exec" @@ -22,6 +21,7 @@ import ( "flashcat.cloud/categraf/parser" "flashcat.cloud/categraf/parser/prometheus" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "huatuo" @@ -165,7 +165,7 @@ func (ins *Instance) ensureInstalled() error { subBinPath := filepath.Join(ins.InstallPath, entry.Name(), "bin", "huatuo-bamai") if _, err := os.Stat(subBinPath); err == nil { // Found it! Move everything up one level. - log.Printf("I! Detected subdirectory %s, moving files up to %s", entry.Name(), ins.InstallPath) + klog.InfoS("detected huatuo subdirectory, moving files into install path", "subdir", entry.Name(), "install_path", ins.InstallPath) srcDir := filepath.Join(ins.InstallPath, entry.Name()) // Move content @@ -186,7 +186,7 @@ func (ins *Instance) ensureInstalled() error { } // Remove empty subdir if err := os.Remove(srcDir); err != nil { - log.Printf("W! failed to remove empty dir %s: %v", srcDir, err) + klog.Warningf("failed to remove empty huatuo dir: path=%s err=%v", srcDir, err) } return nil } @@ -365,7 +365,7 @@ func (ins *Instance) manageProcess(ctx context.Context) { } if err := cmd.Start(); err != nil { - log.Printf("E! 
failed to start huatuo: %v", err) + klog.ErrorS(err, "failed to start huatuo", "install_path", ins.InstallPath, "config", confPath) select { case <-ctx.Done(): return @@ -395,7 +395,7 @@ func (ins *Instance) manageProcess(ctx context.Context) { } return case err := <-done: - log.Printf("I! huatuo process exited: %v", err) + klog.InfoS("huatuo process exited", "err", err, "install_path", ins.InstallPath) } // Allow quick restart unless context cancelled @@ -431,7 +431,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := ins.scrape(slist) if err != nil { - log.Printf("E! failed to scrape huatuo: %v", err) + klog.ErrorS(err, "failed to scrape huatuo", "url", ins.realURL) } } diff --git a/inputs/inputs.go b/inputs/inputs.go index b9f36a554..bb289793a 100644 --- a/inputs/inputs.go +++ b/inputs/inputs.go @@ -3,12 +3,17 @@ package inputs import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/types" + klog "k8s.io/klog/v2" ) type Initializer interface { Init() error } +type LoggerInitializer interface { + InitWithLogger(klog.Logger) error +} + type SampleGatherer interface { Gather(*types.SampleList) } @@ -21,7 +26,10 @@ type InstancesGetter interface { GetInstances() []Instance } -func MayInit(t interface{}) error { +func MayInit(t interface{}, logger klog.Logger) error { + if initializer, ok := t.(LoggerInitializer); ok { + return initializer.InitWithLogger(logger) + } if initializer, ok := t.(Initializer); ok { return initializer.Init() } diff --git a/inputs/inputs_test.go b/inputs/inputs_test.go new file mode 100644 index 000000000..6c18ecba9 --- /dev/null +++ b/inputs/inputs_test.go @@ -0,0 +1,90 @@ +package inputs + +import ( + "errors" + "testing" + + klog "k8s.io/klog/v2" +) + +type testKlogInitializer struct { + legacyCalled bool + loggerCalled bool + logger klog.Logger + err error +} + +func (t *testKlogInitializer) Init() error { + t.legacyCalled = true + return nil +} + +func (t *testKlogInitializer) InitWithLogger(logger klog.Logger) 
error { + t.loggerCalled = true + t.logger = logger + return t.err +} + +type testLegacyInitializer struct { + called bool + err error +} + +func (t *testLegacyInitializer) Init() error { + t.called = true + return t.err +} + +func TestMayInit(t *testing.T) { + t.Run("logger-aware initializer is preferred", func(t *testing.T) { + wantErr := errors.New("logger init failed") + target := &testKlogInitializer{err: wantErr} + logger := klog.Background() + + err := MayInit(target, logger) + if err != wantErr { + t.Fatalf("expected exact error %v, got %v", wantErr, err) + } + if !target.loggerCalled { + t.Fatal("expected logger-aware initializer to be called") + } + if target.legacyCalled { + t.Fatal("expected legacy initializer to be skipped") + } + var zeroLogger klog.Logger + if target.logger == zeroLogger { + t.Fatal("expected logger to be passed to logger-aware initializer") + } + }) + + t.Run("legacy initializer still works", func(t *testing.T) { + wantErr := errors.New("legacy init failed") + target := &testLegacyInitializer{err: wantErr} + logger := klog.Background() + + err := MayInit(target, logger) + if err != wantErr { + t.Fatalf("expected exact error %v, got %v", wantErr, err) + } + if !target.called { + t.Fatal("expected legacy initializer to be called") + } + }) + + t.Run("non-initializer returns nil", func(t *testing.T) { + if err := MayInit(struct{}{}, klog.Background()); err != nil { + t.Fatalf("expected nil, got %v", err) + } + }) + + t.Run("errors propagate unchanged", func(t *testing.T) { + wantErr := errors.New("unchanged error") + target := &testKlogInitializer{err: wantErr} + logger := klog.Background() + + err := MayInit(target, logger) + if err != wantErr { + t.Fatalf("expected exact error %v, got %v", wantErr, err) + } + }) +} diff --git a/inputs/ipmi/exporter/collector_bmc.go b/inputs/ipmi/exporter/collector_bmc.go index db77f910b..27dd86150 100644 --- a/inputs/ipmi/exporter/collector_bmc.go +++ b/inputs/ipmi/exporter/collector_bmc.go @@ -17,9 
+17,8 @@ package exporter import ( - "log" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -56,18 +55,18 @@ func (c BMCCollector) Args() []string { func (c BMCCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { firmwareRevision, err := freeipmi.GetBMCInfoFirmwareRevision(result) if err != nil { - log.Println("E!", "Failed to collect BMC data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC data", "target", targetName(target.host)) return 0, err } manufacturerID, err := freeipmi.GetBMCInfoManufacturerID(result) if err != nil { - log.Println("E!", "Failed to collect BMC data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC data", "target", targetName(target.host)) return 0, err } systemFirmwareVersion, err := freeipmi.GetBMCInfoSystemFirmwareVersion(result) if err != nil { // This one is not always available. 
- log.Println("E!", "Failed to parse bmc-info data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to parse bmc-info data", "target", targetName(target.host)) systemFirmwareVersion = "N/A" } ch <- prometheus.MustNewConstMetric( diff --git a/inputs/ipmi/exporter/collector_bmc_watchdog.go b/inputs/ipmi/exporter/collector_bmc_watchdog.go index ec2bc0ecf..7f29b28e3 100644 --- a/inputs/ipmi/exporter/collector_bmc_watchdog.go +++ b/inputs/ipmi/exporter/collector_bmc_watchdog.go @@ -17,9 +17,8 @@ package exporter import ( - "log" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -101,42 +100,42 @@ func (c BMCWatchdogCollector) Args() []string { func (c BMCWatchdogCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { timerState, err := freeipmi.GetBMCWatchdogTimerState(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog timer", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog timer", "target", targetName(target.host)) return 0, err } currentTimerUse, err := freeipmi.GetBMCWatchdogTimerUse(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog timer use", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog timer use", "target", targetName(target.host)) return 0, err } loggingState, err := freeipmi.GetBMCWatchdogLoggingState(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog logging", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog logging", "target", targetName(target.host)) return 0, err } currentTimeoutAction, err := freeipmi.GetBMCWatchdogTimeoutAction(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog timeout action", "target", targetName(target.host), "error", err) + 
klog.ErrorS(err, "failed to collect BMC watchdog timeout action", "target", targetName(target.host)) return 0, err } currentPretimeoutInterrupt, err := freeipmi.GetBMCWatchdogPretimeoutInterrupt(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog pretimeout interrupt", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog pretimeout interrupt", "target", targetName(target.host)) return 0, err } pretimeoutInterval, err := freeipmi.GetBMCWatchdogPretimeoutInterval(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog pretimeout interval", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog pretimeout interval", "target", targetName(target.host)) return 0, err } initialCountdown, err := freeipmi.GetBMCWatchdogInitialCountdown(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog initial countdown", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog initial countdown", "target", targetName(target.host)) return 0, err } currentCountdown, err := freeipmi.GetBMCWatchdogCurrentCountdown(result) if err != nil { - log.Println("E!", "Failed to collect BMC watchdog current countdown", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect BMC watchdog current countdown", "target", targetName(target.host)) return 0, err } diff --git a/inputs/ipmi/exporter/collector_chassis.go b/inputs/ipmi/exporter/collector_chassis.go index da80f5bf9..e7c73725c 100644 --- a/inputs/ipmi/exporter/collector_chassis.go +++ b/inputs/ipmi/exporter/collector_chassis.go @@ -17,9 +17,8 @@ package exporter import ( - "log" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -68,17 +67,17 @@ func (c ChassisCollector) Args() []string { func (c ChassisCollector) Collect(result 
freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { currentChassisPowerState, err := freeipmi.GetChassisPowerState(result) if err != nil { - log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect chassis data", "target", targetName(target.host)) return 0, err } currentChassisDriveFault, err := freeipmi.GetChassisDriveFault(result) if err != nil { - log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect chassis data", "target", targetName(target.host)) return 0, err } currentChassisCoolingFault, err := freeipmi.GetChassisCoolingFault(result) if err != nil { - log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect chassis data", "target", targetName(target.host)) return 0, err } ch <- prometheus.MustNewConstMetric( diff --git a/inputs/ipmi/exporter/collector_dcmi.go b/inputs/ipmi/exporter/collector_dcmi.go index 70059cc89..9f827fc0d 100644 --- a/inputs/ipmi/exporter/collector_dcmi.go +++ b/inputs/ipmi/exporter/collector_dcmi.go @@ -17,9 +17,8 @@ package exporter import ( - "log" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -56,7 +55,7 @@ func (c DCMICollector) Args() []string { func (c DCMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { currentPowerConsumption, err := freeipmi.GetCurrentPowerConsumption(result) if err != nil { - log.Println("E!", "Failed to collect DCMI data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect DCMI data", "target", targetName(target.host)) return 0, err } // Returned value negative == Power Measurement is not avail diff --git a/inputs/ipmi/exporter/collector_ipmi.go 
b/inputs/ipmi/exporter/collector_ipmi.go index 0b1ba8de9..4a3949f89 100644 --- a/inputs/ipmi/exporter/collector_ipmi.go +++ b/inputs/ipmi/exporter/collector_ipmi.go @@ -18,11 +18,11 @@ package exporter import ( "fmt" - "log" "math" "strconv" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -153,7 +153,7 @@ func (c IPMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metr targetHost := targetName(target.host) results, err := freeipmi.GetSensorData(result, excludeIds) if err != nil { - log.Println("E!", "Failed to collect sensor data", "target", targetHost, "error", err) + klog.ErrorS(err, "failed to collect sensor data", "target", targetHost) return 0, err } for _, data := range results { @@ -169,12 +169,12 @@ func (c IPMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metr case "N/A": state = math.NaN() default: - log.Println("W!", "Unknown sensor state", "target", targetHost, "state", data.State) + klog.Warningf("unknown sensor state: target=%s state=%s", targetHost, data.State) state = math.NaN() } if c.debugMod { - log.Println("D!", "Got values", "target", targetHost, "data", fmt.Sprintf("%+v", data)) + klog.V(1).InfoS("got ipmi sensor values", "target", targetHost, "data", fmt.Sprintf("%+v", data)) } switch data.Unit { diff --git a/inputs/ipmi/exporter/collector_notwindows.go b/inputs/ipmi/exporter/collector_notwindows.go index b6b59961a..b5f5a0860 100644 --- a/inputs/ipmi/exporter/collector_notwindows.go +++ b/inputs/ipmi/exporter/collector_notwindows.go @@ -17,11 +17,11 @@ package exporter import ( - "log" "path" "time" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -86,7 +86,7 @@ func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfi duration := time.Since(start).Seconds() if debugMod { - log.Println("D!", "Scrape duration", "target", 
targetName(host), "duration", duration) + klog.V(1).InfoS("ipmi scrape duration", "target", targetName(host), "duration", duration) } ch <- prometheus.MustNewConstMetric( durationDesc, @@ -103,7 +103,7 @@ func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfi for _, collector := range config.GetCollectors(debugMod) { var up int if debugMod { - log.Println("D!", "Running collector", "target", target.host, "collector", collector.Name()) + klog.V(1).InfoS("running ipmi collector", "target", target.host, "collector", collector.Name()) } fqcmd := path.Join(binPath, collector.Cmd()) diff --git a/inputs/ipmi/exporter/collector_sel.go b/inputs/ipmi/exporter/collector_sel.go index 7ea7e61dd..5b7437f53 100644 --- a/inputs/ipmi/exporter/collector_sel.go +++ b/inputs/ipmi/exporter/collector_sel.go @@ -17,9 +17,8 @@ package exporter import ( - "log" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -63,12 +62,12 @@ func (c SELCollector) Args() []string { func (c SELCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { entriesCount, err := freeipmi.GetSELInfoEntriesCount(result) if err != nil { - log.Println("E!", "Failed to collect SEL data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect SEL data", "target", targetName(target.host)) return 0, err } freeSpace, err := freeipmi.GetSELInfoFreeSpace(result) if err != nil { - log.Println("E!", "Failed to collect SEL data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect SEL data", "target", targetName(target.host)) return 0, err } ch <- prometheus.MustNewConstMetric( diff --git a/inputs/ipmi/exporter/collector_sm_lan_mode.go b/inputs/ipmi/exporter/collector_sm_lan_mode.go index 2e384a20f..47df10900 100644 --- a/inputs/ipmi/exporter/collector_sm_lan_mode.go +++ 
b/inputs/ipmi/exporter/collector_sm_lan_mode.go @@ -18,10 +18,10 @@ package exporter import ( "fmt" - "log" "strconv" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi" ) @@ -58,11 +58,11 @@ func (c SMLANModeCollector) Args() []string { func (c SMLANModeCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) { octets, err := freeipmi.GetRawOctets(result) if err != nil { - log.Println("E!", "Failed to collect LAN mode data", "target", targetName(target.host), "error", err) + klog.ErrorS(err, "failed to collect LAN mode data", "target", targetName(target.host)) return 0, err } if len(octets) != 3 { - log.Println("E!", "Unexpected number of octets", "target", targetName(target.host), "octets", octets) + klog.ErrorS(nil, "unexpected number of octets", "target", targetName(target.host), "octets", octets) return 0, fmt.Errorf("unexpected number of octets in raw response: %d", len(octets)) } @@ -71,7 +71,7 @@ func (c SMLANModeCollector) Collect(result freeipmi.Result, ch chan<- prometheus value, _ := strconv.Atoi(octets[2]) ch <- prometheus.MustNewConstMetric(lanModeDesc, prometheus.GaugeValue, float64(value)) default: - log.Println("E!", "Unexpected lan mode status (ipmi-raw)", "target", targetName(target.host), "sgatus", octets[2]) + klog.ErrorS(nil, "unexpected lan mode status (ipmi-raw)", "target", targetName(target.host), "status", octets[2]) return 0, fmt.Errorf("unexpected lan mode status: %s", octets[2]) } diff --git a/inputs/ipmi/exporter/freeipmi/freeipmi.go b/inputs/ipmi/exporter/freeipmi/freeipmi.go index 218692c2a..d81f6d49f 100644 --- a/inputs/ipmi/exporter/freeipmi/freeipmi.go +++ b/inputs/ipmi/exporter/freeipmi/freeipmi.go @@ -22,7 +22,6 @@ import ( "encoding/csv" "encoding/hex" "fmt" - "log" "math" "os" "os/exec" @@ -31,6 +30,8 @@ import ( "strconv" "strings" "syscall" + + "k8s.io/klog/v2" ) var ( @@ -126,10 +127,10 @@ func 
freeipmiConfigPipe(config string) (string, error) { go func(file string, data []byte) { f, err := os.OpenFile(file, os.O_WRONLY|os.O_CREATE|os.O_APPEND, os.ModeNamedPipe) if err != nil { - log.Println("msg", "Error opening pipe", "error", err) + klog.ErrorS(err, "error opening freeipmi pipe", "pipe", file) } if _, err := f.Write(data); err != nil { - log.Println("msg", "Error writing config to pipe", "error", err) + klog.ErrorS(err, "error writing freeipmi config to pipe", "pipe", file) } f.Close() }(pipe, content) @@ -147,7 +148,7 @@ func Execute(cmd string, args []string, config string, target string, debugMod b } defer func() { if err := os.Remove(pipe); err != nil { - log.Println("msg", "Error deleting named pipe", "error", err) + klog.ErrorS(err, "error deleting freeipmi named pipe", "pipe", pipe) } }() @@ -157,7 +158,7 @@ func Execute(cmd string, args []string, config string, target string, debugMod b } if debugMod { - log.Println("D!", "Executing", "command", cmd, "args", fmt.Sprintf("%+v", args)) + klog.V(1).InfoS("executing freeipmi command", "command", cmd, "args", fmt.Sprintf("%+v", args)) } out, err := exec.Command(cmd, args...).CombinedOutput() diff --git a/inputs/ipmi/instances.go b/inputs/ipmi/instances.go index 8fb557338..92f591851 100644 --- a/inputs/ipmi/instances.go +++ b/inputs/ipmi/instances.go @@ -2,10 +2,10 @@ package ipmi import ( "flashcat.cloud/categraf/inputs" - "log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs/ipmi/exporter" @@ -51,19 +51,19 @@ func (m *Instance) Gather(slist *types.SampleList) { desc := metric.Desc().String() descName, err := inputs.DescName(desc) if err != nil { - log.Println("E! failed to parse desc name:", desc) + klog.ErrorS(err, "failed to parse ipmi metric desc name", "desc", desc) continue } icLabels, err := inputs.DescConstLabels(desc) if err != nil { - log.Println("E! 
failed to read labels:", desc) + klog.ErrorS(err, "failed to read ipmi metric labels", "desc", desc) continue } dtoMetric := &dto.Metric{} err = metric.Write(dtoMetric) if err != nil { - log.Println("E! failed to write metric:", desc) + klog.ErrorS(err, "failed to write ipmi metric", "desc", desc) continue } diff --git a/inputs/iptables/iptables.go b/inputs/iptables/iptables.go index 5152648d9..79bc8c585 100644 --- a/inputs/iptables/iptables.go +++ b/inputs/iptables/iptables.go @@ -5,7 +5,6 @@ package iptables import ( "errors" - "log" "os/exec" "regexp" "strconv" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "iptables" @@ -59,7 +59,7 @@ type chainLister func(table, chain string) (string, error) func (ins *Instance) Init() error { if ins.Table == "" || len(ins.Chains) == 0 { - log.Println("W! Table or Chains is empty") + klog.Warning("iptables table or chains is empty") return types.ErrInstancesEmpty } if ins.lister == nil { @@ -71,11 +71,11 @@ func (ins *Instance) Init() error { // Gather gathers iptables packets and bytes throughput from the configured tables and chains. func (ins *Instance) Gather(slist *types.SampleList) { if ins.Table == "" || len(ins.Chains) == 0 { - log.Println("W! Table or Chains is empty") + klog.Warning("iptables table or chains is empty") return } if ins.lister == nil { - log.Println("E! Lister is empty or not initialized") + klog.Error("iptables lister is empty or not initialized") return } // best effort : we continue through the chains even if an error is encountered, @@ -83,12 +83,12 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, chain := range ins.Chains { data, err := ins.lister(ins.Table, chain) if err != nil { - log.Println("E! 
ChainLister error:", err) + klog.ErrorS(err, "iptables chain lister failed", "table", ins.Table, "chain", chain) continue } err = ins.parseAndGather(data, slist) if err != nil { - log.Println("E! ParseAndGather failed:", err) + klog.ErrorS(err, "iptables parse and gather failed", "table", ins.Table, "chain", chain) continue } } diff --git a/inputs/ipvs/ipvs_linux_amd64.go b/inputs/ipvs/ipvs_linux_amd64.go index ed94ac6d7..52959b2d5 100644 --- a/inputs/ipvs/ipvs_linux_amd64.go +++ b/inputs/ipvs/ipvs_linux_amd64.go @@ -3,12 +3,12 @@ package ipvs import ( _ "embed" "fmt" - "log" "math/bits" "strconv" "syscall" "github.com/moby/ipvs" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -42,7 +42,7 @@ func (i *IPVS) Gather(slist *types.SampleList) { if i.handle == nil { h, err := ipvs.New("") if err != nil { - log.Printf("E! Unable to open IPVS handle: %v\n", err) + klog.ErrorS(err, "unable to open IPVS handle") return } i.handle = h @@ -52,7 +52,7 @@ func (i *IPVS) Gather(slist *types.SampleList) { if err != nil { i.handle.Close() i.handle = nil // trigger a reopen on next call to gather - log.Printf("E! Failed to list IPVS services: %v\n", err) + klog.ErrorS(err, "failed to list IPVS services") return } for _, s := range services { @@ -70,7 +70,7 @@ func (i *IPVS) Gather(slist *types.SampleList) { destinations, err := i.handle.GetDestinations(s) if err != nil { - log.Printf("E! 
Failed to list destinations for a virtual server: %v\n", err) + klog.ErrorS(err, "failed to list destinations for IPVS service", "service", serviceTags(s)) continue // move on to the next virtual server } diff --git a/inputs/jenkins/jenkins.go b/inputs/jenkins/jenkins.go index 289b972b2..bd1971457 100644 --- a/inputs/jenkins/jenkins.go +++ b/inputs/jenkins/jenkins.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "log" "net/http" "net/url" "strconv" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "jenkins" @@ -88,12 +88,12 @@ func (ins *Instance) Gather(slist *types.SampleList) { if ins.client == nil { client, err := ins.newHTTPClient() if err != nil { - log.Println("E! failed to new HTTPClient:", err) + klog.ErrorS(err, "failed to create Jenkins HTTP client", "url", ins.URL) return } if err = ins.initialize(client); err != nil { - log.Println("E! failed to initialize:", err) + klog.ErrorS(err, "failed to initialize Jenkins input", "url", ins.URL) return } } @@ -224,7 +224,7 @@ func (ins *Instance) gatherNodeData(n node, slist *types.SampleList) error { func (ins *Instance) gatherNodesData(slist *types.SampleList) { nodeResp, err := ins.client.getAllNodes(context.Background()) if err != nil { - log.Println("E! gatherNodesData", err) + klog.ErrorS(err, "failed to gather Jenkins node data", "url", ins.URL) return } @@ -246,7 +246,7 @@ func (ins *Instance) gatherNodesData(slist *types.SampleList) { func (ins *Instance) gatherJobs(slist *types.SampleList) { js, err := ins.client.getJobs(context.Background(), nil) if err != nil { - log.Println("E! gatherJobs", err) + klog.ErrorS(err, "failed to gather Jenkins jobs", "url", ins.URL) return } var wg sync.WaitGroup @@ -259,7 +259,7 @@ func (ins *Instance) gatherJobs(slist *types.SampleList) { parents: []string{}, layer: 0, }, slist); err != nil { - log.Println("E! 
getJobDetail", err) + klog.ErrorS(err, "failed to get Jenkins job detail", "url", ins.URL, "job", name) } }(job.Name, &wg, slist) } @@ -293,7 +293,7 @@ func (ins *Instance) getJobDetail(jr jobRequest, slist *types.SampleList) error parents: jr.combined(), layer: jr.layer + 1, }, slist); err != nil { - log.Println("E! getJobDetail", err) + klog.ErrorS(err, "failed to get Jenkins sub-job detail", "url", ins.URL, "job", ij.Name, "parent_job", jr.hierarchyName()) } }(ij, jr, slist) } @@ -312,7 +312,7 @@ func (ins *Instance) getJobDetail(jr jobRequest, slist *types.SampleList) error if build.Building { if ins.DebugMod { - log.Println("Ignore running build on ", jr.name, "build", number) + klog.V(1).InfoS("ignoring running Jenkins build", "url", ins.URL, "job", jr.name, "build", number) } return nil } diff --git a/inputs/jolokia/gatherer.go b/inputs/jolokia/gatherer.go index fc009d64e..09d609f61 100644 --- a/inputs/jolokia/gatherer.go +++ b/inputs/jolokia/gatherer.go @@ -2,11 +2,11 @@ package jolokia import ( "fmt" - "log" "sort" "strings" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const defaultFieldName = "value" @@ -60,7 +60,7 @@ func (g *Gatherer) gatherResponses(responses []ReadResponse, tags map[string]str responsePoints, responseErrors := g.generatePoints(metric, responses) points = append(points, responsePoints...) 
for _, err := range responseErrors { - log.Println("E!", err) + klog.ErrorS(err, "failed to generate jolokia points", "metric", metric.Name) } series[metric.Name] = points diff --git a/inputs/jolokia_agent/jolokia_agent.go b/inputs/jolokia_agent/jolokia_agent.go index f54f99410..b52dc5fef 100644 --- a/inputs/jolokia_agent/jolokia_agent.go +++ b/inputs/jolokia_agent/jolokia_agent.go @@ -1,8 +1,6 @@ package jolokia_agent import ( - "fmt" - "log" "sync" "time" @@ -11,6 +9,7 @@ import ( "flashcat.cloud/categraf/inputs/jolokia" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "jolokia_agent" @@ -82,7 +81,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, url := range ins.URLs { client, err := ins.createClient(url) if err != nil { - log.Println("E! failed to create client:", err) + klog.ErrorS(err, "failed to create client", "url", url) continue } ins.clients = append(ins.clients, client) @@ -98,7 +97,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := ins.gatherer.Gather(client, slist) if err != nil { - log.Println("E!", fmt.Errorf("unable to gather metrics for %s: %v", client.URL, err)) + klog.ErrorS(err, "unable to gather metrics", "url", client.URL) } }(client) } diff --git a/inputs/jolokia_proxy/jolokia_proxy.go b/inputs/jolokia_proxy/jolokia_proxy.go index 475730a1c..2a7ca6938 100644 --- a/inputs/jolokia_proxy/jolokia_proxy.go +++ b/inputs/jolokia_proxy/jolokia_proxy.go @@ -1,8 +1,6 @@ package jolokia_proxy import ( - "fmt" - "log" "time" "flashcat.cloud/categraf/config" @@ -10,6 +8,7 @@ import ( "flashcat.cloud/categraf/inputs/jolokia" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "jolokia_proxy" @@ -89,7 +88,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { if ins.client == nil { client, err := ins.createClient(ins.URL) if err != nil { - log.Println("E! 
failed to create client:", err) + klog.ErrorS(err, "failed to create jolokia proxy client", "url", ins.URL) return } ins.client = client @@ -97,7 +96,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := ins.gatherer.Gather(ins.client, slist) if err != nil { - log.Println("E!", fmt.Errorf("unable to gather metrics for %s: %v", ins.client.URL, err)) + klog.ErrorS(err, "unable to gather jolokia proxy metrics", "url", ins.client.URL) } } diff --git a/inputs/kafka/kafka.go b/inputs/kafka/kafka.go index e11095d9b..4d9d8e2a8 100644 --- a/inputs/kafka/kafka.go +++ b/inputs/kafka/kafka.go @@ -2,7 +2,6 @@ package kafka import ( "fmt" - "log" "os" "strings" "time" @@ -13,8 +12,9 @@ import ( "flashcat.cloud/categraf/types" "github.com/IBM/sarama" "github.com/go-kit/log/level" + "k8s.io/klog/v2" - klog "github.com/go-kit/log" + gokitlog "github.com/go-kit/log" ) const inputName = "kafka" @@ -134,7 +134,7 @@ type Instance struct { // disable calculate lag rate DisableCalculateLagRate bool `toml:"disable_calculate_lag_rate,omitempty"` - l klog.Logger `toml:"-"` + l gokitlog.Logger `toml:"-"` e *exporter.Exporter `toml:"-"` DialTimeout int `toml:"dial_timeout"` @@ -227,7 +227,7 @@ func (ins *Instance) Init() error { WriteTimeout: time.Duration(ins.WriteTimeout) * time.Second, } - ins.l = level.NewFilter(klog.NewLogfmtLogger(klog.NewSyncWriter(os.Stderr)), levelFilter(ins.LogLevel)) + ins.l = level.NewFilter(gokitlog.NewLogfmtLogger(gokitlog.NewSyncWriter(os.Stderr)), levelFilter(ins.LogLevel)) e, err := exporter.New(ins.l, options, ins.TopicsFilter, ins.TopicExclude, ins.GroupFilter, ins.GroupExclude) if err != nil { @@ -245,7 +245,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := inputs.Collect(ins.e, slist) if err != nil { - log.Println("E! 
failed to collect metrics:", err) + klog.ErrorS(err, "failed to collect kafka metrics", "kafka_uris", ins.KafkaURIs) } } diff --git a/inputs/kernel/kernel.go b/inputs/kernel/kernel.go index fcacfa032..13043a735 100644 --- a/inputs/kernel/kernel.go +++ b/inputs/kernel/kernel.go @@ -6,7 +6,6 @@ package kernel import ( "bytes" "fmt" - "log" "os" "strconv" "strings" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "kernel" @@ -56,20 +56,20 @@ func (s *KernelStats) Name() string { func (s *KernelStats) Gather(slist *types.SampleList) { data, err := s.getProcStat() if err != nil { - log.Println("E! failed to read:", s.statFile, "error:", err) + klog.ErrorS(err, "failed to read kernel stat file", "path", s.statFile) return } entropyData, err := os.ReadFile(s.entropyStatFile) if err != nil { - log.Println("E! failed to read:", s.entropyStatFile, "error:", err) + klog.ErrorS(err, "failed to read entropy stat file", "path", s.entropyStatFile) return } entropyString := string(entropyData) entropyValue, err := strconv.ParseInt(strings.TrimSpace(entropyString), 10, 64) if err != nil { - log.Println("E! failed to parse:", s.entropyStatFile, "error:", err) + klog.ErrorS(err, "failed to parse entropy stat file", "path", s.entropyStatFile) return } diff --git a/inputs/kernel_vmstat/kernel_vmstat.go b/inputs/kernel_vmstat/kernel_vmstat.go index 9238a8b91..f9214a959 100644 --- a/inputs/kernel_vmstat/kernel_vmstat.go +++ b/inputs/kernel_vmstat/kernel_vmstat.go @@ -6,13 +6,13 @@ package kernel_vmstat import ( "bytes" "fmt" - "log" "os" "strconv" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "kernel_vmstat" @@ -45,7 +45,7 @@ func (s *KernelVmstat) Name() string { func (s *KernelVmstat) Gather(slist *types.SampleList) { data, err := s.getProcVmstat() if err != nil { - log.Println("E! 
failed to gather vmstat:", err) + klog.ErrorS(err, "failed to gather vmstat") return } @@ -61,7 +61,7 @@ func (s *KernelVmstat) Gather(slist *types.SampleList) { m, err := strconv.ParseInt(string(dataFields[i+1]), 10, 64) if err != nil { if s.DebugMod { - log.Println("D! failed to parse vmstat field:", string(dataFields[i])) + klog.V(1).InfoS("failed to parse vmstat field", "field", string(dataFields[i])) } continue } diff --git a/inputs/kubernetes/kubernetes.go b/inputs/kubernetes/kubernetes.go index 83e3c65d1..233105b33 100644 --- a/inputs/kubernetes/kubernetes.go +++ b/inputs/kubernetes/kubernetes.go @@ -3,7 +3,6 @@ package kubernetes import ( "encoding/json" "fmt" - "log" "net/http" "os" "strings" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -110,7 +110,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { urlpath := fmt.Sprintf("%s/stats/summary", ins.URL) err := ins.LoadJSON(urlpath, summaryMetrics) if err != nil { - log.Println("E! failed to load", urlpath, "error:", err) + klog.ErrorS(err, "failed to load kubernetes summary metrics", "url", urlpath) slist.PushSample(inputName, "kubelet_up", 0) return } @@ -119,7 +119,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { podInfos, err := ins.gatherPodInfo(ins.URL) if err != nil { - log.Println("E! 
failed to gather pod info, error:", err) + klog.ErrorS(err, "failed to gather kubernetes pod info", "url", ins.URL) return } diff --git a/inputs/ldap/ldap.go b/inputs/ldap/ldap.go index 672404475..d9f02e751 100644 --- a/inputs/ldap/ldap.go +++ b/inputs/ldap/ldap.go @@ -4,15 +4,15 @@ import ( "crypto/tls" "errors" "fmt" - "log" "net/url" "time" "github.com/go-ldap/ldap/v3" + "k8s.io/klog/v2" - commontls "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + commontls "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" ) @@ -103,7 +103,7 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { conn, err := ins.connect() if err != nil { - log.Println("E! failed to connect the server:", ins.Server, "error:", err) + klog.ErrorS(err, "failed to connect ldap server", "server", ins.Server) return } defer conn.Close() @@ -111,12 +111,12 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, req := range ins.requests { result, err := conn.Search(req.query) if err != nil { - log.Println("E! failed to search the server:", ins.Server, "error:", err) + klog.ErrorS(err, "failed to search ldap server", "server", ins.Server) continue } s, err := ins.gather(req, result) if err != nil { - log.Println("E! 
failed to gather metrics: ", err) + klog.ErrorS(err, "failed to gather ldap metrics", "server", ins.Server) return } slist.PushFrontN(s) diff --git a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go index 6e06f3569..5b9fc0dc1 100644 --- a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go +++ b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go @@ -6,7 +6,6 @@ package linux_sysctl_fs import ( "bytes" "errors" - "log" "os" "path" "strconv" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/osx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "linux_sysctl_fs" @@ -48,23 +48,23 @@ func (s *SysctlFS) Gather(slist *types.SampleList) { for _, n := range []string{"aio-nr", "aio-max-nr", "dquot-nr", "dquot-max", "super-nr", "super-max"} { if err := s.gatherOne(n, fields); err != nil { - log.Println("E! failed to gather sysctl fs:", err) + klog.ErrorS(err, "failed to gather sysctl fs metric", "name", n) } } err := s.gatherList("inode-state", fields, "inode-nr", "inode-free-nr", "inode-preshrink-nr") if err != nil { - log.Println("E! failed to gather inode-state:", err) + klog.ErrorS(err, "failed to gather sysctl inode-state") } err = s.gatherList("dentry-state", fields, "dentry-nr", "dentry-unused-nr", "dentry-age-limit", "dentry-want-pages") if err != nil { - log.Println("E! failed to gather dentry-state:", err) + klog.ErrorS(err, "failed to gather sysctl dentry-state") } err = s.gatherList("file-nr", fields, "file-nr", "", "file-max") if err != nil { - log.Println("E! 
failed to gather file-nr:", err) + klog.ErrorS(err, "failed to gather sysctl file-nr") } slist.PushSamples(inputName, fields) diff --git a/inputs/logstash/logstash.go b/inputs/logstash/logstash.go index becb0b19b..0d8b7814c 100644 --- a/inputs/logstash/logstash.go +++ b/inputs/logstash/logstash.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strings" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/pkg/jsonx" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "logstash" @@ -157,11 +157,11 @@ func (ins *Instance) Gather(slist *types.SampleList) { if choice.Contains("jvm", ins.Collect) { jvmURL, err := url.Parse(ins.URL + jvmStats) if err != nil { - log.Println("E! failed to parse url:", ins.URL+jvmStats) + klog.ErrorS(err, "failed to parse logstash url", "url", ins.URL+jvmStats) return } if err := ins.gatherJVMStats(jvmURL.String(), slist); err != nil { - log.Println("E! failed to gather jvm stats:", err) + klog.ErrorS(err, "failed to gather jvm stats", "url", jvmURL.String()) return } } @@ -169,11 +169,11 @@ func (ins *Instance) Gather(slist *types.SampleList) { if choice.Contains("process", ins.Collect) { processURL, err := url.Parse(ins.URL + processStats) if err != nil { - log.Println("E! failed to parse url:", ins.URL+processStats) + klog.ErrorS(err, "failed to parse logstash url", "url", ins.URL+processStats) return } if err := ins.gatherProcessStats(processURL.String(), slist); err != nil { - log.Println("E! failed to gather process stats:", err) + klog.ErrorS(err, "failed to gather process stats", "url", processURL.String()) return } } @@ -182,21 +182,21 @@ func (ins *Instance) Gather(slist *types.SampleList) { if ins.SinglePipeline { pipelineURL, err := url.Parse(ins.URL + pipelineStats) if err != nil { - log.Println("E! 
failed to parse url:", ins.URL+pipelineStats) + klog.ErrorS(err, "failed to parse logstash url", "url", ins.URL+pipelineStats) return } if err := ins.gatherPipelineStats(pipelineURL.String(), slist); err != nil { - log.Println("E! failed to gather pipeline stats:", err) + klog.ErrorS(err, "failed to gather pipeline stats", "url", pipelineURL.String()) return } } else { pipelinesURL, err := url.Parse(ins.URL + pipelinesStats) if err != nil { - log.Println("E! failed to parse url:", ins.URL+pipelinesStats) + klog.ErrorS(err, "failed to parse logstash url", "url", ins.URL+pipelinesStats) return } if err := ins.gatherPipelinesStats(pipelinesURL.String(), slist); err != nil { - log.Println("E! failed to gather pipelines stats:", err) + klog.ErrorS(err, "failed to gather pipelines stats", "url", pipelinesURL.String()) return } } diff --git a/inputs/mem/mem.go b/inputs/mem/mem.go index 21d290f86..b615fd7fe 100644 --- a/inputs/mem/mem.go +++ b/inputs/mem/mem.go @@ -1,13 +1,13 @@ package mem import ( - "log" "runtime" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "mem" @@ -41,7 +41,7 @@ func (s *MemStats) Name() string { func (s *MemStats) Gather(slist *types.SampleList) { vm, err := s.ps.VMStat() if err != nil { - log.Println("E! 
failed to get vmstat:", err) + klog.ErrorS(err, "failed to get vmstat") return } diff --git a/inputs/mongodb/mongodb.go b/inputs/mongodb/mongodb.go index dfd07813b..56d52a35a 100644 --- a/inputs/mongodb/mongodb.go +++ b/inputs/mongodb/mongodb.go @@ -2,7 +2,6 @@ package mongodb import ( "fmt" - "log" "time" "flashcat.cloud/categraf/config" @@ -10,6 +9,7 @@ import ( "flashcat.cloud/categraf/inputs/mongodb/exporter" "flashcat.cloud/categraf/types" "github.com/sirupsen/logrus" + "k8s.io/klog/v2" ) const inputName = "mongodb" @@ -131,6 +131,6 @@ func (ins *Instance) Gather(slist *types.SampleList) { err := inputs.Collect(ins.e, slist) if err != nil { - log.Println("E! failed to collect metrics:", err) + klog.ErrorS(err, "failed to collect mongodb metrics", "mongodb_uri", ins.MongodbURI) } } diff --git a/inputs/mongodb/mongodb_server.go b/inputs/mongodb/mongodb_server.go index 1f67c2127..5051125a6 100644 --- a/inputs/mongodb/mongodb_server.go +++ b/inputs/mongodb/mongodb_server.go @@ -3,7 +3,6 @@ package mongodb import ( "context" "fmt" - "log" "strconv" "strings" "time" @@ -14,6 +13,7 @@ import ( "go.mongodb.org/mongo-driver/mongo/options" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) type Server struct { @@ -44,11 +44,7 @@ func (s *Server) ping() error { } func (s *Server) authLog(err error) { - if IsAuthorization(err) { - log.Printf("W! %s", err.Error()) - } else { - log.Printf("W! %s", err.Error()) - } + klog.Warning(err.Error()) } func (s *Server) runCommand(database string, cmd interface{}, result interface{}) error { @@ -241,7 +237,7 @@ func (s *Server) gatherCollectionStats(colStatsDbs []string) (*ColStats, error) var colls []string colls, err = s.client.Database(dbName).ListCollectionNames(context.Background(), filter) if err != nil { - log.Printf("E! 
Error getting collection names: %s", err.Error()) + klog.ErrorS(err, "error getting MongoDB collection names", "db", dbName) continue } for _, colName := range colls { @@ -292,7 +288,7 @@ func (s *Server) gatherData( if gatherReplicasetStatus { replSetStatus, err = s.gatherReplSetStatus() if err != nil { - log.Printf("W! Unable to gather replica set status: %s", err.Error()) + klog.Warningf("unable to gather MongoDB replica set status: %s", err.Error()) } // Gather the oplog if we are a member of a replica set. Non-replica set @@ -309,7 +305,7 @@ func (s *Server) gatherData( if gatherClusterStatus { status, err := s.gatherClusterStatus() if err != nil { - log.Printf("W! Unable to gather cluster status: %s", err.Error()) + klog.Warningf("unable to gather MongoDB cluster status: %s", err.Error()) } clusterStatus = status } @@ -338,7 +334,7 @@ func (s *Server) gatherData( for _, name := range names { db, err := s.gatherDBStats(name) if err != nil { - log.Printf("W! Error getting db stats from %q: %s", name, err.Error()) + klog.Warningf("error getting MongoDB db stats from %q: %s", name, err.Error()) } dbStats.Dbs = append(dbStats.Dbs, *db) } @@ -348,7 +344,7 @@ func (s *Server) gatherData( if gatherTopStat { topStats, err := s.gatherTopStatData() if err != nil { - log.Printf("W! 
Unable to gather top stat data: %s", err.Error()) + klog.Warningf("unable to gather MongoDB top stat data: %s", err.Error()) return err } topStatData = topStats diff --git a/inputs/mtail/internal/exporter/export.go b/inputs/mtail/internal/exporter/export.go index 43784c404..0e6487316 100644 --- a/inputs/mtail/internal/exporter/export.go +++ b/inputs/mtail/internal/exporter/export.go @@ -10,7 +10,6 @@ import ( "expvar" "fmt" "io" - "log" "net" "os" "sort" @@ -19,6 +18,7 @@ import ( "time" "github.com/pkg/errors" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/mtail/internal/metrics" ) @@ -213,20 +213,20 @@ func (e *Exporter) PushMetrics() { // glog.V(2).Infof("pushing to %s", target.addr) conn, err := net.DialTimeout(target.net, target.addr, writeDeadline) if err != nil { - log.Printf("pusher dial error: %s", err) + klog.ErrorS(err, "pusher dial error", "network", target.net, "addr", target.addr) continue } err = conn.SetDeadline(time.Now().Add(writeDeadline)) if err != nil { - log.Printf("Couldn't set deadline on connection: %s", err) + klog.ErrorS(err, "couldn't set deadline on connection", "addr", target.addr) } err = e.writeSocketMetrics(conn, target.f, target.total, target.success) if err != nil { - log.Printf("pusher write error: %s", err) + klog.ErrorS(err, "pusher write error", "addr", target.addr) } err = conn.Close() if err != nil { - log.Printf("connection close failed: %s", err) + klog.ErrorS(err, "connection close failed", "addr", target.addr) } } } @@ -234,7 +234,7 @@ func (e *Exporter) PushMetrics() { // StartMetricPush pushes metrics to the configured services each interval. 
func (e *Exporter) StartMetricPush() { if e.exportDisabled { - log.Printf("Export loop disabled.") + klog.InfoS("export loop disabled") return } if len(e.pushTargets) == 0 { @@ -247,7 +247,7 @@ func (e *Exporter) StartMetricPush() { go func() { defer e.wg.Done() <-e.initDone - log.Printf("Started metric push.") + klog.InfoS("started metric push") ticker := time.NewTicker(e.pushInterval) defer ticker.Stop() for { diff --git a/inputs/mtail/internal/exporter/json.go b/inputs/mtail/internal/exporter/json.go index 0b7c17fec..f32cc25e1 100644 --- a/inputs/mtail/internal/exporter/json.go +++ b/inputs/mtail/internal/exporter/json.go @@ -6,8 +6,9 @@ package exporter import ( "encoding/json" "expvar" - "log" "net/http" + + "k8s.io/klog/v2" ) var exportJSONErrors = expvar.NewInt("exporter_json_errors") @@ -17,13 +18,13 @@ func (e *Exporter) HandleJSON(w http.ResponseWriter, r *http.Request) { b, err := json.MarshalIndent(e.store, "", " ") if err != nil { exportJSONErrors.Add(1) - log.Printf("error marshalling metrics into json:%s", err.Error()) + klog.ErrorS(err, "error marshalling metrics into json") http.Error(w, err.Error(), http.StatusInternalServerError) return } w.Header().Set("content-type", "application/json") if _, err := w.Write(b); err != nil { - log.Println(err) + klog.ErrorS(err, "failed to write json export response") http.Error(w, err.Error(), http.StatusInternalServerError) } } diff --git a/inputs/mtail/internal/exporter/prometheus.go b/inputs/mtail/internal/exporter/prometheus.go index b743f8f27..267d24854 100644 --- a/inputs/mtail/internal/exporter/prometheus.go +++ b/inputs/mtail/internal/exporter/prometheus.go @@ -7,13 +7,13 @@ import ( "expvar" "fmt" "io" - "log" "strings" "flashcat.cloud/categraf/inputs/mtail/internal/metrics" "flashcat.cloud/categraf/inputs/mtail/internal/metrics/datum" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" + "k8s.io/klog/v2" ) var metricExportTotal = expvar.NewInt("metric_export_total") 
@@ -79,7 +79,7 @@ func (e *Exporter) Collect(c chan<- prometheus.Metric) { vals...) } if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to construct prometheus metric", "metric", m.Name) return nil } // By default no timestamp is emitted to Prometheus. Setting a diff --git a/inputs/mtail/internal/metrics/metric.go b/inputs/mtail/internal/metrics/metric.go index f929d3c4c..19e8383f7 100644 --- a/inputs/mtail/internal/metrics/metric.go +++ b/inputs/mtail/internal/metrics/metric.go @@ -8,7 +8,6 @@ package metrics import ( "encoding/json" "fmt" - "log" "math/rand" "reflect" "strings" @@ -16,6 +15,7 @@ import ( "time" "github.com/pkg/errors" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/mtail/internal/metrics/datum" ) @@ -193,7 +193,7 @@ func (m *Metric) RemoveOldestDatum() { // glog.V(1).Infof("removeOldest: removing oldest LV: %v", oldestLV) err := m.RemoveDatum(oldestLV.Labels...) if err != nil { - log.Println("W!", err) + klog.Warningf("remove oldest datum failed: %v", err) } } } diff --git a/inputs/mtail/internal/metrics/store.go b/inputs/mtail/internal/metrics/store.go index 727c9c526..69c256d62 100644 --- a/inputs/mtail/internal/metrics/store.go +++ b/inputs/mtail/internal/metrics/store.go @@ -7,12 +7,12 @@ import ( "context" "encoding/json" "io" - "log" "reflect" "sync" "time" "github.com/pkg/errors" + "k8s.io/klog/v2" ) // Store contains Metrics. @@ -154,7 +154,7 @@ func (s *Store) Range(f func(*Metric) error) error { // Gc iterates through the Store looking for metrics that can be tidied up, // if they are passed their expiry or sized greater than their limit. func (s *Store) Gc() error { - // log.Println("D! Running Store.Expire()") + // Debug note: running Store.Expire(). now := time.Now() return s.Range(func(m *Metric) error { if m.Limit > 0 && len(m.LabelValues) >= m.Limit { @@ -182,18 +182,18 @@ func (s *Store) Gc() error { // StartGcLoop runs a permanent goroutine to expire metrics every duration. 
func (s *Store) StartGcLoop(ctx context.Context, duration time.Duration) { if duration <= 0 { - log.Println("Metric store expiration disabled") + klog.InfoS("metric store expiration disabled") return } go func() { - log.Printf("Starting metric store expiry loop every %s", duration.String()) + klog.InfoS("starting metric store expiry loop", "duration", duration.String()) ticker := time.NewTicker(duration) defer ticker.Stop() for { select { case <-ticker.C: if err := s.Gc(); err != nil { - log.Println(err) + klog.ErrorS(err, "metric store gc failed") } case <-ctx.Done(): return diff --git a/inputs/mtail/internal/mtail/golden/reader.go b/inputs/mtail/internal/mtail/golden/reader.go index 734b244df..3c3c0f795 100644 --- a/inputs/mtail/internal/mtail/golden/reader.go +++ b/inputs/mtail/internal/mtail/golden/reader.go @@ -6,7 +6,6 @@ package golden import ( "bufio" "io" - "log" "path/filepath" "regexp" "strconv" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/metrics" "flashcat.cloud/categraf/inputs/mtail/internal/metrics/datum" + "k8s.io/klog/v2" ) var varRe = regexp.MustCompile(`^(counter|gauge|timer|text|histogram) ([^ ]+)(?: {([^}]+)})?(?: (\S+))?(?: (.+))?`) @@ -83,7 +83,7 @@ func ReadTestData(file io.Reader, programfile string) metrics.MetricSlice { if err == nil { timestamp = time.Unix(j/1000000000, j%1000000000) } else { - log.Println(err) + klog.ErrorS(err, "failed to parse golden metric timestamp", "value", match[5]) } } } @@ -92,7 +92,7 @@ func ReadTestData(file io.Reader, programfile string) metrics.MetricSlice { m := store.FindMetricOrNil(match[2], prog) if m != nil { if m.Type != typ { - log.Printf("The type of the fetched metric is not %s: %s", typ, m) + klog.Warningf("the type of the fetched metric is not %s: %s", typ, m) continue } } else { @@ -100,7 +100,8 @@ func ReadTestData(file io.Reader, programfile string) metrics.MetricSlice { if kind == metrics.Counter && len(keys) == 0 { d, err := m.GetDatum() if err != nil { - 
log.Fatal(err) + klog.ErrorS(err, "failed to get datum for golden metric initialisation", "metric", match[2]) + continue } // Initialize to zero at the zero time. switch typ { @@ -111,14 +112,14 @@ func ReadTestData(file io.Reader, programfile string) metrics.MetricSlice { } } if err := store.Add(m); err != nil { - log.Printf("Failed to add metric %v to store: %s", m, err) + klog.ErrorS(err, "failed to add metric to store", "metric", m) } } if match[4] != "" { d, err := m.GetDatum(vals...) if err != nil { - log.Printf("Failed to get datum: %s", err) + klog.ErrorS(err, "failed to get datum") continue } diff --git a/inputs/mtail/internal/mtail/httpstatus.go b/inputs/mtail/internal/mtail/httpstatus.go index c431f6696..068d40d4f 100644 --- a/inputs/mtail/internal/mtail/httpstatus.go +++ b/inputs/mtail/internal/mtail/httpstatus.go @@ -5,8 +5,9 @@ package mtail import ( "html/template" - "log" "net/http" + + "k8s.io/klog/v2" ) const statusTemplate = ` @@ -44,11 +45,11 @@ func (m *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { } err = m.r.WriteStatusHTML(w) if err != nil { - log.Printf("Error while writing loader status: %s", err) + klog.ErrorS(err, "error while writing loader status") } err = m.t.WriteStatusHTML(w) if err != nil { - log.Printf("W! 
Error while writing tailer status: %s", err) + klog.Warningf("error while writing tailer status: %s", err) } } diff --git a/inputs/mtail/internal/mtail/mtail.go b/inputs/mtail/internal/mtail/mtail.go index 6b8841567..97b579ef3 100644 --- a/inputs/mtail/internal/mtail/mtail.go +++ b/inputs/mtail/internal/mtail/mtail.go @@ -5,11 +5,11 @@ package mtail import ( "context" - "log" "net" "sync" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/inputs/mtail/internal/exporter" "flashcat.cloud/categraf/inputs/mtail/internal/logline" @@ -125,7 +125,7 @@ func (m *Server) Run() error { m.wg.Wait() m.cancel() if m.compileOnly { - log.Println("compile-only is set, exiting") + klog.InfoS("compile-only is set, exiting") return nil } return nil diff --git a/inputs/mtail/internal/runtime/compiler/checker/checker.go b/inputs/mtail/internal/runtime/compiler/checker/checker.go index 2cea88f2b..f6b13c27c 100644 --- a/inputs/mtail/internal/runtime/compiler/checker/checker.go +++ b/inputs/mtail/internal/runtime/compiler/checker/checker.go @@ -6,7 +6,6 @@ package checker import ( goerrors "errors" "fmt" - "log" "strings" "time" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/parser" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/symbol" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/types" + "k8s.io/klog/v2" ) const ( @@ -251,7 +251,7 @@ func (c *checker) checkSymbolTable() { // Don't warn about the zeroth capture group; it's not user-defined. 
continue } - log.Printf("capture group reference `%s' at %s appears to be unused", sym.Name, sym.Pos) + klog.Warningf("capture group reference `%s` at %s appears to be unused", sym.Name, sym.Pos) continue } c.errors.Add(sym.Pos, fmt.Sprintf("Declaration of %s `%s' here is never used.", sym.Kind, sym.Name)) @@ -811,7 +811,7 @@ func (c *checker) VisitAfter(node ast.Node) ast.Node { // glog.V(2).Infof("time_str is %q", timeStr) _, err := time.Parse(f.Text, timeStr) if err != nil { - log.Printf("time.Parse(%q, %q) failed: %s", f.Text, timeStr, err) + klog.Warningf("time.Parse(%q, %q) failed: %s", f.Text, timeStr, err) c.errors.Add(f.Pos(), fmt.Sprintf("invalid time format string %q\n\tRefer to the documentation at https://golang.org/pkg/time/#pkg-constants for advice.", f.Text)) n.SetType(types.Error) return n diff --git a/inputs/mtail/internal/runtime/compiler/codegen/codegen.go b/inputs/mtail/internal/runtime/compiler/codegen/codegen.go index 8d094c21d..cb0aced9f 100644 --- a/inputs/mtail/internal/runtime/compiler/codegen/codegen.go +++ b/inputs/mtail/internal/runtime/compiler/codegen/codegen.go @@ -5,7 +5,6 @@ package codegen import ( "fmt" - "log" "math" "regexp" "time" @@ -19,6 +18,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/position" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/symbol" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/types" + "k8s.io/klog/v2" ) // codegen represents a code generator. 
@@ -97,7 +97,7 @@ func (c *codegen) VisitBefore(node ast.Node) (ast.Visitor, ast.Node) { dtyp = metrics.Buckets default: if !types.IsComplete(t) { - log.Printf("Incomplete type %v for %#v", t, n) + klog.Warningf("incomplete type %v for %#v", t, n) } dtyp = metrics.Int } diff --git a/inputs/mtail/internal/runtime/compiler/compiler.go b/inputs/mtail/internal/runtime/compiler/compiler.go index 55c0df184..62a9a95fc 100644 --- a/inputs/mtail/internal/runtime/compiler/compiler.go +++ b/inputs/mtail/internal/runtime/compiler/compiler.go @@ -5,7 +5,6 @@ package compiler import ( "io" - "log" "path/filepath" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/code" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/codegen" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/opt" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/parser" + "k8s.io/klog/v2" // "github.com/golang/glog" ) @@ -98,7 +98,7 @@ func (c *Compiler) Compile(name string, input io.Reader) (obj *code.Object, err } if c.emitAst { s := parser.Sexp{} - log.Printf("%s AST:\n%s", name, s.Dump(ast)) + klog.V(1).Infof("%s AST:\n%s", name, s.Dump(ast)) } if !c.disableOptimisation { @@ -108,7 +108,7 @@ func (c *Compiler) Compile(name string, input io.Reader) (obj *code.Object, err } if c.emitAstTypes { s := parser.Sexp{} - log.Printf("Post optimisation %s AST:\n%s", name, s.Dump(ast)) + klog.V(1).Infof("Post optimisation %s AST:\n%s", name, s.Dump(ast)) } } @@ -119,7 +119,7 @@ func (c *Compiler) Compile(name string, input io.Reader) (obj *code.Object, err if c.emitAstTypes { s := parser.Sexp{} s.EmitTypes = true - log.Printf("%s AST with Type Annotation:\n%s", name, s.Dump(ast)) + klog.V(1).Infof("%s AST with Type Annotation:\n%s", name, s.Dump(ast)) } if !c.disableOptimisation { @@ -130,7 +130,7 @@ func (c *Compiler) Compile(name string, input io.Reader) (obj *code.Object, err if c.emitAstTypes { s := parser.Sexp{} s.EmitTypes = true - 
log.Printf("Post optimisation %s AST with Type Annotation:\n%s", name, s.Dump(ast)) + klog.V(1).Infof("Post optimisation %s AST with Type Annotation:\n%s", name, s.Dump(ast)) } } diff --git a/inputs/mtail/internal/runtime/compiler/parser/lexer.go b/inputs/mtail/internal/runtime/compiler/parser/lexer.go index 69571ffbe..bc460c2a5 100644 --- a/inputs/mtail/internal/runtime/compiler/parser/lexer.go +++ b/inputs/mtail/internal/runtime/compiler/parser/lexer.go @@ -8,13 +8,13 @@ import ( "errors" "fmt" "io" - "log" "sort" "strings" "unicode" // "github.com/golang/glog" "flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/position" + "k8s.io/klog/v2" ) // List of keywords. Keep this list sorted! @@ -147,7 +147,7 @@ func (l *Lexer) backup() { return } if err := l.input.UnreadRune(); err != nil { - log.Println(err) + klog.ErrorS(err, "failed to unread rune") } } diff --git a/inputs/mtail/internal/runtime/compiler/types/types.go b/inputs/mtail/internal/runtime/compiler/types/types.go index 152ae0990..26cd499a1 100644 --- a/inputs/mtail/internal/runtime/compiler/types/types.go +++ b/inputs/mtail/internal/runtime/compiler/types/types.go @@ -6,11 +6,11 @@ package types import ( "errors" "fmt" - "log" "regexp/syntax" "strings" "sync" // "github.com/golang/glog" + "k8s.io/klog/v2" ) // Type represents a type in the mtail program. 
@@ -289,7 +289,7 @@ func FreshType(t Type) Type { } return &Operator{p1.Name, args} default: - log.Printf("Unexpected type p1: %v", p1) + klog.Warningf("unexpected type p1: %v", p1) } return tp } diff --git a/inputs/mtail/internal/runtime/runtime.go b/inputs/mtail/internal/runtime/runtime.go index 2b8a0fb99..f5f944312 100644 --- a/inputs/mtail/internal/runtime/runtime.go +++ b/inputs/mtail/internal/runtime/runtime.go @@ -12,7 +12,6 @@ import ( "expvar" "fmt" "io" - "log" "os" "os/signal" "path/filepath" @@ -27,6 +26,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/runtime/vm" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var ( @@ -49,7 +49,7 @@ const ( // This function returns an error if an internal error occurs. func (r *Runtime) LoadAllPrograms() error { if len(r.programPath) == 0 && len(r.progs) == 0 { - log.Printf("W! Programpath is empty, loading nothing") + klog.Warning("Programpath is empty, loading nothing") return nil } // TODO load 配置规则 @@ -67,7 +67,7 @@ func (r *Runtime) LoadAllPrograms() error { if r.errorsAbort { return r.programErrors[name] } - log.Printf("Compile errors for %s:\n%s", name, r.programErrors[name]) + klog.Warningf("compile errors for %s:\n%s", name, r.programErrors[name]) } } return nil @@ -86,7 +86,7 @@ func (r *Runtime) LoadAllPrograms() error { markDeleted := make(map[string]struct{}) r.handleMu.RLock() for name := range r.handles { - log.Printf("added %s", name) + klog.V(1).InfoS("marked existing program for reload", "name", name) markDeleted[name] = struct{}{} } r.handleMu.RUnlock() @@ -99,13 +99,13 @@ func (r *Runtime) LoadAllPrograms() error { if r.errorsAbort { return err } - log.Println(err) + klog.ErrorS(err, "failed to load program", "program", fi.Name()) } - log.Printf("unmarking %s", filepath.Base(fi.Name())) + klog.V(1).InfoS("unmarking program from deletion", "name", filepath.Base(fi.Name())) delete(markDeleted, filepath.Base(fi.Name())) } for name := range 
markDeleted { - log.Printf("unloading %s", name) + klog.V(1).InfoS("unloading stale program", "name", name) r.UnloadProgram(name) } default: @@ -114,7 +114,7 @@ func (r *Runtime) LoadAllPrograms() error { if r.errorsAbort { return err } - log.Println(err) + klog.ErrorS(err, "failed to load program", "program", r.programPath) } } return nil @@ -125,11 +125,11 @@ func (r *Runtime) LoadAllPrograms() error { func (r *Runtime) LoadProgram(programPath string) error { name := filepath.Base(programPath) if strings.HasPrefix(name, ".") { - log.Printf("W! Skipping %s because it is a hidden file.", programPath) + klog.Warningf("skipping %s because it is a hidden file", programPath) return nil } if filepath.Ext(name) != fileExt { - log.Printf("W! Skipping %s due to file extension.", programPath) + klog.Warningf("skipping %s due to file extension", programPath) return nil } f, err := os.OpenFile(filepath.Clean(programPath), os.O_RDONLY, 0o600) @@ -139,7 +139,7 @@ func (r *Runtime) LoadProgram(programPath string) error { } defer func() { if err := f.Close(); err != nil { - log.Println(err) + klog.ErrorS(err, "failed to close program file", "program", programPath) } }() r.programErrorMu.Lock() @@ -149,7 +149,7 @@ func (r *Runtime) LoadProgram(programPath string) error { if r.errorsAbort { return r.programErrors[name] } - log.Printf("Compile errors for %s:\n%s", name, r.programErrors[name]) + klog.Warningf("compile errors for %s:\n%s", name, r.programErrors[name]) } return nil } @@ -188,7 +188,7 @@ func (r *Runtime) CompileAndRun(name string, input io.Reader) error { v := vm.New(name, obj, r.syslogUseCurrentYear, r.overrideLocation, r.logRuntimeErrors, r.trace) if r.dumpBytecode { - log.Println("Dumping program objects and bytecode\n", v.DumpByteCode()) + klog.V(1).InfoS("dumping program objects and bytecode", "program", name, "dump", v.DumpByteCode()) } // Load the metrics from the compilation into the global metric storage for export. 
@@ -205,7 +205,7 @@ func (r *Runtime) CompileAndRun(name string, input io.Reader) error { } ProgLoads.Add(name, 1) - log.Printf("Loaded program %s", name) + klog.InfoS("loaded program", "name", name) if r.compileOnly { return nil @@ -315,7 +315,7 @@ func New(lines <-chan *logline.LogLine, wg *sync.WaitGroup, programPath string, } r.handleMu.RUnlock() } - log.Println("END OF LINE") + klog.V(1).InfoS("end of line stream") close(r.signalQuit) r.handleMu.Lock() for prog := range r.handles { @@ -325,7 +325,7 @@ func New(lines <-chan *logline.LogLine, wg *sync.WaitGroup, programPath string, r.handleMu.Unlock() }() if len(r.programPath) == 0 && len(r.progs) == 0 { - log.Println("No program path specified, no programs will be loaded.") + klog.InfoS("no program path specified, no programs will be loaded") return r, nil } @@ -335,7 +335,7 @@ func New(lines <-chan *logline.LogLine, wg *sync.WaitGroup, programPath string, defer r.wg.Done() <-initDone if len(r.programPath) == 0 && len(r.progs) == 0 { - log.Println("no program reload on SIGHUP without programPath") + klog.InfoS("no program reload on SIGHUP without programPath") return } n := make(chan os.Signal, 1) @@ -347,7 +347,7 @@ func New(lines <-chan *logline.LogLine, wg *sync.WaitGroup, programPath string, return case <-n: if err := r.LoadAllPrograms(); err != nil { - log.Println(err) + klog.ErrorS(err, "failed to reload programs on SIGHUP") } } } diff --git a/inputs/mtail/internal/runtime/vm/vm.go b/inputs/mtail/internal/runtime/vm/vm.go index 88dbb43f6..6b9bbb810 100644 --- a/inputs/mtail/internal/runtime/vm/vm.go +++ b/inputs/mtail/internal/runtime/vm/vm.go @@ -10,7 +10,6 @@ import ( "context" "expvar" "fmt" - "log" "math" "regexp" "strconv" @@ -26,6 +25,7 @@ import ( "github.com/golang/groupcache/lru" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var ( @@ -103,9 +103,9 @@ func (v *VM) errorf(format string, args ...interface{}) { v.t.pc-1, i.Opcode, i.Operand, v.name, 
i.SourceLine+1) v.runtimeError += fmt.Sprintf("Full input text from %q was %q", v.input.Filename, v.input.Line) if v.logRuntimeErrors { - log.Println(v.name + ": Runtime error: " + v.runtimeError) + klog.ErrorS(nil, "vm runtime error", "program", v.name, "detail", v.runtimeError) - log.Printf("Set logging verbosity higher (-v1 or more) to see full VM state dump.") + klog.InfoS("set logging verbosity higher (-v1 or more) to see full VM state dump", "program", v.name) } // if glog.V(1) { // glog.Infof("VM stack:\n%s", debug.Stack()) @@ -121,7 +121,7 @@ func (v *VM) errorf(format string, args ...interface{}) { // glog.Infof(v.DumpByteCode()) // } if v.trace != nil { - log.Printf("Execution Trace: %v", v.trace) + klog.V(1).InfoS("execution trace", "program", v.name, "trace", v.trace) } v.runtimeErrorMu.Unlock() v.terminate = true @@ -1033,7 +1033,7 @@ func (v *VM) DumpByteCode() string { fmt.Fprintf(w, "\t%d\t%s\t%v\t%d\t\n", n, i.Opcode, i.Operand, i.SourceLine+1) } if err := w.Flush(); err != nil { - log.Printf("flush error: %s", err) + klog.ErrorS(err, "failed to flush bytecode dump", "program", v.name) } return b.String() } @@ -1055,5 +1055,5 @@ func (v *VM) Run(lines <-chan *logline.LogLine, wg *sync.WaitGroup) { for line := range lines { v.ProcessLogLine(ctx, line) } - log.Printf("VM %q finished", v.name) + klog.V(1).InfoS("vm finished", "program", v.name) } diff --git a/inputs/mtail/internal/tailer/logstream/cancel.go b/inputs/mtail/internal/tailer/logstream/cancel.go index 05dd9e3ab..07e4b3ec7 100644 --- a/inputs/mtail/internal/tailer/logstream/cancel.go +++ b/inputs/mtail/internal/tailer/logstream/cancel.go @@ -4,10 +4,11 @@ import ( "context" "errors" "io" - "log" "os" "strings" "time" + + "k8s.io/klog/v2" ) // ReadDeadliner has a SetReadDeadline function to be used for interrupting reads. 
@@ -21,9 +22,9 @@ type ReadDeadliner interface { func SetReadDeadlineOnDone(ctx context.Context, d ReadDeadliner) { go func() { <-ctx.Done() - log.Println("cancelled, setting read deadline to interrupt read") + klog.V(1).InfoS("cancelled, setting read deadline to interrupt read") if err := d.SetReadDeadline(time.Now()); err != nil { - log.Printf("SetReadDeadline() -> %v", err) + klog.ErrorS(err, "SetReadDeadline failed") } }() } diff --git a/inputs/mtail/internal/tailer/logstream/dgramstream.go b/inputs/mtail/internal/tailer/logstream/dgramstream.go index 7dec7f364..5645c0593 100644 --- a/inputs/mtail/internal/tailer/logstream/dgramstream.go +++ b/inputs/mtail/internal/tailer/logstream/dgramstream.go @@ -6,12 +6,12 @@ package logstream import ( "context" "fmt" - "log" "net" "sync" "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) type dgramStream struct { @@ -64,7 +64,7 @@ func (ds *dgramStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wak err := c.Close() if err != nil { logErrors.Add(ds.address, 1) - log.Println(err) + klog.ErrorS(err, "failed to close datagram connection", "source", ds.sourcename) } logCloses.Add(ds.address, 1) lr.Finish(ctx) diff --git a/inputs/mtail/internal/tailer/logstream/fifostream.go b/inputs/mtail/internal/tailer/logstream/fifostream.go index a121b5a72..69aef7011 100644 --- a/inputs/mtail/internal/tailer/logstream/fifostream.go +++ b/inputs/mtail/internal/tailer/logstream/fifostream.go @@ -7,13 +7,13 @@ import ( "context" "errors" "io" - "log" "os" "sync" "syscall" "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) type fifoStream struct { @@ -49,7 +49,7 @@ func fifoOpen(pathname string) (*os.File, error) { // Open in nonblocking mode because the write end of the fifo may not have started yet; this also gives us the ability to set a read deadline when the context is 
cancelled. https://github.com/golang/go/issues/24842 fd, err := os.OpenFile(pathname, os.O_RDONLY|syscall.O_NONBLOCK, 0o600) // #nosec G304 -- path already validated by caller if err != nil { - log.Printf("fifoOpen(%s): open failed: %v", pathname, err) + klog.ErrorS(err, "fifo open failed", "pathname", pathname) logErrors.Add(pathname, 1) return nil, err } @@ -86,7 +86,7 @@ func (ps *fifoStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake err := fd.Close() if err != nil { logErrors.Add(ps.pathname, 1) - log.Println(err) + klog.ErrorS(err, "failed to close fifo", "pathname", ps.pathname) } logCloses.Add(ps.pathname, 1) lr.Finish(ctx) diff --git a/inputs/mtail/internal/tailer/logstream/filestream.go b/inputs/mtail/internal/tailer/logstream/filestream.go index 0b58326b8..7d2749f8d 100644 --- a/inputs/mtail/internal/tailer/logstream/filestream.go +++ b/inputs/mtail/internal/tailer/logstream/filestream.go @@ -8,7 +8,6 @@ import ( "errors" "expvar" "io" - "log" "os" "sync" "syscall" @@ -16,6 +15,7 @@ import ( // "github.com/golang/glog" "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) // fileTruncates counts the truncations of a file stream. 
@@ -74,7 +74,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake logErrors.Add(fs.sourcename, 1) if err := fd.Close(); err != nil { logErrors.Add(fs.sourcename, 1) - log.Printf("stream(%s): closing file: %v", fs.sourcename, err) + klog.ErrorS(err, "closing file failed", "source", fs.sourcename) } return err } @@ -93,7 +93,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake // glog.V(2).Infof("stream(%s): closing file descriptor", fs.sourcename) if err := fd.Close(); err != nil { logErrors.Add(fs.sourcename, 1) - log.Printf("stream(%s): closing file: %v", fs.sourcename, err) + klog.ErrorS(err, "closing file failed", "source", fs.sourcename) } logCloses.Add(fs.sourcename, 1) }() @@ -118,15 +118,15 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake // errors, and end on unretriables; e.g. ESTALE looks // retryable. if errors.Is(err, syscall.ESTALE) { - log.Printf("stream(%s): reopening stream due to %s", fs.sourcename, err) + klog.Warningf("stream(%s): reopening stream due to %s", fs.sourcename, err) // streamFromStart always true on a stream reopen if nerr := fs.stream(ctx, wg, waker, fi, oneShot, true); nerr != nil { - log.Printf("stream(%s): new stream: %v", fs.sourcename, nerr) + klog.ErrorS(nerr, "failed to create reopened file stream", "source", fs.sourcename) } // Close this stream. return } - log.Printf("stream(%s): read error: %v", fs.sourcename, err) + klog.ErrorS(err, "file stream read error", "source", fs.sourcename) } // If we have read no bytes and are at EOF, check for truncation and rotation. @@ -137,7 +137,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake // common change pattern anyway. 
newfi, serr := os.Stat(fs.pathname) if serr != nil { - log.Printf("stream(%s): stat error: %v", fs.pathname, serr) + klog.ErrorS(serr, "file stream stat error", "pathname", fs.pathname) // If this is a NotExist error, then we should wrap up this // goroutine. The Tailer will create a new logstream if the // file is in the middle of a rotation and gets recreated @@ -157,7 +157,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake // glog.V(2).Infof("stream(%s): adding a new file routine", fs.sourcename) // Stream from start always true on a stream reopen if err := fs.stream(ctx, wg, waker, newfi, oneShot, true); err != nil { - log.Printf("stream(%s): new stream: %v", fs.sourcename, err) + klog.ErrorS(err, "failed to create rotated file stream", "source", fs.sourcename) } // We're at EOF so there's nothing left to read here. return @@ -165,7 +165,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake currentOffset, serr := fd.Seek(0, io.SeekCurrent) if serr != nil { logErrors.Add(fs.sourcename, 1) - log.Println(serr) + klog.ErrorS(serr, "failed to seek current offset", "source", fs.sourcename) continue } // glog.V(2).Infof("stream(%s): current seek is %d", fs.sourcename, currentOffset) @@ -183,7 +183,7 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake _, serr := fd.Seek(0, io.SeekStart) if serr != nil { logErrors.Add(fs.sourcename, 1) - log.Printf("stream(%s): seek: %v", fs.sourcename, serr) + klog.ErrorS(serr, "failed to seek to start", "source", fs.sourcename) } // glog.V(2).Infof("stream(%s): Seeked to %d", fs.sourcename, p) fileTruncates.Add(fs.sourcename, 1) diff --git a/inputs/mtail/internal/tailer/logstream/logstream.go b/inputs/mtail/internal/tailer/logstream/logstream.go index 6a881c846..345bd038f 100644 --- a/inputs/mtail/internal/tailer/logstream/logstream.go +++ b/inputs/mtail/internal/tailer/logstream/logstream.go @@ -13,13 +13,13 @@ import ( "errors" 
"expvar" "fmt" - "log" "net/url" "os" "sync" "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) var ( @@ -69,7 +69,7 @@ func New(ctx context.Context, wg *sync.WaitGroup, waker waker.Waker, pathname st if err != nil { return nil, err } - log.Printf("Parsed url as %v", u) + klog.V(1).InfoS("parsed url", "url", u.String()) path := pathname switch u.Scheme { diff --git a/inputs/mtail/internal/tailer/logstream/socketstream.go b/inputs/mtail/internal/tailer/logstream/socketstream.go index 169cf92f7..51d302e03 100644 --- a/inputs/mtail/internal/tailer/logstream/socketstream.go +++ b/inputs/mtail/internal/tailer/logstream/socketstream.go @@ -6,12 +6,12 @@ package logstream import ( "context" "fmt" - "log" "net" "sync" "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) type socketStream struct { @@ -73,7 +73,7 @@ func (ss *socketStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wa // glog.V(2).Infof("stream(%s): closing listener", ss.sourcename) err := l.Close() if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to close listener", "source", ss.sourcename) } connWg.Wait() close(ss.lines) @@ -87,7 +87,7 @@ func (ss *socketStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wa for { c, err := l.Accept() if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to accept connection", "source", ss.sourcename) return } // glog.V(2).Infof("stream(%s): got new conn %v", ss.sourcename, c) @@ -115,7 +115,7 @@ func (ss *socketStream) handleConn(ctx context.Context, wg *sync.WaitGroup, wake err := c.Close() if err != nil { logErrors.Add(ss.address, 1) - log.Println(err) + klog.ErrorS(err, "failed to close connection", "source", ss.sourcename) } lr.Finish(ctx) logCloses.Add(ss.address, 1) diff --git a/inputs/mtail/internal/tailer/tail.go b/inputs/mtail/internal/tailer/tail.go index 
d82d92c6b..778f53902 100644 --- a/inputs/mtail/internal/tailer/tail.go +++ b/inputs/mtail/internal/tailer/tail.go @@ -10,7 +10,6 @@ import ( "errors" "expvar" "fmt" - "log" "net/url" "os" "path/filepath" @@ -20,6 +19,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/logline" "flashcat.cloud/categraf/inputs/mtail/internal/tailer/logstream" "flashcat.cloud/categraf/inputs/mtail/internal/waker" + "k8s.io/klog/v2" ) // logCount records the number of logs that are being tailed. @@ -288,10 +288,10 @@ func (t *Tailer) TailPath(pathname string) error { // shut down the tailer when there are outstanding patterns to poll for. func (t *Tailer) pollLogPattern(pattern string) { if err := t.doPatternGlob(pattern); err != nil { - log.Printf("pollPattern(%v): glob failed: %v", pattern, err) + klog.ErrorS(err, "pollPattern glob failed", "pattern", pattern) } if t.logPatternPollWaker == nil { - log.Printf("pollPattern(%v): log pattern polling disabled by no waker", pattern) + klog.InfoS("log pattern polling disabled by no waker", "pattern", pattern) return } t.wg.Add(1) @@ -299,7 +299,7 @@ func (t *Tailer) pollLogPattern(pattern string) { defer t.wg.Done() <-t.initDone if t.oneShot { - log.Printf("pollPattern(%v): no polling loop in oneshot mode", pattern) + klog.InfoS("no polling loop in oneshot mode", "pattern", pattern) return } // glog.V(1).Infof("pollPattern(%v): starting log pattern poll loop", pattern) @@ -309,7 +309,7 @@ func (t *Tailer) pollLogPattern(pattern string) { return case <-t.logPatternPollWaker.Wake(): if err := t.doPatternGlob(pattern); err != nil { - log.Printf("pollPattern(%v): glob failed: %v", pattern, err) + klog.ErrorS(err, "pollPattern glob failed", "pattern", pattern) } } } @@ -335,7 +335,7 @@ func (t *Tailer) doPatternGlob(pattern string) error { } // glog.V(2).Infof("doPatternGlob(%v): tailable path is %q", pattern, absPath) if err := t.TailPath(absPath); err != nil { - log.Println(err) + klog.ErrorS(err, "failed to tail path", "pattern", 
pattern, "path", absPath) } } return nil diff --git a/inputs/mtail/internal/waker/testwaker.go b/inputs/mtail/internal/waker/testwaker.go index ca9cc40fd..2c5fb3f51 100644 --- a/inputs/mtail/internal/waker/testwaker.go +++ b/inputs/mtail/internal/waker/testwaker.go @@ -5,8 +5,9 @@ package waker import ( "context" - "log" "sync" + + "k8s.io/klog/v2" ) // A testWaker is used to manually signal to idle routines it's time to look for new work. @@ -47,22 +48,22 @@ func NewTest(ctx context.Context, n int) (Waker, WakeFunc) { }() wakeFunc := func(after int) { <-initDone - log.Println(1, "TestWaker yielding to Wakee") + klog.V(1).InfoS("TestWaker yielding to Wakee") for i := 0; i < t.n; i++ { t.wait <- struct{}{} } - log.Printf("waiting for %d wakees to get the wake chan", t.n) + klog.V(1).InfoS("waiting for wakees to get the wake chan", "count", t.n) for i := 0; i < t.n; i++ { <-t.wakeeReady } t.broadcastWakeAndReset() // Now wakeFunc blocks here - log.Printf("waiting for %d wakees to return to Wake", after) + klog.V(1).InfoS("waiting for wakees to return to Wake", "count", after) for i := 0; i < after; i++ { <-t.wakeeDone } t.n = after - log.Println("Wakee yielding to TestWaker") + klog.V(1).InfoS("Wakee yielding to TestWaker") } return t, wakeFunc } @@ -72,7 +73,7 @@ func (t *testWaker) Wake() (w <-chan struct{}) { t.mu.Lock() w = t.wake t.mu.Unlock() - log.Println("waiting for wakeup on chan ", w) + klog.V(1).InfoS("waiting for wakeup on chan", "chan", w) // Background this so we can return the wake channel. // The wakeFunc won't close the channel until this completes. 
go func() { @@ -101,10 +102,10 @@ func (t *testWaker) Wake() (w <-chan struct{}) { func (t *testWaker) broadcastWakeAndReset() { t.mu.Lock() defer t.mu.Unlock() - log.Printf("broadcasting wake to chan %p", t.wake) + klog.V(1).InfoS("broadcasting wake", "chan", t.wake) close(t.wake) t.wake = make(chan struct{}) - log.Printf("wake channel reset") + klog.V(1).InfoS("wake channel reset") } // alwaysWaker never blocks the wakee. diff --git a/inputs/mtail/mtail.go b/inputs/mtail/mtail.go index de51a2e95..3501dc8c5 100644 --- a/inputs/mtail/mtail.go +++ b/inputs/mtail/mtail.go @@ -2,7 +2,6 @@ package mtail import ( "fmt" - "log" "os" "time" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/inputs/mtail/internal/waker" util "flashcat.cloud/categraf/pkg/metrics" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = `mtail` @@ -134,7 +134,7 @@ func (ins *Instance) Init() error { m, err := mtail.New(ins.ctx, store, opts...) if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to create mtail server") ins.cancel() return err } @@ -182,7 +182,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { reg := ins.m.GetRegistry() mfs, done, err := prometheus.ToTransactionalGatherer(reg).Gather() if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to gather mtail metrics") return } defer done() diff --git a/inputs/mysql/binlog.go b/inputs/mysql/binlog.go index d80dcb04d..01fef8577 100644 --- a/inputs/mysql/binlog.go +++ b/inputs/mysql/binlog.go @@ -2,12 +2,12 @@ package mysql import ( "database/sql" - "log" "strconv" "strings" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTags map[string]string) { @@ -17,7 +17,7 @@ func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTag var logBin uint8 err := db.QueryRow(`SELECT @@log_bin`).Scan(&logBin) if err != nil { - log.Println("E! 
failed to query SELECT @@log_bin:", err) + klog.ErrorS(err, "failed to query mysql @@log_bin", "address", ins.Address) return } @@ -28,7 +28,7 @@ func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTag rows, err := db.Query(`SHOW BINARY LOGS`) if err != nil { - log.Println("E! failed to query SHOW BINARY LOGS:", err) + klog.ErrorS(err, "failed to query mysql binary logs", "address", ins.Address) return } @@ -36,7 +36,7 @@ func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTag columns, err := rows.Columns() if err != nil { - log.Println("E! failed to get columns:", err) + klog.ErrorS(err, "failed to get mysql binary log columns", "address", ins.Address) return } @@ -46,8 +46,8 @@ func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTag filename string filesize uint64 encrypted string - algorithm string // 加密算法(在某些版本中可用);Reserved for future use: currently unused - columnCount int = len(columns) + algorithm string // 加密算法(在某些版本中可用);Reserved for future use: currently unused + columnCount int = len(columns) ) for rows.Next() { @@ -65,7 +65,7 @@ func (ins *Instance) gatherBinlog(slist *types.SampleList, db *sql.DB, globalTag return } default: - log.Println("E! 
invalid number of columns:", columnCount) + klog.ErrorS(nil, "invalid number of mysql binary log columns", "address", ins.Address, "column_count", columnCount) } size += filesize diff --git a/inputs/mysql/custom_queries.go b/inputs/mysql/custom_queries.go index d82c284f4..04edddb6a 100644 --- a/inputs/mysql/custom_queries.go +++ b/inputs/mysql/custom_queries.go @@ -3,7 +3,6 @@ package mysql import ( "context" "database/sql" - "log" "strings" "sync" "time" @@ -11,6 +10,7 @@ import ( "flashcat.cloud/categraf/pkg/conv" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherCustomQueries(slist *types.SampleList, db *sql.DB, globalTags map[string]string) { @@ -37,12 +37,12 @@ func (ins *Instance) gatherOneQuery(slist *types.SampleList, db *sql.DB, globalT rows, err := db.QueryContext(ctx, query.Request) if ctx.Err() == context.DeadlineExceeded { - log.Println("E! query timeout, request:", query.Request) + klog.ErrorS(ctx.Err(), "mysql custom query timeout", "address", ins.Address, "request", query.Request) return } if err != nil { - log.Println("E! failed to query:", err) + klog.ErrorS(err, "failed to query mysql custom query", "address", ins.Address, "request", query.Request) return } @@ -50,7 +50,7 @@ func (ins *Instance) gatherOneQuery(slist *types.SampleList, db *sql.DB, globalT cols, err := rows.Columns() if err != nil { - log.Println("E! failed to get columns:", err) + klog.ErrorS(err, "failed to get mysql custom query columns", "address", ins.Address, "request", query.Request) return } @@ -63,7 +63,7 @@ func (ins *Instance) gatherOneQuery(slist *types.SampleList, db *sql.DB, globalT // Scan the result into the column pointers... if err := rows.Scan(columnPointers...); err != nil { - log.Println("E! 
failed to scan:", err) + klog.ErrorS(err, "failed to scan mysql custom query row", "address", ins.Address, "request", query.Request) return } @@ -74,7 +74,7 @@ func (ins *Instance) gatherOneQuery(slist *types.SampleList, db *sql.DB, globalT } if err = ins.parseRow(row, query, slist, globalTags); err != nil { - log.Println("E! failed to parse row:", err, "sql:", query.Request) + klog.ErrorS(err, "failed to parse mysql custom query row", "address", ins.Address, "request", query.Request) } } } @@ -92,7 +92,7 @@ func (ins *Instance) parseRow(row map[string]string, query QueryConfig, slist *t for _, column := range query.MetricFields { value, err := conv.ToFloat64(row[column]) if err != nil { - log.Println("E! failed to convert field:", column, "value:", value, "error:", err) + klog.ErrorS(err, "failed to convert mysql custom query field", "address", ins.Address, "column", column, "value", row[column]) return err } diff --git a/inputs/mysql/engine_innodb.go b/inputs/mysql/engine_innodb.go index 6245ae973..b2691eb0b 100644 --- a/inputs/mysql/engine_innodb.go +++ b/inputs/mysql/engine_innodb.go @@ -2,13 +2,13 @@ package mysql import ( "database/sql" - "log" "regexp" "strconv" "strings" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherEngineInnodbStatus(slist *types.SampleList, db *sql.DB, globalTags map[string]string, cache map[string]float64) { @@ -17,7 +17,7 @@ func (ins *Instance) gatherEngineInnodbStatus(slist *types.SampleList, db *sql.D } rows, err := db.Query(SQL_ENGINE_INNODB_STATUS) if err != nil { - log.Println("E! failed to query engine innodb status:", err) + klog.ErrorS(err, "failed to query mysql engine innodb status", "address", ins.Address) return } @@ -27,7 +27,7 @@ func (ins *Instance) gatherEngineInnodbStatus(slist *types.SampleList, db *sql.D // First row should contain the necessary info. If many rows returned then it's unknown case. 
if rows.Next() { if err := rows.Scan(&typeCol, &nameCol, &statusCol); err != nil { - log.Println("E! failed to scan result, sql:", SQL_ENGINE_INNODB_STATUS, "error:", err) + klog.ErrorS(err, "failed to scan mysql engine innodb status", "address", ins.Address, "sql", SQL_ENGINE_INNODB_STATUS) return } } diff --git a/inputs/mysql/global_status.go b/inputs/mysql/global_status.go index cea7fbaa3..044af75df 100644 --- a/inputs/mysql/global_status.go +++ b/inputs/mysql/global_status.go @@ -2,7 +2,6 @@ package mysql import ( "database/sql" - "log" "regexp" "strconv" "strings" @@ -10,6 +9,7 @@ import ( "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) // Regexp to match various groups of status vars. @@ -21,7 +21,7 @@ func (ins *Instance) gatherGlobalStatus(slist *types.SampleList, db *sql.DB, glo } rows, err := db.Query(SQL_GLOBAL_STATUS) if err != nil { - log.Println("E! failed to query global status:", err) + klog.ErrorS(err, "failed to query mysql global status", "address", ins.Address) return } diff --git a/inputs/mysql/global_variables.go b/inputs/mysql/global_variables.go index 832df45fa..3650ee58d 100644 --- a/inputs/mysql/global_variables.go +++ b/inputs/mysql/global_variables.go @@ -2,13 +2,13 @@ package mysql import ( "database/sql" - "log" "regexp" "strconv" "strings" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherGlobalVariables(slist *types.SampleList, db *sql.DB, globalTags map[string]string, cache map[string]float64) { @@ -17,7 +17,7 @@ func (ins *Instance) gatherGlobalVariables(slist *types.SampleList, db *sql.DB, } rows, err := db.Query(SQL_GLOBAL_VARIABLES) if err != nil { - log.Println("E! 
failed to query global variables:", err) + klog.ErrorS(err, "failed to query mysql global variables", "address", ins.Address) return } diff --git a/inputs/mysql/mysql.go b/inputs/mysql/mysql.go index ab768976a..1a400afd6 100644 --- a/inputs/mysql/mysql.go +++ b/inputs/mysql/mysql.go @@ -3,11 +3,11 @@ package mysql import ( "database/sql" "fmt" - "log" "strings" "time" "github.com/go-sql-driver/mysql" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -204,7 +204,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { db, err := sql.Open("mysql", ins.dsn) if err != nil { slist.PushSample(inputName, "up", 0, tags) - log.Println("E! failed to open mysql:", err) + klog.ErrorS(err, "failed to open mysql", "address", ins.Address) return } @@ -216,7 +216,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { if err = db.Ping(); err != nil { slist.PushSample(inputName, "up", 0, tags) - log.Println("E! failed to ping mysql:", err) + klog.ErrorS(err, "failed to ping mysql", "address", ins.Address) return } diff --git a/inputs/mysql/processlist.go b/inputs/mysql/processlist.go index 234f9cb38..10348a451 100644 --- a/inputs/mysql/processlist.go +++ b/inputs/mysql/processlist.go @@ -2,11 +2,11 @@ package mysql import ( "database/sql" - "log" "strings" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) // These are const but can't be declared as such because golang doesn't allow const maps @@ -96,7 +96,7 @@ func (ins *Instance) gatherProcesslistByState(slist *types.SampleList, db *sql.D rows, err := db.Query(SQL_INFO_SCHEMA_PROCESSLIST) if err != nil { - log.Println("E! failed to get processlist:", err) + klog.ErrorS(err, "failed to query mysql processlist by state", "address", ins.Address) return } @@ -120,7 +120,7 @@ func (ins *Instance) gatherProcesslistByState(slist *types.SampleList, db *sql.D err = rows.Scan(&command, &state, &count) if err != nil { - log.Println("W! 
failed to scan rows:", err) + klog.Warningf("failed to scan mysql processlist rows: address=%s err=%v", ins.Address, err) return } // each state has its mapping diff --git a/inputs/mysql/processlist_by_user.go b/inputs/mysql/processlist_by_user.go index 437495bbd..56c5ddd0c 100644 --- a/inputs/mysql/processlist_by_user.go +++ b/inputs/mysql/processlist_by_user.go @@ -2,10 +2,10 @@ package mysql import ( "database/sql" - "log" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherProcesslistByUser(slist *types.SampleList, db *sql.DB, globalTags map[string]string) { @@ -15,7 +15,7 @@ func (ins *Instance) gatherProcesslistByUser(slist *types.SampleList, db *sql.DB rows, err := db.Query(SQL_INFO_SCHEMA_PROCESSLIST_BY_USER) if err != nil { - log.Println("E! failed to get processlist:", err) + klog.ErrorS(err, "failed to query mysql processlist by user", "address", ins.Address) return } @@ -29,7 +29,7 @@ func (ins *Instance) gatherProcesslistByUser(slist *types.SampleList, db *sql.DB err = rows.Scan(&user, &connections) if err != nil { - log.Println("E! failed to scan rows:", err) + klog.ErrorS(err, "failed to scan mysql processlist by user rows", "address", ins.Address) return } diff --git a/inputs/mysql/schema_size.go b/inputs/mysql/schema_size.go index d9bc28b38..59561a23c 100644 --- a/inputs/mysql/schema_size.go +++ b/inputs/mysql/schema_size.go @@ -2,10 +2,10 @@ package mysql import ( "database/sql" - "log" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherSchemaSize(slist *types.SampleList, db *sql.DB, globalTags map[string]string) { @@ -15,7 +15,7 @@ func (ins *Instance) gatherSchemaSize(slist *types.SampleList, db *sql.DB, globa rows, err := db.Query(SQL_QUERY_SCHEMA_SIZE) if err != nil { - log.Println("E! 
failed to get schema size of", ins.Address, err) + klog.ErrorS(err, "failed to query mysql schema sizes", "address", ins.Address) return } @@ -29,7 +29,7 @@ func (ins *Instance) gatherSchemaSize(slist *types.SampleList, db *sql.DB, globa err = rows.Scan(&schema, &size) if err != nil { - log.Println("E! failed to scan rows of", ins.Address, err) + klog.ErrorS(err, "failed to scan mysql schema size rows", "address", ins.Address) return } diff --git a/inputs/mysql/slave_status.go b/inputs/mysql/slave_status.go index f482ed86e..30997b7d1 100644 --- a/inputs/mysql/slave_status.go +++ b/inputs/mysql/slave_status.go @@ -4,12 +4,12 @@ import ( "bytes" "database/sql" "fmt" - "log" "strconv" "strings" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) var slaveStatusQueries = [2]string{"SHOW ALL SLAVES STATUS", "SHOW SLAVE STATUS"} @@ -42,12 +42,12 @@ func (ins *Instance) gatherSlaveStatus(slist *types.SampleList, db *sql.DB, glob rows, err := querySlaveStatus(db) if err != nil { - log.Println("E! failed to query slave status:", err) + klog.ErrorS(err, "failed to query mysql slave status", "address", ins.Address) return } if rows == nil { - log.Println("E! failed to query slave status: rows is nil") + klog.ErrorS(nil, "mysql slave status rows are nil", "address", ins.Address) return } @@ -55,7 +55,7 @@ func (ins *Instance) gatherSlaveStatus(slist *types.SampleList, db *sql.DB, glob slaveCols, err := rows.Columns() if err != nil { - log.Println("E! failed to get columns of slave rows:", err) + klog.ErrorS(err, "failed to get mysql slave status columns", "address", ins.Address) return } @@ -180,7 +180,7 @@ func (ins *Instance) gatherReplicaStatus(slist *types.SampleList, db *sql.DB, gl } } - log.Println("E! 
failed to gather replica status:", err) + klog.ErrorS(err, "failed to gather mysql replica status", "address", ins.Address) return err } @@ -246,7 +246,7 @@ func (ins *Instance) gatherReplicaStatusOnce(slist *types.SampleList, db *sql.DB value, err := ins.parseValueByDatabaseTypeName(colValue, col.DatabaseTypeName()) if err != nil { errString := fmt.Errorf("error parsing mysql slave status %q=%q: %w", colName, string(colValue), err) - log.Println(errString) + klog.ErrorS(errString, "failed to parse mysql slave status value", "address", ins.Address, "column", colName, "value", string(colValue)) continue } diff --git a/inputs/mysql/table_size.go b/inputs/mysql/table_size.go index d60871f11..2a3817573 100644 --- a/inputs/mysql/table_size.go +++ b/inputs/mysql/table_size.go @@ -2,10 +2,10 @@ package mysql import ( "database/sql" - "log" "flashcat.cloud/categraf/pkg/tagx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) func (ins *Instance) gatherTableSize(slist *types.SampleList, db *sql.DB, globalTags map[string]string, isSystem bool) { @@ -23,7 +23,7 @@ func (ins *Instance) gatherTableSize(slist *types.SampleList, db *sql.DB, global rows, err := db.Query(query) if err != nil { - log.Println("E! failed to get table size:", err) + klog.ErrorS(err, "failed to query mysql table sizes", "address", ins.Address, "system_tables", isSystem) return } @@ -37,7 +37,7 @@ func (ins *Instance) gatherTableSize(slist *types.SampleList, db *sql.DB, global err = rows.Scan(&schema, &table, &indexSize, &dataSize, &dataFree) if err != nil { - log.Println("E! 
failed to scan rows:", err) + klog.ErrorS(err, "failed to scan mysql table size rows", "address", ins.Address, "system_tables", isSystem) return } diff --git a/inputs/nats/nats.go b/inputs/nats/nats.go index b9fe20f52..111bf10b6 100644 --- a/inputs/nats/nats.go +++ b/inputs/nats/nats.go @@ -7,11 +7,12 @@ import ( "flashcat.cloud/categraf/types" gnatsd "github.com/nats-io/nats-server/v2/server" "io" - "log" "net/http" "net/url" "path" "time" + + "k8s.io/klog/v2" ) const inputName = "nats" @@ -69,32 +70,32 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { if ins.DebugMod { - log.Println("D! nats... server:", ins.Server) + klog.V(1).InfoS("nats gather", "server", ins.Server) } address, err := url.Parse(ins.Server) if err != nil { - log.Println("E! error parseURL", err) + klog.ErrorS(err, "error parsing NATS URL", "server", ins.Server) return } address.Path = path.Join(address.Path, "varz") resp, err := ins.client.Get(address.String()) if err != nil { - log.Println("E! error while polling", address.String(), err) + klog.ErrorS(err, "error while polling", "url", address.String()) return } defer resp.Body.Close() bytes, err := io.ReadAll(resp.Body) if err != nil { - log.Println("E! error reading body", err) + klog.ErrorS(err, "error reading body", "url", address.String()) return } stats := new(gnatsd.Varz) err = json.Unmarshal(bytes, &stats) if err != nil { - log.Println("E! 
error parsing response", err) + klog.ErrorS(err, "error parsing response", "url", address.String()) return } diff --git a/inputs/net/net.go b/inputs/net/net.go index 24ed8bb33..1d1ba8c7d 100644 --- a/inputs/net/net.go +++ b/inputs/net/net.go @@ -2,7 +2,6 @@ package net import ( "fmt" - "log" "net" "flashcat.cloud/categraf/config" @@ -10,6 +9,7 @@ import ( "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "net" @@ -62,13 +62,13 @@ func (s *NetIOStats) Init() error { func (s *NetIOStats) Gather(slist *types.SampleList) { netio, err := s.ps.NetIO() if err != nil { - log.Println("E! failed to get net io metrics:", err) + klog.ErrorS(err, "failed to get net io metrics") return } interfaces, err := net.Interfaces() if err != nil { - log.Println("E! failed to list interfaces:", err) + klog.ErrorS(err, "failed to list interfaces") return } diff --git a/inputs/net_response/net_response.go b/inputs/net_response/net_response.go index 600472d06..fa8043889 100644 --- a/inputs/net_response/net_response.go +++ b/inputs/net_response/net_response.go @@ -4,7 +4,6 @@ import ( "bufio" "errors" "fmt" - "log" "math" "net" "net/textproto" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -148,7 +148,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! net_response... target:", target) + klog.V(1).InfoS("net_response gather", "target", target) } labels := map[string]string{"target": target} @@ -172,19 +172,19 @@ func (ins *Instance) gather(slist *types.SampleList, target string) { case "tcp": returnTags, fields, err = ins.TCPGather(target) if err != nil { - log.Println("E! 
failed to gather:", target, "error:", err) + klog.ErrorS(err, "failed to gather net response", "target", target, "protocol", "tcp") return } labels["protocol"] = "tcp" case "udp": returnTags, fields, err = ins.UDPGather(target) if err != nil { - log.Println("E! failed to gather:", target, "error:", err) + klog.ErrorS(err, "failed to gather net response", "target", target, "protocol", "udp") return } labels["protocol"] = "udp" default: - log.Println("E! bad protocol, target:", target) + klog.ErrorS(nil, "bad protocol", "target", target, "protocol", ins.Protocol) } for k, v := range returnTags { @@ -240,7 +240,7 @@ func (ins *Instance) TCPGather(address string) (map[string]string, map[string]in responseTime = time.Since(start).Seconds() // Handle error if err != nil { - log.Printf("E! read tcp failed, address: %s, error: %s", address, err) + klog.ErrorS(err, "read tcp failed", "address", address) fields["result_code"] = ReadFailed } else { if strings.Contains(data, ins.Expect) { @@ -303,7 +303,7 @@ func (ins *Instance) UDPGather(address string) (map[string]string, map[string]in time.Sleep(1 * time.Second) _, err = conn.Write(msg) if err != nil && ins.DebugMod { - log.Printf("E! write udp failed, address: %s, error: %s", address, err) + klog.V(1).InfoS("write udp failed", "address", address, "error", err) } if err != nil && strings.Contains(err.Error(), "refused") { fields["result_code"] = ConnectionFailed @@ -322,7 +322,7 @@ func (ins *Instance) UDPGather(address string) (map[string]string, map[string]in responseTime = time.Since(start).Seconds() // Handle error if err != nil { - log.Printf("E! 
read udp failed, address: %s, error: %s", address, err) + klog.ErrorS(err, "read udp failed", "address", address) fields["result_code"] = ReadFailed fields["response_time"] = -1 // Error encoded in result diff --git a/inputs/netstat/netstat.go b/inputs/netstat/netstat.go index 197aa2265..02c4c4044 100644 --- a/inputs/netstat/netstat.go +++ b/inputs/netstat/netstat.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "io" - "log" "os" "path" "runtime" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "netstat" @@ -66,7 +66,7 @@ func (s *NetStats) gatherSummary(slist *types.SampleList) { } bs, err := os.ReadFile(f) if err != nil { - log.Println("E! failed to read sockstat", f, err) + klog.ErrorS(err, "failed to read sockstat", "path", f) return } reader := bufio.NewReader(bytes.NewBuffer(bs)) @@ -90,7 +90,7 @@ func (s *NetStats) gatherSummary(slist *types.SampleList) { for i := 0; i < len(kvs); i += 2 { val, err := strconv.ParseUint(kvs[i+1], 10, 64) if err != nil { - log.Println("W! parse:", f, "line:", line, "field:", kvs[i+1], "failed:", err) + klog.Warningf("failed to parse sockstat field: path=%s line=%q field=%q err=%v", f, line, kvs[i+1], err) } slist.PushSample(inputName+"_"+metric, strings.ToLower(kvs[i]), val, tags) } @@ -107,7 +107,7 @@ func (s *NetStats) Gather(slist *types.SampleList) { } netconns, err := s.ps.NetConnections() if err != nil { - log.Println("E! failed to get net connections:", err) + klog.ErrorS(err, "failed to get net connections") return } @@ -158,7 +158,7 @@ func (s *NetStats) gatherExt(slist *types.SampleList) { return } if err != nil { - log.Println("E! 
failed to get ext metrics:", err) + klog.ErrorS(err, "failed to get netstat extension metrics") return } diff --git a/inputs/netstat_filter/netstat_filter.go b/inputs/netstat_filter/netstat_filter.go index bfb59c96b..f5bdd7cc1 100644 --- a/inputs/netstat_filter/netstat_filter.go +++ b/inputs/netstat_filter/netstat_filter.go @@ -2,17 +2,19 @@ package netstat import ( "fmt" - "log" "syscall" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "netstat_filter" + var executed = false + type NetStatFilter struct { config.PluginConfig Instances []*Instance `toml:"instances"` @@ -63,7 +65,7 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { netconns, err := ins.ps.NetConnections() if err != nil { - log.Println("E! failed to get net connections:", err) + klog.ErrorS(err, "failed to get filtered net connections") return } @@ -117,7 +119,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } else { if !executed { // 执行只需要在启动后执行一次的代码 - log.Println("E! 
init Key not matched, TCP_ Send_ Queue, TCP_ Recv_Queue, The queue value is 0,key:", key) + klog.ErrorS(nil, "netstat filter key not matched, send and recv queue values are 0", "key", key) executed = true } } diff --git a/inputs/netstat_filter/netstat_tcp.go b/inputs/netstat_filter/netstat_tcp.go index 0d51f21b3..16f02a365 100644 --- a/inputs/netstat_filter/netstat_tcp.go +++ b/inputs/netstat_filter/netstat_tcp.go @@ -7,12 +7,13 @@ import ( "bufio" "encoding/binary" "fmt" - "log" "net" "os" "regexp" "strconv" "strings" + + "k8s.io/klog/v2" ) var ( @@ -42,7 +43,7 @@ func Trim(s string) string { func decToInt(n string) int { d, err := strconv.ParseInt(n, 10, 64) if err != nil { - log.Printf("Error while parsing %s to int: %s", n, err) + klog.ErrorS(err, "error while parsing decimal to int", "value", n) } return int(d) } @@ -50,7 +51,7 @@ func decToInt(n string) int { func hexToInt(h string) uint { d, err := strconv.ParseUint(h, 16, 64) if err != nil { - log.Printf("Error while parsing %s to int: %s", h, err) + klog.ErrorS(err, "error while parsing hex to int", "value", h) } return uint(d) } @@ -59,18 +60,18 @@ func hexToInt2(h string) (uint, uint) { if len(h) > 16 { d, err := strconv.ParseUint(h[:16], 16, 64) if err != nil { - log.Printf("Error while parsing %s to int: %s", h[16:], err) + klog.ErrorS(err, "error while parsing hex to int", "value", h[:16]) } d2, err := strconv.ParseUint(h[16:], 16, 64) if err != nil { - log.Printf("Error while parsing %s to int: %s", h[16:], err) + klog.ErrorS(err, "error while parsing hex to int", "value", h[16:]) } return uint(d), uint(d2) } d, err := strconv.ParseUint(h, 16, 64) if err != nil { - log.Printf("Error while parsing %s to int: %s", h[16:], err) + klog.ErrorS(err, "error while parsing hex to int", "value", h) } return uint(d), 0 } @@ -112,7 +113,7 @@ func Parse(proto string) ([]Entry, error) { line := Trim(scanner.Text()) m := parser.FindStringSubmatch(line) if m == nil { - log.Printf("Could not parse netstat line from 
%s: %s", filename, line) + klog.Warningf("could not parse netstat line from %s: %s", filename, line) continue } //只统计状态为TCP_ESTABLISHED diff --git a/inputs/nfsclient/nfsclient.go b/inputs/nfsclient/nfsclient.go index cb5407438..f00f356aa 100644 --- a/inputs/nfsclient/nfsclient.go +++ b/inputs/nfsclient/nfsclient.go @@ -3,7 +3,6 @@ package nfsclient import ( "bufio" "fmt" - "log" "os" "regexp" "strconv" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "nfsclient" @@ -178,41 +178,41 @@ func (s *NfsClient) Init() error { if len(s.IncludeMounts) > 0 { if s.DebugMod { - log.Println("D! Including these mount patterns:", s.IncludeMounts) + klog.V(1).InfoS("including mount patterns", "mount_patterns", s.IncludeMounts) } } else { if s.DebugMod { - log.Println("D! Including all mounts.") + klog.V(1).InfoS("including all mounts") } } if len(s.ExcludeMounts) > 0 { if s.DebugMod { - log.Println("D! Excluding these mount patterns:", s.ExcludeMounts) + klog.V(1).InfoS("excluding mount patterns", "mount_patterns", s.ExcludeMounts) } } else { if s.DebugMod { - log.Println("D! Not excluding any mounts.") + klog.V(1).InfoS("not excluding any mounts") } } if len(s.IncludeOperations) > 0 { if s.DebugMod { - log.Println("D! Including these operations:", s.IncludeOperations) + klog.V(1).InfoS("including operations", "operations", s.IncludeOperations) } } else { if s.DebugMod { - log.Println("D! Including all operations.") + klog.V(1).InfoS("including all operations") } } if len(s.ExcludeOperations) > 0 { if s.DebugMod { - log.Println("D! Excluding these mount patterns:", s.ExcludeOperations) + klog.V(1).InfoS("excluding operations", "operations", s.ExcludeOperations) } } else { if s.DebugMod { - log.Println("D! 
Not excluding any operations.") + klog.V(1).InfoS("not excluding any operations") } } @@ -223,7 +223,7 @@ func (s *NfsClient) Gather(slist *types.SampleList) { file, err := os.Open(s.mountstatsPath) if err != nil { if s.DebugMod { - log.Println("D! Failed opening the", file, "file:", err) + klog.V(1).InfoS("failed opening mountstats file", "path", s.mountstatsPath, "error", err) } return } @@ -235,7 +235,7 @@ func (s *NfsClient) Gather(slist *types.SampleList) { } if err := scanner.Err(); err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed reading mountstats") } } @@ -279,7 +279,7 @@ func (s *NfsClient) parseStat(mountpoint string, export string, version string, } if len(nline) == 0 { - log.Println("W! Parsing Stat line with one field:", line) + klog.Warningf("parsing stat line with one field: %v", line) return nil } @@ -495,7 +495,7 @@ func (s *NfsClient) getMountStatsPath() string { path = os.Getenv("MOUNT_PROC") } if s.DebugMod { - log.Println("D! using [", path, "] for mountstats") + klog.V(1).InfoS("using mountstats path", "path", path) } return path } diff --git a/inputs/nginx/nginx.go b/inputs/nginx/nginx.go index cf24b4ab7..40c38e969 100644 --- a/inputs/nginx/nginx.go +++ b/inputs/nginx/nginx.go @@ -4,7 +4,6 @@ import ( "bufio" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "nginx" @@ -120,14 +120,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, u := range ins.Urls { addr, err := url.Parse(u) if err != nil { - log.Println("E! 
failed to parse the url:", u, "error:", err) + klog.ErrorS(err, "failed to parse nginx url", "url", u) continue } wg.Add(1) go func(addr *url.URL) { defer wg.Done() if err := ins.gather(addr, slist); err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to gather nginx metrics", "url", addr.String()) } }(addr) } @@ -164,7 +164,7 @@ func (ins *Instance) createHTTPClient() (*http.Client, error) { func (ins *Instance) gather(addr *url.URL, slist *types.SampleList) error { if ins.DebugMod { - log.Println("D! nginx... url:", addr) + klog.V(1).InfoS("nginx gathering url", "url", addr.String()) } var body io.Reader @@ -205,7 +205,7 @@ func (ins *Instance) gather(addr *url.URL, slist *types.SampleList) error { defer func(Body io.ReadCloser) { err := Body.Close() if err != nil { - log.Println("E! failed to close the body of client:", err) + klog.ErrorS(err, "failed to close nginx response body", "url", addr.String()) } }(resp.Body) diff --git a/inputs/nginx_upstream_check/nginx_upstream_check.go b/inputs/nginx_upstream_check/nginx_upstream_check.go index 6c15756b9..3166adca4 100644 --- a/inputs/nginx_upstream_check/nginx_upstream_check.go +++ b/inputs/nginx_upstream_check/nginx_upstream_check.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/pkg/netx" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "nginx_upstream_check" @@ -184,7 +184,7 @@ type NginxUpstreamCheckServer struct { func (ins *Instance) gather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! nginx_upstream_check... target:", target) + klog.V(1).InfoS("nginx_upstream_check gather", "target", target) } labels := map[string]string{"target": target} @@ -193,7 +193,7 @@ func (ins *Instance) gather(slist *types.SampleList, target string) { err := ins.gatherJSONData(target, checkData) if err != nil { - log.Println("E! 
failed to gather json data:", err) + klog.ErrorS(err, "failed to gather json data", "target", target) return } diff --git a/inputs/node_exporter/collector/buddyinfo.go b/inputs/node_exporter/collector/buddyinfo.go index 30bf56af9..c9c204748 100644 --- a/inputs/node_exporter/collector/buddyinfo.go +++ b/inputs/node_exporter/collector/buddyinfo.go @@ -19,11 +19,11 @@ package collector import ( coreconfig "flashcat.cloud/categraf/config" "fmt" - "log" "strconv" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" + "k8s.io/klog/v2" ) const ( @@ -62,7 +62,7 @@ func (c *buddyinfoCollector) Update(ch chan<- prometheus.Metric) error { } if coreconfig.Config.DebugMode && coreconfig.Config.DebugLevel > 2 { - log.Println("D! set node_buddy buddyInfo", buddyInfo) + klog.V(1).InfoS("set node_buddy buddyInfo", "buddy_info", buddyInfo) } for _, entry := range buddyInfo { for size, value := range entry.Sizes { diff --git a/inputs/node_exporter/collector/collector.go b/inputs/node_exporter/collector/collector.go index fffae8bb1..493023062 100644 --- a/inputs/node_exporter/collector/collector.go +++ b/inputs/node_exporter/collector/collector.go @@ -17,12 +17,12 @@ package collector import ( "errors" "fmt" - "log" "strings" "sync" "time" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) // Namespace defines the common namespace to be used by all metrics. @@ -105,7 +105,7 @@ func (nc *NodeCollector) Init(filters ...string) { } else if len(paras) == 2 { params[paras[0]] = paras[1] } else { - log.Println(c, "invalid format") + klog.ErrorS(nil, "invalid collector filter format", "filter", c) } if strings.HasPrefix(c, "collector.") { c = strings.TrimPrefix(c, "collector.") @@ -183,14 +183,14 @@ func (n *NodeCollector) execute(name string, c Collector, ch chan<- prometheus.M if err != nil { if IsNoDataError(err) { - log.Println("E! 
collector returned no data:", name, "duration_seconds", duration.Seconds(), "err", err) + klog.ErrorS(err, "collector returned no data", "name", name, "duration_seconds", duration.Seconds()) } else { - log.Println("E! collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + klog.ErrorS(err, "collector failed", "name", name, "duration_seconds", duration.Seconds()) } success = 0 } else { if n.DebugMode { - log.Println("I!", "collector succeeded", "name", name, "duration_seconds", duration.Seconds()) + klog.V(1).InfoS("collector succeeded", "name", name, "duration_seconds", duration.Seconds()) } success = 1 } diff --git a/inputs/node_exporter/collector/cpu_linux.go b/inputs/node_exporter/collector/cpu_linux.go index 80a06db6e..b82601c2f 100644 --- a/inputs/node_exporter/collector/cpu_linux.go +++ b/inputs/node_exporter/collector/cpu_linux.go @@ -18,7 +18,6 @@ package collector import ( "fmt" - "log" "os" "path/filepath" "regexp" @@ -30,6 +29,7 @@ import ( "github.com/prometheus/procfs/sysfs" "golang.org/x/exp/maps" "golang.org/x/exp/slices" + "k8s.io/klog/v2" ) type cpuCollector struct { @@ -142,7 +142,7 @@ func NewCPUCollector() (Collector, error) { func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error { if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo { *enableCPUInfo = true - log.Println("I! 
--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include") + klog.Info("--collector.cpu.info has been set to true because cpu info include flags were configured") } var err error diff --git a/inputs/node_exporter/collector/diskstats_common.go b/inputs/node_exporter/collector/diskstats_common.go index 07b705350..cb65db472 100644 --- a/inputs/node_exporter/collector/diskstats_common.go +++ b/inputs/node_exporter/collector/diskstats_common.go @@ -19,9 +19,9 @@ package collector import ( "errors" - "log" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) const ( @@ -84,7 +84,7 @@ var ( func newDiskstatsDeviceFilter() (deviceFilter, error) { if *oldDiskstatsDeviceExclude != "" { if !diskstatsDeviceExcludeSet { - log.Println("W! --collector.diskstats.ignore-devices is DEPRECATED and will be removed in 2.0.0, use --collector.diskstats.device-exclude") + klog.Warning("--collector.diskstats.ignore-devices is deprecated and will be removed in 2.0.0, use --collector.diskstats.device-exclude") *diskstatsDeviceExclude = *oldDiskstatsDeviceExclude } else { return deviceFilter{}, errors.New("--collector.diskstats.ignored-devices and --collector.diskstats.device-exclude are mutually exclusive") @@ -96,11 +96,11 @@ func newDiskstatsDeviceFilter() (deviceFilter, error) { } if *diskstatsDeviceExclude != "" { - log.Println("I! 
Parsing flag --collector.diskstats.device-exclude", "flag", *diskstatsDeviceExclude) + klog.InfoS("parsing flag --collector.diskstats.device-exclude", "flag", *diskstatsDeviceExclude) } if *diskstatsDeviceInclude != "" { - log.Println("Parsed Flag --collector.diskstats.device-include", "flag", *diskstatsDeviceInclude) + klog.InfoS("parsed flag --collector.diskstats.device-include", "flag", *diskstatsDeviceInclude) } return newDeviceFilter(*diskstatsDeviceExclude, *diskstatsDeviceInclude), nil diff --git a/inputs/node_exporter/collector/diskstats_linux.go b/inputs/node_exporter/collector/diskstats_linux.go index 0e3d913f9..0d93d1803 100644 --- a/inputs/node_exporter/collector/diskstats_linux.go +++ b/inputs/node_exporter/collector/diskstats_linux.go @@ -19,13 +19,13 @@ package collector import ( "bufio" "fmt" - "log" "os" "strconv" "strings" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/blockdevice" + "k8s.io/klog/v2" ) const ( @@ -259,7 +259,7 @@ func NewDiskstatsCollector() (Collector, error) { // Only enable getting device properties from udev if the directory is readable. if stat, err := os.Stat(*udevDataPath); err != nil || !stat.IsDir() { - log.Println("E! failed to open directory, disabling udev device properties path", *udevDataPath) + klog.ErrorS(err, "failed to open directory, disabling udev device properties path", "path", *udevDataPath) } else { collector.getUdevDeviceProperties = getUdevDeviceProperties } @@ -372,7 +372,7 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { if value, err := strconv.ParseFloat(str, 64); err == nil { ch <- desc.mustNewConstMetric(value, dev) } else { - log.Println("E! 
Failed to parse ATA value", err) + klog.ErrorS(err, "failed to parse ATA value", "value", str, "device", dev) } } } diff --git a/inputs/node_exporter/collector/ethtool_linux.go b/inputs/node_exporter/collector/ethtool_linux.go index e2c009d9d..669b1029d 100644 --- a/inputs/node_exporter/collector/ethtool_linux.go +++ b/inputs/node_exporter/collector/ethtool_linux.go @@ -23,7 +23,6 @@ package collector import ( "errors" "fmt" - "log" "os" "regexp" "sort" @@ -35,6 +34,7 @@ import ( "github.com/prometheus/procfs/sysfs" "github.com/safchain/ethtool" "golang.org/x/sys/unix" + "k8s.io/klog/v2" ) var ( @@ -404,10 +404,10 @@ func (c *ethtoolCollector) Update(ch chan<- prometheus.Metric) error { if err == unix.EOPNOTSUPP { // level.Debug(c.logger).Log("msg", "ethtool link info error", "err", err, "device", device, "errno", uint(errno)) } else if errno != 0 { - log.Println("E! ethtool link info error, err:", err, "device:", device, "errno:", uint(errno)) + klog.ErrorS(err, "ethtool link info error", "device", device, "errno", uint(errno)) } } else { - log.Println("E! ethtool link info error, err:", err, "device:", device) + klog.ErrorS(err, "ethtool link info error", "device", device) } } @@ -421,10 +421,10 @@ func (c *ethtoolCollector) Update(ch chan<- prometheus.Metric) error { if err == unix.EOPNOTSUPP { // level.Debug(c.logger).Log("msg", "ethtool driver info error", "err", err, "device", device, "errno", uint(errno)) } else if errno != 0 { - log.Println("E! ethtool driver info error, err:", err, "device:", device, "errno:", uint(errno)) + klog.ErrorS(err, "ethtool driver info error", "device", device, "errno", uint(errno)) } } else { - log.Println("E! 
ethool driver info error, err:", err, "device:", device) + klog.ErrorS(err, "ethtool driver info error", "device", device) } } @@ -437,10 +437,10 @@ func (c *ethtoolCollector) Update(ch chan<- prometheus.Metric) error { if err == unix.EOPNOTSUPP { // level.Debug(c.logger).Log("msg", "ethtool stats error", "err", err, "device", device, "errno", uint(errno)) } else if errno != 0 { - log.Println("E! ethool stats error, err:", err, "device:", device, "errno:", uint(errno)) + klog.ErrorS(err, "ethtool stats error", "device", device, "errno", uint(errno)) } } else { - log.Println("E! ethool stats error, err:", err, "device:", device) + klog.ErrorS(err, "ethtool stats error", "device", device) } } diff --git a/inputs/node_exporter/collector/filesystem_common.go b/inputs/node_exporter/collector/filesystem_common.go index 3a0a43a2a..b8ceb46ea 100644 --- a/inputs/node_exporter/collector/filesystem_common.go +++ b/inputs/node_exporter/collector/filesystem_common.go @@ -19,10 +19,10 @@ package collector import ( "errors" - "log" "regexp" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) // Arch-dependent implementation must define: @@ -97,7 +97,7 @@ func init() { func NewFilesystemCollector() (Collector, error) { if *oldMountPointsExcluded != "" { if !mountPointsExcludeSet { - log.Println("W! --collector.filesystem.ignored-mount-points isDEPRECATED and will be removed in 2.0.0, use --collector.filesystem.mount-points-exclude") + klog.Warning("--collector.filesystem.ignored-mount-points is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.mount-points-exclude") *mountPointsExclude = *oldMountPointsExcluded } else { return nil, errors.New("--collector.filesystem.ignored-mount-points and --collector.filesystem.mount-points-exclude are mutually exclusive") @@ -106,7 +106,7 @@ func NewFilesystemCollector() (Collector, error) { if *oldFSTypesExcluded != "" { if !fsTypesExcludeSet { - log.Println("W! 
--collector.filesystem.ignored-fs-types is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.fs-types-exclude") + klog.Warning("--collector.filesystem.ignored-fs-types is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.fs-types-exclude") *fsTypesExclude = *oldFSTypesExcluded } else { return nil, errors.New("--collector.filesystem.ignored-fs-types and --collector.filesystem.fs-types-exclude are mutually exclusive") @@ -114,9 +114,9 @@ func NewFilesystemCollector() (Collector, error) { } subsystem := "filesystem" - log.Println("I! Parsed flag --collector.filesystem.mount-points-exclude", "flag", *mountPointsExclude) + klog.InfoS("parsed flag --collector.filesystem.mount-points-exclude", "flag", *mountPointsExclude) mountPointPattern := regexp.MustCompile(*mountPointsExclude) - log.Println("I! Parsed flag --collector.filesystem.fs-types-exclude", "flag", *fsTypesExclude) + klog.InfoS("parsed flag --collector.filesystem.fs-types-exclude", "flag", *fsTypesExclude) filesystemsTypesPattern := regexp.MustCompile(*fsTypesExclude) sizeDesc := prometheus.NewDesc( diff --git a/inputs/node_exporter/collector/netclass_rtnl_linux.go b/inputs/node_exporter/collector/netclass_rtnl_linux.go index c320b7359..e52660415 100644 --- a/inputs/node_exporter/collector/netclass_rtnl_linux.go +++ b/inputs/node_exporter/collector/netclass_rtnl_linux.go @@ -20,13 +20,13 @@ import ( "errors" "fmt" "io/fs" - "log" "path/filepath" "github.com/jsimonetti/rtnetlink" "github.com/mdlayher/ethtool" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/sysfs" + "k8s.io/klog/v2" ) var ( @@ -44,7 +44,7 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro if !errors.Is(errors.Unwrap(err), fs.ErrNotExist) { return fmt.Errorf("could not get link modes: %w", err) } - log.Println("I! 
ETHTOOL netlink interface unavailable, duplex and linkspeed are not scraped.") + klog.Info("ETHTOOL netlink interface unavailable, duplex and linkspeed are not scraped") } else { for _, lm := range lms { if c.ignoredDevicesPattern.MatchString(lm.Interface.Name) { diff --git a/inputs/node_exporter/collector/netdev_common.go b/inputs/node_exporter/collector/netdev_common.go index ca142e1ed..dd9c8baf8 100644 --- a/inputs/node_exporter/collector/netdev_common.go +++ b/inputs/node_exporter/collector/netdev_common.go @@ -20,11 +20,12 @@ package collector import ( "errors" "fmt" - "github.com/prometheus/client_golang/prometheus" - "log" "net" "strconv" "sync" + + "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var ( @@ -53,7 +54,7 @@ func init() { func NewNetDevCollector() (Collector, error) { if *oldNetdevDeviceInclude != "" { if *netdevDeviceInclude == "" { - log.Println("W! --collector.netdev.device-whitelist is DEPRECATED and will be removed in 2.0.0, use --collector.netdev.device-include") + klog.Warning("--collector.netdev.device-whitelist is DEPRECATED and will be removed in 2.0.0, use --collector.netdev.device-include") *netdevDeviceInclude = *oldNetdevDeviceInclude } else { return nil, errors.New("--collector.netdev.device-whitelist and --collector.netdev.device-include are mutually exclusive") @@ -62,7 +63,7 @@ func NewNetDevCollector() (Collector, error) { if *oldNetdevDeviceExclude != "" { if *netdevDeviceExclude == "" { - log.Println("W! 
--collector.netdev.device-blacklist is DEPRECATED and will be removed in 2.0.0, use --collector.netdev.device-exclude") + klog.Warning("--collector.netdev.device-blacklist is DEPRECATED and will be removed in 2.0.0, use --collector.netdev.device-exclude") *netdevDeviceExclude = *oldNetdevDeviceExclude } else { return nil, errors.New("--collector.netdev.device-blacklist and --collector.netdev.device-exclude are mutually exclusive") @@ -74,11 +75,11 @@ func NewNetDevCollector() (Collector, error) { } if *netdevDeviceExclude != "" { - log.Println("Parsed flag --collector.netdev.device-exclude", "flag", *netdevDeviceExclude) + klog.InfoS("parsed flag --collector.netdev.device-exclude", "flag", *netdevDeviceExclude) } if *netdevDeviceInclude != "" { - log.Println("Parsed Flag --collector.netdev.device-include", "flag", *netdevDeviceInclude) + klog.InfoS("parsed flag --collector.netdev.device-include", "flag", *netdevDeviceInclude) } return &netDevCollector{ diff --git a/inputs/node_exporter/collector/ntp.go b/inputs/node_exporter/collector/ntp.go index f642ee4fe..6ab577bf2 100644 --- a/inputs/node_exporter/collector/ntp.go +++ b/inputs/node_exporter/collector/ntp.go @@ -18,7 +18,6 @@ package collector import ( "fmt" - "log" "net" "strconv" "sync" @@ -26,6 +25,7 @@ import ( "github.com/beevik/ntp" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) const ( @@ -78,7 +78,7 @@ func NewNtpCollector() (Collector, error) { return nil, fmt.Errorf("invalid NTP port number %d; must be between 1 and 65535 inclusive", *ntpServerPort) } - log.Println("W! 
This collector is deprecated and will be removed in the next major version release.") + klog.Warning("this node_exporter ntp collector is deprecated and will be removed in the next major version release") return &ntpCollector{ stratum: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "stratum"), diff --git a/inputs/node_exporter/collector/perf_linux.go b/inputs/node_exporter/collector/perf_linux.go index 82b7675b0..96700698d 100644 --- a/inputs/node_exporter/collector/perf_linux.go +++ b/inputs/node_exporter/collector/perf_linux.go @@ -18,7 +18,6 @@ package collector import ( "fmt" - "log" "runtime" "strconv" "strings" @@ -26,6 +25,7 @@ import ( "github.com/hodgesds/perf-utils" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sys/unix" + "k8s.io/klog/v2" ) const ( @@ -214,7 +214,7 @@ func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric profiler := c.profilers[cpu] p := &perf.GroupProfileValue{} if err := profiler.Profile(p); err != nil { - log.Println("E! Failed to collect tracepoint profile:", err) + klog.ErrorS(err, "failed to collect tracepoint profile", "cpu", cpu) return err } diff --git a/inputs/node_exporter/collector/qdisc_linux.go b/inputs/node_exporter/collector/qdisc_linux.go index 78dca2901..1b845771c 100644 --- a/inputs/node_exporter/collector/qdisc_linux.go +++ b/inputs/node_exporter/collector/qdisc_linux.go @@ -19,12 +19,12 @@ package collector import ( "encoding/json" "fmt" - "log" "os" "path/filepath" "github.com/ema/qdisc" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) type qdiscStatCollector struct { @@ -54,7 +54,7 @@ func init() { func NewQdiscStatCollector() (Collector, error) { if *oldCollectorQdiskDeviceInclude != "" { if *collectorQdiscDeviceInclude == "" { - log.Println("W! 
--collector.qdisk.device-include is DEPRECATED and will be removed in 2.0.0, use --collector.qdisc.device-include") + klog.Warning("--collector.qdisk.device-include is deprecated and will be removed in 2.0.0, use --collector.qdisc.device-include") *collectorQdiscDeviceInclude = *oldCollectorQdiskDeviceInclude } else { return nil, fmt.Errorf("--collector.qdisk.device-include and --collector.qdisc.device-include are mutually exclusive") @@ -63,7 +63,7 @@ func NewQdiscStatCollector() (Collector, error) { if *oldCollectorQdiskDeviceExclude != "" { if *collectorQdiscDeviceExclude == "" { - log.Println("W! --collector.qdisk.device-exclude is DEPRECATED and will be removed in 2.0.0, use --collector.qdisc.device-exclude") + klog.Warning("--collector.qdisk.device-exclude is deprecated and will be removed in 2.0.0, use --collector.qdisc.device-exclude") *collectorQdiscDeviceExclude = *oldCollectorQdiskDeviceExclude } else { diff --git a/inputs/node_exporter/collector/runit.go b/inputs/node_exporter/collector/runit.go index 9399db3a3..59ce90d1f 100644 --- a/inputs/node_exporter/collector/runit.go +++ b/inputs/node_exporter/collector/runit.go @@ -17,10 +17,9 @@ package collector import ( - "log" - "github.com/prometheus-community/go-runit/runit" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var runitServiceDir = new(string) // kingpin.Flag("collector.runit.servicedir", "Path to runit service directory.").Default("/etc/service").String() @@ -44,7 +43,7 @@ func NewRunitCollector() (Collector, error) { labelNames = []string{"service"} ) - log.Println("W! 
This collector is deprecated and will be removed in the next major version release.") + klog.Warning("this node_exporter runit collector is deprecated and will be removed in the next major version release") return &runitCollector{ state: typedDesc{prometheus.NewDesc( diff --git a/inputs/node_exporter/collector/supervisord.go b/inputs/node_exporter/collector/supervisord.go index a9dc799fc..bf7697bf6 100644 --- a/inputs/node_exporter/collector/supervisord.go +++ b/inputs/node_exporter/collector/supervisord.go @@ -19,7 +19,6 @@ package collector import ( "context" "fmt" - "log" "net" "net/http" "net/url" @@ -27,6 +26,7 @@ import ( "github.com/mattn/go-xmlrpc" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" ) var ( @@ -66,7 +66,7 @@ func NewSupervisordCollector() (Collector, error) { xrpc = xmlrpc.NewClient(*supervisordURL) } - log.Println("W! this collector is deprecated and will be removed in the next major version release.") + klog.Warning("this node_exporter supervisord collector is deprecated and will be removed in the next major version release") return &supervisordCollector{ upDesc: prometheus.NewDesc( diff --git a/inputs/node_exporter/collector/systemd_linux.go b/inputs/node_exporter/collector/systemd_linux.go index 8cc4b7cb6..bbfe9ef73 100644 --- a/inputs/node_exporter/collector/systemd_linux.go +++ b/inputs/node_exporter/collector/systemd_linux.go @@ -22,12 +22,13 @@ import ( "fmt" "github.com/coreos/go-systemd/v22/dbus" "github.com/prometheus/client_golang/prometheus" - "log" "math" "regexp" "strconv" "strings" "sync" + + "k8s.io/klog/v2" ) const ( @@ -138,7 +139,7 @@ func NewSystemdCollector() (Collector, error) { if *oldSystemdUnitExclude != "" { if !systemdUnitExcludeSet { - log.Println("W! 
--collector.systemd.unit-blacklist is DEPRECATED and will be removed in 2.0.0, use --collector.systemd.unit-exclude") + klog.Warning("--collector.systemd.unit-blacklist is deprecated and will be removed in 2.0.0, use --collector.systemd.unit-exclude") *systemdUnitExclude = *oldSystemdUnitExclude } else { return nil, errors.New("--collector.systemd.unit-blacklist and --collector.systemd.unit-exclude are mutually exclusive") @@ -146,7 +147,7 @@ func NewSystemdCollector() (Collector, error) { } if *oldSystemdUnitInclude != "" { if !systemdUnitIncludeSet { - log.Println("W! --collector.systemd.unit-whitelist is DEPRECATED and will be removed in 2.0.0, use --collector.systemd.unit-include") + klog.Warning("--collector.systemd.unit-whitelist is deprecated and will be removed in 2.0.0, use --collector.systemd.unit-include") *systemdUnitInclude = *oldSystemdUnitInclude } else { return nil, errors.New("--collector.systemd.unit-whitelist and --collector.systemd.unit-include are mutually exclusive") diff --git a/inputs/node_exporter/collector/textfile.go b/inputs/node_exporter/collector/textfile.go index 308ab4b4d..71fb0b2d0 100644 --- a/inputs/node_exporter/collector/textfile.go +++ b/inputs/node_exporter/collector/textfile.go @@ -18,7 +18,6 @@ package collector import ( "fmt" - "log" "os" "path/filepath" "sort" @@ -28,6 +27,7 @@ import ( "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + "k8s.io/klog/v2" ) var ( @@ -76,7 +76,7 @@ func convertMetricFamily(metricFamily *dto.MetricFamily, ch chan<- prometheus.Me for _, metric := range metricFamily.Metric { if metric.TimestampMs != nil { - log.Println("W! 
ignoring unsupported custom timestamp on textfile collector metric", metric) + klog.Warningf("ignoring unsupported custom timestamp on textfile collector metric: metric=%v", metric) } labels := metric.GetLabel() @@ -207,7 +207,7 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error { files, err := os.ReadDir(path) if err != nil && path != "" { errored = true - log.Println("E! failed to read textfile collector directory", path, err) + klog.ErrorS(err, "failed to read textfile collector directory", "path", path) } for _, f := range files { @@ -225,7 +225,7 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { errored = true - log.Println("E! failed to collect textfile data", "file", f.Name(), "err", err) + klog.ErrorS(err, "failed to collect textfile data", "file", f.Name()) continue } diff --git a/inputs/node_exporter/exporter.go b/inputs/node_exporter/exporter.go index ba7cb0e4b..0397fa48e 100644 --- a/inputs/node_exporter/exporter.go +++ b/inputs/node_exporter/exporter.go @@ -2,9 +2,9 @@ package node_exporter import ( "fmt" - "log" "github.com/prometheus/client_golang/prometheus" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -75,6 +75,6 @@ func (e *Exporter) Gather(slist *types.SampleList) { labels := e.GetLabels() err := inputs.Collect(e.nc, slist, labels) if err != nil { - log.Println("E! 
node exporter collects error:", err) + klog.ErrorS(err, "failed to collect node exporter metrics") } } diff --git a/inputs/nsq/nsq.go b/inputs/nsq/nsq.go index c6ee6851a..15c14eec5 100644 --- a/inputs/nsq/nsq.go +++ b/inputs/nsq/nsq.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/httpx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "nsq" @@ -63,7 +63,7 @@ type Instance struct { func (ins *Instance) Init() error { if len(ins.URL) != 0 { - log.Println("W! url is deprecated, please use targets") + klog.Warning("url is deprecated, please use targets") } if len(ins.Targets) == 0 && len(ins.URL) == 0 { return types.ErrInstancesEmpty @@ -97,13 +97,13 @@ func (ins *Instance) Gather(slist *types.SampleList) { if len(ins.URL) != 0 { topics, err := ins.GetTopicInfo() if err != nil { - log.Println("E! Failed to obtain the topic list error:", err) + klog.ErrorS(err, "failed to obtain the topic list") } else { for _, topic := range topics { v, err := ins.getQueuesInfo(topic) if err != nil { v = 0 - log.Println("E! Failed to obtain topic depth value error:", err) + klog.ErrorS(err, "failed to obtain topic depth value", "topic", topic) } fields := map[string]interface{}{ "depth": v, @@ -123,31 +123,31 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gatherEndpoint(e string, slist *types.SampleList) { u, err := buildURL(e) if err != nil { - log.Println("E! error buildURL", err) + klog.ErrorS(err, "failed to build nsq url", "endpoint", e) return } r, err := ins.client.Get(u.String()) if err != nil { - log.Println("E! error while polling", u.String(), err) + klog.ErrorS(err, "error while polling nsq", "url", u.String()) return } defer r.Body.Close() if r.StatusCode != http.StatusOK { - log.Println(u.String(), "E! 
error while polling", r.Status) + klog.ErrorS(nil, "unexpected nsq response status", "url", u.String(), "status", r.Status) return } body, err := io.ReadAll(r.Body) if err != nil { - log.Println("E! error reading body", err) + klog.ErrorS(err, "failed to read nsq response body", "url", u.String()) return } data := &NSQStatsData{} err = json.Unmarshal(body, data) if err != nil { - log.Println("E! error parsing response", err) + klog.ErrorS(err, "failed to parse nsq response", "url", u.String()) return } @@ -156,7 +156,7 @@ func (ins *Instance) gatherEndpoint(e string, slist *types.SampleList) { wrapper := &NSQStats{} err = json.Unmarshal(body, wrapper) if err != nil { - log.Println("E! error parsing response", err) + klog.ErrorS(err, "failed to parse legacy nsq response", "url", u.String()) return } diff --git a/inputs/ntp/ntp.go b/inputs/ntp/ntp.go index da2249ccc..946392f54 100644 --- a/inputs/ntp/ntp.go +++ b/inputs/ntp/ntp.go @@ -1,13 +1,13 @@ package ntp import ( - "log" "time" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" "github.com/beevik/ntp" + "k8s.io/klog/v2" ) const inputName = "ntp" @@ -55,7 +55,7 @@ func (n *NTPStat) Gather(slist *types.SampleList) { }) if err != nil { - log.Println("E! 
failed to connect ntp server:", n.server, "error:", err) + klog.ErrorS(err, "failed to connect ntp server", "server", n.server) n.server = "" continue } diff --git a/inputs/ntp/ntp_test.go b/inputs/ntp/ntp_test.go index ffebd3d27..fd6a63d51 100644 --- a/inputs/ntp/ntp_test.go +++ b/inputs/ntp/ntp_test.go @@ -1,7 +1,7 @@ package ntp import ( - "log" + stdlog "log" "testing" "time" @@ -9,17 +9,18 @@ import ( ) func TestClockOffset(t *testing.T) { - log.Println("Begin") + logger := stdlog.New(stdlog.Writer(), "", 0) + logger.Println("Begin") resp, err := ntp.QueryWithOptions("ntp1.aliyun.com", ntp.QueryOptions{ Timeout: 20 * time.Second, Version: 4, }) if err != nil { - log.Println(err) + logger.Println(err) return } // offset in ms delta := resp.ClockOffset.Seconds() * 1000 - log.Println("Offset (ms):", delta) + logger.Println("Offset (ms):", delta) } diff --git a/inputs/nvidia_smi/builder.go b/inputs/nvidia_smi/builder.go index 9ca2c2ad5..ed44fd40f 100644 --- a/inputs/nvidia_smi/builder.go +++ b/inputs/nvidia_smi/builder.go @@ -2,10 +2,10 @@ package nvidia_smi import ( "fmt" - "log" "strings" "flashcat.cloud/categraf/pkg/stringx" + "k8s.io/klog/v2" ) func buildQFieldToMetricInfoMap(qFieldtoRFieldMap map[qField]rField) map[qField]MetricInfo { @@ -55,7 +55,7 @@ func (s *GPUStats) buildQFieldToRFieldMap() ([]qField, map[qField]rField, error) if len(qFieldsSeparated) == 1 && qFieldsSeparated[0] == qFieldsAuto { parsed, err := parseAutoQFields(s.NvidiaSmiCommand) if err != nil { - log.Println("W! failed to auto-determine query field names, falling back to the built-in list. error:", err) + klog.Warningf("failed to auto-determine nvidia-smi query field names, falling back to built-in list: err=%v", err) return getKeys(fallbackQFieldToRFieldMap), fallbackQFieldToRFieldMap, nil } @@ -67,7 +67,7 @@ func (s *GPUStats) buildQFieldToRFieldMap() ([]qField, map[qField]rField, error) var rFields []rField if err != nil { - log.Println("W! 
failed to run an initial scrape, using the built-in list for field mapping") + klog.Warningf("failed to run initial nvidia-smi scrape, using built-in field mapping: err=%v", err) rFields, err = getFallbackValues(qFields) if err != nil { diff --git a/inputs/nvidia_smi/nvidia_smi.go b/inputs/nvidia_smi/nvidia_smi.go index bab07273e..bebbdbe22 100644 --- a/inputs/nvidia_smi/nvidia_smi.go +++ b/inputs/nvidia_smi/nvidia_smi.go @@ -3,13 +3,13 @@ package nvidia_smi // This is a fork of https://github.com/utkuozdemir/nvidia_gpu_exporter import ( - "log" "strings" "time" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "nvidia_smi" @@ -101,7 +101,7 @@ func (s *GPUStats) Gather(slist *types.SampleList) { num, err := transformRawValue(currentCell.rawValue, metricInfo.valueMultiplier) if err != nil { if s.DebugMod { - log.Println("D! failed to transform gpu field:", currentCell.qField, "raw value:", currentCell.rawValue, "error:", err) + klog.V(1).InfoS("failed to transform gpu field", "field", currentCell.qField, "raw_value", currentCell.rawValue, "err", err) } continue } diff --git a/inputs/oracle/oracle.go b/inputs/oracle/oracle.go index c59a660d7..18c095bd5 100644 --- a/inputs/oracle/oracle.go +++ b/inputs/oracle/oracle.go @@ -4,7 +4,6 @@ import ( "context" "database/sql" "fmt" - "log" "regexp" "strconv" "strings" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/conv" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "oracle" @@ -128,18 +128,18 @@ func (ins *Instance) Init() error { func (ins *Instance) Drop() error { if ins.DebugMod { - log.Println("D! dropping oracle connection:", ins.Address) + klog.V(1).InfoS("dropping oracle connection", "address", ins.Address) } if len(ins.Address) == 0 || ins.client == nil { if ins.DebugMod { - log.Println("D! 
oracle address is empty or client is nil, so there is no need to close") + klog.V(1).InfoS("oracle address is empty or client is nil, so there is no need to close", "address", ins.Address) } return nil } if err := ins.client.Close(); err != nil { - log.Println("E! failed to close oracle connection:", ins.Address, "error:", err) + klog.ErrorS(err, "failed to close oracle connection", "address", ins.Address) } return nil @@ -148,7 +148,7 @@ func (ins *Instance) Drop() error { func (ins *Instance) Gather(slist *types.SampleList) { if len(ins.Address) == 0 { if ins.DebugMod { - log.Println("D! oracle address is empty") + klog.V(1).InfoS("oracle address is empty") } return } @@ -160,13 +160,12 @@ func (ins *Instance) Gather(slist *types.SampleList) { }(time.Now()) if err := ins.client.Ping(); err != nil { - - log.Println("I! attempting to rebuild oracle connection:", ins.Address) + klog.InfoS("attempting to rebuild oracle connection", "address", ins.Address) ins.Drop() ins.Init() if err := ins.client.Ping(); err != nil { slist.PushFront(types.NewSample(inputName, "up", 0, tags)) - log.Println("E! failed to ping oracle:", ins.Address, "error:", err) + klog.ErrorS(err, "failed to ping oracle", "address", ins.Address) return } } else { @@ -200,13 +199,12 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp rows, err := ins.client.QueryContext(ctx, metricConf.Request) if ctx.Err() == context.DeadlineExceeded { - log.Printf("E! %s oracle query timeout (more than %d seconds), request: %s", ins.Address, metricConf.Timeout/(1000*1000*1000), - strings.ReplaceAll(strings.ReplaceAll(metricConf.Request, "\n", " "), "\r", " ")) + klog.ErrorS(nil, "oracle query timeout", "address", ins.Address, "timeout_seconds", metricConf.Timeout/(1000*1000*1000), "request", strings.ReplaceAll(strings.ReplaceAll(metricConf.Request, "\n", " "), "\r", " ")) return } if err != nil { - log.Println("E! 
failed to query:", err) + klog.ErrorS(err, "failed to query oracle", "address", ins.Address) return } @@ -214,12 +212,12 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp cols, err := rows.Columns() if err != nil { - log.Println("E! failed to get columns:", err) + klog.ErrorS(err, "failed to get oracle columns", "address", ins.Address) return } if ins.DebugMod { - log.Println("D! columns:", cols) + klog.V(1).InfoS("oracle columns", "address", ins.Address, "columns", cols) } for rows.Next() { @@ -231,7 +229,7 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp // Scan the result into the column pointers... if err := rows.Scan(columnPointers...); err != nil { - log.Println("E! failed to scan:", err) + klog.ErrorS(err, "failed to scan oracle row", "address", ins.Address) return } @@ -245,14 +243,14 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp count := 0 if err = ins.parseRow(m, metricConf, slist, tags); err != nil { - log.Println("E! failed to parse row:", err) + klog.ErrorS(err, "failed to parse oracle row", "address", ins.Address) continue } else { count++ } if !metricConf.IgnoreZeroResult && count == 0 { - log.Println("E! no metrics found while parsing") + klog.ErrorS(nil, "no oracle metrics found while parsing", "address", ins.Address) } } } @@ -273,7 +271,7 @@ func (ins *Instance) parseRow(row map[string]string, metricConf MetricConfig, sl for _, column := range metricConf.MetricFields { value, err := conv.ToFloat64(row[column]) if err != nil { - log.Println("E! failed to convert field:", column, "value:", value, "error:", err) + klog.ErrorS(err, "failed to convert oracle field", "address", ins.Address, "column", column, "value", row[column]) return err } @@ -325,7 +323,7 @@ func (ins *Instance) getConnectionString() (string, error) { ip, port, service, err := explode(ins.Address) if err != nil { - log.Println("E! 
oracle address format error:", err) + klog.ErrorS(err, "oracle address format error", "address", ins.Address) return "", err } return go_ora.BuildUrl(ip, port, service, ins.Username, ins.Password, opts), nil diff --git a/inputs/phpfpm/phpfpm.go b/inputs/phpfpm/phpfpm.go index 7cf93d904..ec0c6e89c 100644 --- a/inputs/phpfpm/phpfpm.go +++ b/inputs/phpfpm/phpfpm.go @@ -4,7 +4,6 @@ import ( "bufio" "fmt" "io" - "log" "net/http" "net/url" "os" @@ -19,6 +18,7 @@ import ( "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" fcgiclient "github.com/tomasen/fcgi_client" + "k8s.io/klog/v2" ) const ( @@ -104,7 +104,7 @@ func (ins *Instance) Gather(sList *types.SampleList) { urls, err := expandUrls(ins.Urls) if err != nil { - log.Println("E! failed to parse urls:", err) + klog.ErrorS(err, "failed to expand php-fpm urls") return } @@ -113,7 +113,7 @@ func (ins *Instance) Gather(sList *types.SampleList) { go func(url string) { defer wg.Done() if err := ins.gather(url, sList); err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to gather php-fpm metrics", "url", url) } }(u) } @@ -123,7 +123,7 @@ func (ins *Instance) Gather(sList *types.SampleList) { func (ins *Instance) gather(addr string, sList *types.SampleList) error { if ins.DebugMod { - log.Println("D! php-fpm... url:", addr) + klog.V(1).InfoS("php-fpm gathering url", "url", addr) } var resp *http.Response @@ -144,7 +144,7 @@ func (ins *Instance) gather(addr string, sList *types.SampleList) error { defer func(Body io.ReadCloser) { err := Body.Close() if err != nil { - log.Println("E! 
failed to close the body of client:", err) + klog.ErrorS(err, "failed to close php-fpm response body", "url", addr) } }(resp.Body) @@ -229,7 +229,7 @@ func (ins *Instance) initHTTPClient() { if ins.client == nil { client, err := ins.createHTTPClient() if err != nil { - log.Printf("failed to create http client: %v", err) + klog.ErrorS(err, "failed to create php-fpm HTTP client") } ins.client = client } diff --git a/inputs/ping/ping.go b/inputs/ping/ping.go index 47ec826fd..2dc1bbfaa 100644 --- a/inputs/ping/ping.go +++ b/inputs/ping/ping.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "fmt" - "log" "net" "os/exec" "runtime" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/cmdx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -139,7 +139,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } if ins.DebugMod { - log.Println("D! ping method", ins.Method) + klog.V(1).InfoS("ping method", "method", ins.Method) } wg := new(sync.WaitGroup) ch := make(chan struct{}, ins.Conc) @@ -162,7 +162,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) nativeGather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! ping...", target) + klog.V(1).InfoS("ping target", "target", target) } labels := map[string]string{"target": target} @@ -177,7 +177,7 @@ func (ins *Instance) nativeGather(slist *types.SampleList, target string) { stats, err := ins.ping(target) if err != nil { - log.Println("E! failed to ping:", target, "error:", err) + klog.ErrorS(err, "failed to ping", "target", target) if strings.Contains(err.Error(), "unknown") { fields["result_code"] = 1 } else { @@ -190,7 +190,7 @@ func (ins *Instance) nativeGather(slist *types.SampleList, target string) { if stats.PacketsSent == 0 { if ins.DebugMod { - log.Println("D! 
no packets sent, target:", target) + klog.V(1).InfoS("no packets sent", "target", target) } fields["result_code"] = 2 return @@ -198,7 +198,7 @@ func (ins *Instance) nativeGather(slist *types.SampleList, target string) { if stats.PacketsRecv == 0 { if ins.DebugMod { - log.Println("D! no packets received, target:", target) + klog.V(1).InfoS("no packets received", "target", target) } fields["result_code"] = 1 fields["minimum_response_ms"] = float64(-1) @@ -290,7 +290,7 @@ func hostPinger(binary string, timeout float64, args ...string) (string, error) err, to := cmdx.RunTimeout(cmd, time.Second*time.Duration(timeout+5)) if to { - log.Printf("E! run command: %s timeout", strings.Join(cmd.Args, " ")) + klog.ErrorS(nil, "run ping command timeout", "command", strings.Join(cmd.Args, " ")) return stderr.String(), errors.New("run command timeout") } return stdout.String(), err diff --git a/inputs/ping/ping_notwindows.go b/inputs/ping/ping_notwindows.go index a141b4f7d..60477f74a 100644 --- a/inputs/ping/ping_notwindows.go +++ b/inputs/ping/ping_notwindows.go @@ -5,7 +5,6 @@ package ping import ( "errors" "fmt" - "log" "os/exec" "regexp" "runtime" @@ -14,6 +13,7 @@ import ( "syscall" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) type roundTripTimeStats struct { @@ -32,7 +32,7 @@ type statistics struct { func (ins *Instance) execGather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! 
ping...", target) + klog.V(1).InfoS("ping target", "target", target) } fields := map[string]interface{}{"result_code": 0} @@ -72,9 +72,9 @@ func (ins *Instance) execGather(slist *types.SampleList, target string) { // Combine go err + stderr output out = strings.TrimSpace(out) if len(out) > 0 { - log.Println(target, fmt.Errorf("%w - %s", err, out)) + klog.ErrorS(fmt.Errorf("%w - %s", err, out), "ping command failed", "target", target) } else { - log.Println(target, fmt.Errorf("%w", err)) + klog.ErrorS(fmt.Errorf("%w", err), "ping command failed", "target", target) } fields["result_code"] = 2 return @@ -84,7 +84,7 @@ func (ins *Instance) execGather(slist *types.SampleList, target string) { stats, err := processPingOutput(out) if err != nil { // fatal error - log.Println(target, fmt.Errorf("%w - %s", err, out)) + klog.ErrorS(fmt.Errorf("%w - %s", err, out), "failed to process ping output", "target", target) fields["result_code"] = 2 return } diff --git a/inputs/ping/ping_windows.go b/inputs/ping/ping_windows.go index 40364b0bd..1d0073d78 100644 --- a/inputs/ping/ping_windows.go +++ b/inputs/ping/ping_windows.go @@ -5,12 +5,12 @@ package ping import ( "errors" "fmt" - "log" "regexp" "strconv" "strings" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) type roundTripTimeStats struct { @@ -28,7 +28,7 @@ type statistics struct { func (ins *Instance) execGather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! 
ping...", target) + klog.V(1).InfoS("ping target", "target", target) } fields := map[string]interface{}{"result_code": 0} @@ -53,9 +53,9 @@ func (ins *Instance) execGather(slist *types.SampleList, target string) { if err != nil { // fatal error if pendingError != nil { - log.Println(target, fmt.Errorf("%s: %w", target, pendingError)) + klog.ErrorS(fmt.Errorf("%s: %w", target, pendingError), "failed to process ping output", "target", target) } else { - log.Println(target, fmt.Errorf("%s: %w", target, err)) + klog.ErrorS(fmt.Errorf("%s: %w", target, err), "failed to process ping output", "target", target) } fields["result_code"] = 2 diff --git a/inputs/postgresql/postgresql.go b/inputs/postgresql/postgresql.go index f9bf979b8..08bfc163d 100644 --- a/inputs/postgresql/postgresql.go +++ b/inputs/postgresql/postgresql.go @@ -5,7 +5,6 @@ import ( "context" "database/sql" "fmt" - "log" "net" "net/url" "regexp" @@ -21,6 +20,7 @@ import ( "flashcat.cloud/categraf/types" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" + "k8s.io/klog/v2" ) const ( @@ -121,7 +121,7 @@ func (ins *Instance) Init() error { connConfig, err := pgx.ParseConfig(ins.Address) if err != nil { - log.Println("E! can't parse address :", err) + klog.ErrorS(err, "can't parse postgresql address", "address", ins.Address) return err } @@ -155,18 +155,18 @@ func (ins *Instance) Gather(slist *types.SampleList) { ) addr, err := ins.SanitizedAddress() if err != nil { - log.Println("E! can't sanitize address :", err) + klog.ErrorS(err, "can't sanitize postgresql address") } tags := map[string]string{"server": addr} if ins.db, err = sql.Open("pgx", ins.connConfig); err != nil { slist.PushSample(inputName, "up", 0, tags) - log.Println("E! can't open db :", err) + klog.ErrorS(err, "can't open postgresql db", "server", addr) return } defer ins.db.Close() if err := ins.db.Ping(); err != nil { slist.PushSample(inputName, "up", 0, tags) - log.Println("E! 
failed to ping postgresql:", addr, err) + klog.ErrorS(err, "failed to ping postgresql", "server", addr) return } slist.PushSample(inputName, "up", 1, tags) @@ -187,7 +187,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { rows, err := ins.db.Query(query) if err != nil { - log.Println("E! failed to execute Query :", err) + klog.ErrorS(err, "failed to execute postgresql query", "query", query) return } @@ -195,14 +195,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { // grab the column information from the result if columns, err = rows.Columns(); err != nil { - log.Println("E! failed to grab column info:", err) + klog.ErrorS(err, "failed to grab postgresql column info") return } for rows.Next() { err = ins.accRow(rows, slist, "", columns, columns, nil) if err != nil { - log.Println("E! failed to get row data:", err) + klog.ErrorS(err, "failed to get postgresql row data") return } } @@ -212,7 +212,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { var version int err = ins.db.QueryRow("SELECT current_setting('server_version_num')::int").Scan(&version) if err != nil { - log.Println("E! failed to query current version:", err) + klog.ErrorS(err, "failed to query current postgresql version") return } ins.Version = version @@ -222,7 +222,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { query = `SELECT * FROM pg_stat_bgwriter` bgWriterRow, err := ins.db.Query(query) if err != nil { - log.Println("E! failed to execute Query:", err) + klog.ErrorS(err, "failed to execute postgresql query", "query", query) return } @@ -230,14 +230,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { // grab the column information from the result if columns, err = bgWriterRow.Columns(); err != nil { - log.Println("E! failed to grab column info:", err) + klog.ErrorS(err, "failed to grab postgresql column info") return } for bgWriterRow.Next() { err = ins.accRow(bgWriterRow, slist, "", columns, columns, nil) if err != nil { - log.Println("E! 
failed to get row data:", err) + klog.ErrorS(err, "failed to get postgresql row data") return } } @@ -248,20 +248,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { query = `SELECT * FROM pg_stat_bgwriter` bgWriterRow, err := ins.db.Query(query) if err != nil { - log.Println("E! failed to execute Query pg_stat_bgwriter:", err) + klog.ErrorS(err, "failed to execute postgresql query", "query", query) return } defer bgWriterRow.Close() if columns, err = bgWriterRow.Columns(); err != nil { - log.Println("E! failed to grab column info for pg_stat_bgwriter:", err) + klog.ErrorS(err, "failed to grab pg_stat_bgwriter column info") return } for bgWriterRow.Next() { err = ins.accRow(bgWriterRow, slist, "", columns, columns, nil) if err != nil { - log.Println("E! failed to get row data from pg_stat_bgwriter:", err) + klog.ErrorS(err, "failed to get row data from pg_stat_bgwriter") return } } @@ -285,20 +285,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { checkpointerRow, err := ins.db.Query(query) if err != nil { - log.Println("E! failed to get row data:", err) + klog.ErrorS(err, "failed to query pg_stat_checkpointer") return } defer checkpointerRow.Close() if columns, err = checkpointerRow.Columns(); err != nil { - log.Println("E! failed to grab column info for pg_stat_checkpointer:", err) + klog.ErrorS(err, "failed to grab column info for pg_stat_checkpointer") return } for checkpointerRow.Next() { err = ins.accRow(checkpointerRow, slist, "", columns, columns, nil) if err != nil { - log.Println("E! failed to get row data from pg_stat_checkpointer:", err) + klog.ErrorS(err, "failed to get row data from pg_stat_checkpointer") return } } @@ -376,14 +376,14 @@ func (ins *Instance) getStatementMetrics(slist *types.SampleList, version int) { statements, err := ins.db.Query(query + limit) if err != nil { - log.Println("E! 
failed to query stat statements:", err.Error()) + klog.ErrorS(err, "failed to query postgresql stat statements") return } defer statements.Close() columns, err := statements.Columns() if err != nil { - log.Println("E! failed to grab column info:", err.Error()) + klog.ErrorS(err, "failed to grab postgresql column info") return } @@ -392,7 +392,7 @@ func (ins *Instance) getStatementMetrics(slist *types.SampleList, version int) { for statements.Next() { err := ins.accRow(statements, slist, "statements", columns, valueColumns, labelColumns) if err != nil { - log.Println("E! failed to get row data:", err.Error()) + klog.ErrorS(err, "failed to get postgresql row data") return } } @@ -408,12 +408,12 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp rows, err := ins.db.QueryContext(ctx, metricConf.Request) if ctx.Err() == context.DeadlineExceeded { - log.Println("E! postgresql query timeout, request:", metricConf.Request) + klog.ErrorS(nil, "postgresql query timeout", "request", metricConf.Request) return } if err != nil { - log.Println("E! failed to query:", err) + klog.ErrorS(err, "failed to query postgresql") return } @@ -421,7 +421,7 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp cols, err := rows.Columns() if err != nil { - log.Println("E! failed to get columns:", err) + klog.ErrorS(err, "failed to get postgresql columns") return } @@ -434,7 +434,7 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp // Scan the result into the column pointers... if err := rows.Scan(columnPointers...); err != nil { - log.Println("E! failed to scan:", err) + klog.ErrorS(err, "failed to scan postgresql row") return } @@ -448,14 +448,14 @@ func (ins *Instance) scrapeMetric(waitMetrics *sync.WaitGroup, slist *types.Samp count := 0 if err = ins.parseRow(m, metricConf, slist, tags); err != nil { - log.Println("E! 
failed to parse row:", err) + klog.ErrorS(err, "failed to parse postgresql row") continue } else { count++ } if !metricConf.IgnoreZeroResult && count == 0 { - log.Println("E! no metrics found while parsing") + klog.ErrorS(nil, "no postgresql metrics found while parsing") } } } @@ -476,7 +476,7 @@ func (ins *Instance) parseRow(row map[string]string, metricConf MetricConfig, sl for _, column := range metricConf.MetricFields { value, err := conv.ToFloat64(row[column]) if err != nil { - log.Println("E! failed to convert field:", column, "value:", value, "error:", err) + klog.ErrorS(err, "failed to convert postgresql field", "column", column, "value", row[column]) return err } @@ -531,19 +531,19 @@ func (ins *Instance) accRow(row scanner, slist *types.SampleList, prefix string, // extract the database name from the column map if dbNameStr, ok := (*columnMap["datname"]).(string); ok { if _, err := dbname.WriteString(dbNameStr); err != nil { - log.Println("E! failed to WriteString:", dbNameStr, err) + klog.ErrorS(err, "failed to write postgresql db name", "db", dbNameStr) return err } } else { // PG 12 adds tracking of global objects to pg_stat_database if _, err := dbname.WriteString("postgres_global"); err != nil { - log.Println("E! failed to WriteString: postgres_global", err) + klog.ErrorS(err, "failed to write postgresql db name", "db", "postgres_global") return err } } } else { if _, err := dbname.WriteString("postgres"); err != nil { - log.Println("E! failed to WriteString: postgres", err) + klog.ErrorS(err, "failed to write postgresql db name", "db", "postgres") return err } } @@ -551,7 +551,7 @@ func (ins *Instance) accRow(row scanner, slist *types.SampleList, prefix string, var tagAddress string tagAddress, err = ins.SanitizedAddress() if err != nil { - log.Println("E! 
failed to SanitizedAddress", err) + klog.ErrorS(err, "failed to sanitize postgresql address") return err } @@ -658,4 +658,4 @@ func (ins *Instance) SanitizedAddress() (sanitizedAddress string, err error) { } return sanitizedAddress, err -} \ No newline at end of file +} diff --git a/inputs/processes/processes_notwindows.go b/inputs/processes/processes_notwindows.go index 05e411791..576e745b5 100644 --- a/inputs/processes/processes_notwindows.go +++ b/inputs/processes/processes_notwindows.go @@ -6,7 +6,6 @@ package processes import ( "bytes" "fmt" - "log" "os" "os/exec" "path/filepath" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/osx" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "processes" @@ -60,11 +60,11 @@ func (p *Processes) Gather(slist *types.SampleList) { // Gather stats from 'ps' or procfs if usePS { if err := p.gatherFromPS(fields); err != nil { - log.Println("E! failed to gather from ps:", err) + klog.ErrorS(err, "failed to gather process metrics from ps") } } else { if err := p.gatherFromProc(fields); err != nil { - log.Println("E! failed to gather from proc:", err) + klog.ErrorS(err, "failed to gather process metrics from proc") } } @@ -132,7 +132,7 @@ func (p *Processes) gatherFromPS(fields map[string]interface{}) error { case '?': fields["unknown"] = fields["unknown"].(int64) + int64(1) default: - log.Println("W! unknown state:", string(status[0]), "from ps") + klog.Warningf("unknown process state %q from ps", string(status[0])) } fields["total"] = fields["total"].(int64) + int64(1) } @@ -189,13 +189,13 @@ func (p *Processes) gatherFromProc(fields map[string]interface{}) error { } fields["parked"] = int64(1) default: - log.Println("W! 
Unknown state:", string(stats[0][0]), "in file:", filename) + klog.Warningf("unknown process state %q in file %s", string(stats[0][0]), filename) } fields["total"] = fields["total"].(int64) + int64(1) threads, err := strconv.Atoi(string(stats[17])) if err != nil { - log.Println("W! Error parsing thread count:", err) + klog.Warningf("error parsing process thread count from %s: %v", filename, err) continue } fields["total_threads"] = fields["total_threads"].(int64) + int64(threads) diff --git a/inputs/procstat/procstat.go b/inputs/procstat/procstat.go index a3f8b46dd..43728f51c 100644 --- a/inputs/procstat/procstat.go +++ b/inputs/procstat/procstat.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "log" "os" "os/exec" "regexp" @@ -19,6 +18,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) var execCommand = exec.Command @@ -71,21 +71,21 @@ func (ins *Instance) Init() error { if ins.SearchExecSubstring != "" { ins.searchString = ins.SearchExecSubstring - log.Println("I! procstat: search_exec_substring:", ins.SearchExecSubstring) + klog.InfoS("procstat configured search_exec_substring", "value", ins.SearchExecSubstring) } else if ins.SearchCmdlineSubstring != "" { ins.searchString = ins.SearchCmdlineSubstring - log.Println("I! procstat: search_cmdline_substring:", ins.SearchCmdlineSubstring) + klog.InfoS("procstat configured search_cmdline_substring", "value", ins.SearchCmdlineSubstring) } else if ins.SearchWinService != "" { ins.searchString = ins.SearchWinService - log.Println("I! procstat: search_win_service:", ins.SearchWinService) + klog.InfoS("procstat configured search_win_service", "value", ins.SearchWinService) } else if ins.SearchExecRegexp != "" { ins.searchExecRegexp = regexp.MustCompile(ins.SearchExecRegexp) ins.searchString = ins.SearchExecRegexp - log.Println("I! 
procstat: search_exec_regexp:", ins.SearchExecRegexp) + klog.InfoS("procstat configured search_exec_regexp", "value", ins.SearchExecRegexp) } else if ins.SearchCmdLineRegexp != "" { ins.searchCmdLineRegexp = regexp.MustCompile(ins.SearchCmdLineRegexp) ins.searchString = ins.SearchCmdLineRegexp - log.Println("I! procstat: search_cmdline_regexp:", ins.SearchCmdLineRegexp) + klog.InfoS("procstat configured search_cmdline_regexp", "value", ins.SearchCmdLineRegexp) } else { return errors.New("the fields should not be all blank: search_exec_substring, search_cmdline_substring, search_win_service") } @@ -97,9 +97,9 @@ func (ins *Instance) Init() error { extractLabelKey := r.SubexpNames() if len(extractLabelKey) > 0 { ins.labelsFromCmdlineRegexp = r - log.Println("I! procstat: gather labels from cmdline using regexp. labels: ", extractLabelKey) + klog.InfoS("procstat gather labels from cmdline using regexp", "labels", extractLabelKey) } else { - log.Println("W! procstat: labels_from_cmdline_reggroup no NamedGroup label includes, ignore this conf: ", ins.LabelsFromCmdlineRegexp) + klog.Warningf("procstat labels_from_cmdline_reggroup has no named groups, ignoring config: %s", ins.LabelsFromCmdlineRegexp) } } @@ -192,12 +192,12 @@ func (ins *Instance) Gather(slist *types.SampleList) { } else if ins.SearchWinService != "" { pids, err = ins.winServicePIDs() } else { - log.Println("E! Oops... search string not found") + klog.Error("procstat search string not found") return } if err != nil { - log.Println("E! procstat: failed to lookup pids, search string:", ins.searchString, "error:", err) + klog.ErrorS(err, "procstat failed to lookup pids", "search_string", ins.searchString) slist.PushFront(types.NewSample(inputName, "lookup_count", 0, tags)) return } @@ -238,14 +238,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { exeMd5cache[exe] = sum } else { if ins.DebugMod { - log.Println("E! 
failed to get md5sum of exe:", exe, "pid:", p.PID(), err) + klog.V(1).InfoS("failed to get md5sum of exe", "exe", exe, "pid", p.PID(), "error", err) } if sum, err := md5sum(fmt.Sprintf("/proc/%d/exe", pid)); err == nil { info["binary_md5sum"] = sum exeMd5cache[exe] = sum } else { if ins.DebugMod { - log.Println("E! failed to get md5sum of /proc/pid/exe:", p.PID(), err) + klog.V(1).InfoS("failed to get md5sum of /proc/pid/exe", "pid", p.PID(), "error", err) } } } @@ -274,7 +274,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { case "jvm": ins.gatherJvm(slist, ins.procs, tags) default: - log.Println("E! unknown choice in gather_more_metrics:", field) + klog.ErrorS(nil, "unknown choice in gather_more_metrics", "field", field) } } } @@ -545,7 +545,7 @@ func (ins *Instance) gatherJvm(slist *types.SampleList, procs map[PID]Process, t for pid := range procs { jvmStat, err := ins.execJstat(pid) if err != nil { - log.Println("E! failed to exec jstat:", err) + klog.ErrorS(err, "failed to exec jstat", "pid", pid) continue } diff --git a/inputs/procstat/win_service_windows.go b/inputs/procstat/win_service_windows.go index 10365e9a3..cf925865e 100644 --- a/inputs/procstat/win_service_windows.go +++ b/inputs/procstat/win_service_windows.go @@ -5,11 +5,11 @@ package procstat import ( "fmt" - "log" "unsafe" "golang.org/x/sys/windows" "golang.org/x/sys/windows/svc/mgr" + "k8s.io/klog/v2" ) func getService(name string) (*mgr.Service, error) { @@ -35,7 +35,7 @@ func queryPidWithWinServiceName(winServiceName string) (uint32, error) { defer func(srv *mgr.Service) { err := srv.Close() if err != nil { - log.Printf("E! 
Close srv error: %s", err) + klog.ErrorS(err, "failed to close windows service handle", "service", winServiceName) } }(srv) diff --git a/inputs/prometheus/consul.go b/inputs/prometheus/consul.go index d777e9ef2..613f8238f 100644 --- a/inputs/prometheus/consul.go +++ b/inputs/prometheus/consul.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "fmt" - "log" "net/url" "strings" "text/template" @@ -13,6 +12,7 @@ import ( "flashcat.cloud/categraf/config" "github.com/hashicorp/consul/api" + "k8s.io/klog/v2" ) type ConsulConfig struct { @@ -75,7 +75,7 @@ func (ins *Instance) InitConsulClient(ctx context.Context) error { for tagName, tagTemplateString := range ins.ConsulConfig.Queries[i].ServiceExtraTags { tagTemplate, err := template.New(tagName).Funcs(templateFunctions).Parse(tagTemplateString) if err != nil { - log.Printf("failed to parse the Consul query Extra Tag template (%s): %s", tagTemplateString, err) + klog.Warningf("failed to parse the Consul query extra tag template (%s): %v", tagTemplateString, err) continue } q.serviceExtraTagsTemplate[tagName] = tagTemplate @@ -100,7 +100,7 @@ func (ins *Instance) InitConsulClient(ctx context.Context) error { err := ins.refreshConsulServices(catalog) if err != nil { refreshFailed = true - log.Printf("Unable to refreh Consul services: %v", err) + klog.Warningf("unable to refresh Consul services: %v", err) } for { select { @@ -109,16 +109,15 @@ func (ins *Instance) InitConsulClient(ctx context.Context) error { case <-time.After(time.Duration(ins.ConsulConfig.QueryInterval)): err := ins.refreshConsulServices(catalog) if err != nil { - message := fmt.Sprintf("Unable to refreh Consul services: %v", err) if refreshFailed { - log.Println("E!", message) + klog.ErrorS(err, "unable to refresh Consul services") } else { - log.Println("W!", message) + klog.Warningf("unable to refresh Consul services: %v", err) } refreshFailed = true } else if refreshFailed { refreshFailed = false - log.Println("Successfully refreshed Consul services after 
previous errors") + klog.InfoS("successfully refreshed Consul services after previous errors") } } } @@ -143,7 +142,7 @@ func (ins *Instance) refreshConsulServices(c *api.Catalog) error { consulServiceURLs := make(map[string]*ScrapeUrl) if ins.DebugMod { - log.Println("Refreshing Consul services") + klog.V(1).InfoS("refreshing Consul services") } for _, q := range ins.ConsulConfig.Queries { @@ -159,31 +158,30 @@ func (ins *Instance) refreshConsulServices(c *api.Catalog) error { } if len(consulServices) == 0 { if ins.DebugMod { - log.Printf("Queried Consul for Service (%s, %s) but did not find any instances\n", q.ServiceName, q.ServiceTag) + klog.V(1).InfoS("queried Consul service and found no instances", "service", q.ServiceName, "tag", q.ServiceTag) } continue } if ins.DebugMod { - log.Printf("Queried Consul for Service (%s, %s) and found %d instances\n", q.ServiceName, q.ServiceTag, len(consulServices)) + klog.V(1).InfoS("queried Consul service and found instances", "service", q.ServiceName, "tag", q.ServiceTag, "count", len(consulServices)) } for _, consulService := range consulServices { uaa, err := ins.getConsulServiceURL(q, consulService) if err != nil { - message := fmt.Sprintf("Unable to get scrape URLs from Consul for Service (%s, %s): %s", q.ServiceName, q.ServiceTag, err) if q.lastQueryFailed { - log.Println("E!", message) + klog.ErrorS(err, "unable to get scrape URLs from Consul", "service", q.ServiceName, "tag", q.ServiceTag) } else { - log.Println("W!", message) + klog.Warningf("unable to get scrape URLs from Consul for service (%s, %s): %v", q.ServiceName, q.ServiceTag, err) } q.lastQueryFailed = true break } if q.lastQueryFailed { - log.Printf("Created scrape URLs from Consul for Service (%s, %s)\n", q.ServiceName, q.ServiceTag) + klog.InfoS("created scrape URLs from Consul after previous errors", "service", q.ServiceName, "tag", q.ServiceTag) } q.lastQueryFailed = false - log.Printf("Adding scrape URL from Consul for Service (%s, %s): %s\n", 
q.ServiceName, q.ServiceTag, uaa.URL.String()) + klog.InfoS("adding scrape URL from Consul", "service", q.ServiceName, "tag", q.ServiceTag, "url", uaa.URL.String()) consulServiceURLs[uaa.URL.String()] = uaa } } @@ -218,7 +216,7 @@ func (ins *Instance) getConsulServiceURL(q *ConsulQuery, s *api.CatalogService) } if ins.DebugMod { - log.Println("D! found consul service:", serviceURL.String()) + klog.V(1).InfoS("found Consul service", "url", serviceURL.String()) } return &ScrapeUrl{ diff --git a/inputs/prometheus/prometheus.go b/inputs/prometheus/prometheus.go index 6339b2d39..698e6f28b 100644 --- a/inputs/prometheus/prometheus.go +++ b/inputs/prometheus/prometheus.go @@ -3,7 +3,6 @@ package prometheus import ( "context" "io" - "log" "net/http" "net/url" "os" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "prometheus" @@ -173,7 +173,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { for i := 0; i < len(ins.URLs); i++ { u, err := url.Parse(ins.URLs[i]) if err != nil { - log.Println("E! failed to parse prometheus scrape url:", ins.URLs[i], "error:", err) + klog.ErrorS(err, "failed to parse prometheus scrape url", "url", ins.URLs[i]) continue } @@ -184,7 +184,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { urls, err := ins.UrlsFromConsul() if err != nil { - log.Println("E! failed to query urls from consul:", err) + klog.ErrorS(err, "failed to query urls from consul") return } @@ -205,7 +205,7 @@ func (ins *Instance) gatherUrl(urlwg *sync.WaitGroup, slist *types.SampleList, u req, err := http.NewRequest("GET", u.String(), nil) if err != nil { - log.Println("E! 
failed to new request for url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to create request for prometheus url", "url", u.String()) return } @@ -213,7 +213,7 @@ func (ins *Instance) gatherUrl(urlwg *sync.WaitGroup, slist *types.SampleList, u labels, err := ins.GenerateLabel(u) if err != nil { - log.Println("E! failed to generate url label value:", err) + klog.ErrorS(err, "failed to generate url label value", "url", u.String()) return } @@ -224,13 +224,13 @@ func (ins *Instance) gatherUrl(urlwg *sync.WaitGroup, slist *types.SampleList, u res, err := ins.client.Do(req) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to query url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to query prometheus url", "url", u.String()) return } if res.StatusCode != http.StatusOK { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to query url:", u.String(), "status code:", res.StatusCode) + klog.ErrorS(nil, "failed to query prometheus url", "url", u.String(), "status_code", res.StatusCode) return } @@ -239,7 +239,7 @@ func (ins *Instance) gatherUrl(urlwg *sync.WaitGroup, slist *types.SampleList, u body, err := io.ReadAll(res.Body) if err != nil { slist.PushFront(types.NewSample("", "up", 0, labels)) - log.Println("E! failed to read response body, url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to read response body", "url", u.String()) return } @@ -247,7 +247,7 @@ func (ins *Instance) gatherUrl(urlwg *sync.WaitGroup, slist *types.SampleList, u parser := prometheus.NewParser(ins.NamePrefix, labels, res.Header, ins.DuplicationAllowed, ins.ignoreMetricsFilter, ins.ignoreLabelKeysFilter) if err = parser.Parse(body, slist); err != nil { - log.Println("E! 
failed to parse response body, url:", u.String(), "error:", err) + klog.ErrorS(err, "failed to parse response body", "url", u.String()) } } @@ -259,7 +259,7 @@ func (ins *Instance) setHeaders(req *http.Request) { if ins.BearerTokeFile != "" { content, err := os.ReadFile(ins.BearerTokeFile) if err != nil { - log.Println("E! failed to read bearer token file:", ins.BearerTokeFile, "error:", err) + klog.ErrorS(err, "failed to read bearer token file", "path", ins.BearerTokeFile) return } diff --git a/inputs/provider_manager.go b/inputs/provider_manager.go index af4e4a8a5..7128dfdfb 100644 --- a/inputs/provider_manager.go +++ b/inputs/provider_manager.go @@ -1,11 +1,11 @@ package inputs import ( - "log" "strings" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/pkg/cfg" + "k8s.io/klog/v2" ) const inputFilePrefix = "input." @@ -57,7 +57,7 @@ type Provider interface { } func NewProvider(c *config.ConfigType, op InputOperation) ([]Provider, error) { - log.Println("I! use input provider:", c.Global.Providers) + klog.InfoS("use input providers", "providers", c.Global.Providers) // 不添加provider配置 则默认使用local // 兼容老版本 if len(c.Global.Providers) == 0 { @@ -68,7 +68,7 @@ func NewProvider(c *config.ConfigType, op InputOperation) ([]Provider, error) { for _, p := range c.Global.Providers { name := strings.ToLower(p) if _, ok := record[name]; ok { - log.Println("W! 
duplicate input provider:", name) + klog.Warningf("duplicate input provider: %s", name) continue } else { record[name] = struct{}{} diff --git a/inputs/rabbitmq/rabbitmq.go b/inputs/rabbitmq/rabbitmq.go index 37afaa356..f62b43586 100644 --- a/inputs/rabbitmq/rabbitmq.go +++ b/inputs/rabbitmq/rabbitmq.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "sync" "time" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "rabbitmq" @@ -379,7 +379,7 @@ func (ins *Instance) requestEndpoint(u string) ([]byte, error) { endpoint := ins.URL + u if ins.DebugMod { - log.Println("D! requesting:", endpoint) + klog.V(1).InfoS("requesting rabbitmq endpoint", "endpoint", endpoint) } req, err := http.NewRequest("GET", endpoint, nil) @@ -429,12 +429,12 @@ func gatherOverview(ins *Instance, slist *types.SampleList) { err := ins.requestJSON("/api/overview", &overview) if err != nil { - log.Println("E! failed to query rabbitmq /api/overview:", err) + klog.ErrorS(err, "failed to query rabbitmq overview", "url", ins.URL) return } if overview.QueueTotals == nil || overview.ObjectTotals == nil || overview.MessageStats == nil || overview.Listeners == nil { - log.Println("E! wrong answer from rabbitmq. probably auth issue") + klog.ErrorS(nil, "wrong answer from rabbitmq, probably auth issue", "url", ins.URL, "endpoint", "/api/overview") return } @@ -480,7 +480,7 @@ func gatherExchanges(ins *Instance, slist *types.SampleList) { exchanges := make([]Exchange, 0) err := ins.requestJSON("/api/exchanges", &exchanges) if err != nil { - log.Println("E! 
failed to query rabbitmq /api/exchanges:", err) + klog.ErrorS(err, "failed to query rabbitmq exchanges", "url", ins.URL) return } @@ -528,7 +528,7 @@ func gatherFederationLinks(ins *Instance, slist *types.SampleList) { federationLinks := make([]FederationLink, 0) err := ins.requestJSON("/api/federation-links", &federationLinks) if err != nil { - log.Println("E! failed to query rabbitmq /api/federation-links:", err) + klog.ErrorS(err, "failed to query rabbitmq federation links", "url", ins.URL) return } @@ -587,7 +587,7 @@ func gatherNodes(ins *Instance, slist *types.SampleList) { err := ins.requestJSON("/api/nodes", &allNodes) if err != nil { - log.Println("E! failed to query rabbitmq /api/nodes:", err) + klog.ErrorS(err, "failed to query rabbitmq nodes", "url", ins.URL) return } @@ -644,7 +644,7 @@ func gatherNodes(ins *Instance, slist *types.SampleList) { var memory MemoryResponse err = ins.requestJSON("/api/nodes/"+node.Name+"/memory", &memory) if err != nil { - log.Println("E! failed to query rabbitmq /api/nodes/"+node.Name+"/memory:", err) + klog.ErrorS(err, "failed to query rabbitmq node memory", "url", ins.URL, "node", node.Name) return } @@ -682,14 +682,14 @@ func gatherNodes(ins *Instance, slist *types.SampleList) { } msg := fmt.Sprintf("unknown type %T for %q total memory", x, estimator) - log.Println("E!", msg) + klog.Error(msg) } } if !foundEstimator { - log.Println("E! no known memory estimation in", v) + klog.ErrorS(nil, "no known memory estimation in rabbitmq response", "value", v, "node", node.Name) } default: - log.Println("E! unknown type", memory.Memory.Total, "for total memory") + klog.ErrorS(nil, "unknown rabbitmq memory total type", "value", memory.Memory.Total, "node", node.Name) } } @@ -730,7 +730,7 @@ func gatherQueues(ins *Instance, slist *types.SampleList) { queues := make([]Queue, 0) err := ins.requestJSON("/api/queues", &queues) if err != nil { - log.Println("E! 
failed to query rabbitmq /api/queues:", err) + klog.ErrorS(err, "failed to query rabbitmq queues", "url", ins.URL) return } diff --git a/inputs/redfish/redfish.go b/inputs/redfish/redfish.go index 769e3c8f8..c21679b75 100644 --- a/inputs/redfish/redfish.go +++ b/inputs/redfish/redfish.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "log" "net/http" "net/url" "strings" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" "github.com/tidwall/gjson" + "k8s.io/klog/v2" ) const ( @@ -141,7 +141,7 @@ func join(in ...string) string { func (i *Instance) Gather(sList *types.SampleList) { for _, a := range i.Addresses { if err := i.gatherRedfishUp(a, sList); err != nil { - log.Println("E! error gatherRedfishAccess", err) + klog.ErrorS(err, "failed to gather redfish access metrics", "host", a.baseURL.Host) continue } @@ -150,7 +150,7 @@ func (i *Instance) Gather(sList *types.SampleList) { js, err := a.getData(setUrl.String()) if err != nil { - log.Println("E! error getData", err) + klog.ErrorS(err, "failed to get redfish data", "url", setUrl.String(), "host", a.baseURL.Host) continue } @@ -209,7 +209,7 @@ func (i *Instance) Gather(sList *types.SampleList) { } if err := i.gatherDisks(a, &i.Disks, sList, ""); err != nil { - log.Println("E! 
get disks data error", err) + klog.ErrorS(err, "failed to gather redfish disk data", "host", a.baseURL.Host) continue } } diff --git a/inputs/redis/redis.go b/inputs/redis/redis.go index fe23bc1e2..342500835 100644 --- a/inputs/redis/redis.go +++ b/inputs/redis/redis.go @@ -4,7 +4,6 @@ import ( "bufio" "context" "fmt" - "log" "regexp" "strconv" "strings" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" "github.com/go-redis/redis/v8" + "k8s.io/klog/v2" ) const inputName = "redis" @@ -122,7 +122,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { slist.PushFront(types.NewSample(inputName, "ping_use_seconds", time.Since(begun).Seconds(), tags)) if err != nil { slist.PushFront(types.NewSample(inputName, "up", 0, tags)) - log.Println("E! failed to ping redis:", ins.Address, "error:", err) + klog.ErrorS(err, "failed to ping redis", "address", ins.Address) return } else { slist.PushFront(types.NewSample(inputName, "up", 1, tags)) @@ -139,7 +139,7 @@ func (ins *Instance) gatherSlowLog(slist *types.SampleList, tags map[string]stri } info, err := ins.client.SlowLogGet(context.Background(), ins.SlowLogMaxLen).Result() if err != nil { - log.Println("E! get slow log err:", err) + klog.ErrorS(err, "get slow log error", "address", ins.Address) return } now := time.Now().Unix() @@ -163,7 +163,7 @@ func (ins *Instance) gatherCommandValues(slist *types.SampleList, tags map[strin for _, cmd := range ins.Commands { val, err := ins.client.Do(context.Background(), cmd.Command...).Result() if err != nil { - log.Println("E! failed to exec redis command:", cmd.Command) + klog.ErrorS(err, "failed to exec redis command", "address", ins.Address, "command", cmd.Command) continue } @@ -172,7 +172,7 @@ func (ins *Instance) gatherCommandValues(slist *types.SampleList, tags map[strin } fval, err := conv.ToFloat64(val) if err != nil { - log.Println("E! 
failed to convert result of command:", cmd.Command, "error:", err) + klog.ErrorS(err, "failed to convert result of command", "address", ins.Address, "command", cmd.Command) continue } @@ -191,7 +191,7 @@ func (ins *Instance) gatherInfoAll(slist *types.SampleList, tags map[string]stri } if err != nil { - log.Println("E! failed to call redis `info all`:", err) + klog.ErrorS(err, "failed to call redis info all", "address", ins.Address) return } diff --git a/inputs/redis_sentinel/redis_sentinel.go b/inputs/redis_sentinel/redis_sentinel.go index 8c2c09fbd..cdf6223a8 100644 --- a/inputs/redis_sentinel/redis_sentinel.go +++ b/inputs/redis_sentinel/redis_sentinel.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "io" - "log" "net/url" "strconv" "strings" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" "github.com/go-redis/redis/v8" + "k8s.io/klog/v2" ) const inputName = "redis_sentinel" @@ -131,20 +131,20 @@ func (ins *Instance) Gather(slist *types.SampleList) { masters, err := client.gatherMasterStats(slist) if err != nil { - log.Println("E! failed to gather master stats:", err) + klog.ErrorS(err, "failed to gather redis sentinel master stats", "tags", client.tags) } for _, master := range masters { if err := client.gatherReplicaStats(slist, master); err != nil { - log.Println("E! failed to gather replica stats:", err) + klog.ErrorS(err, "failed to gather redis sentinel replica stats", "master", master, "tags", client.tags) } if err := client.gatherSentinelStats(slist, master); err != nil { - log.Println("E! failed to gather sentinel stats:", err) + klog.ErrorS(err, "failed to gather redis sentinel sentinel stats", "master", master, "tags", client.tags) } } if err := client.gatherInfoStats(slist); err != nil { - log.Println("E! 
failed to gather info stats:", err) + klog.ErrorS(err, "failed to gather redis sentinel info stats", "tags", client.tags) } }(slist, client) } diff --git a/inputs/rocketmq_offset/rocketmq.go b/inputs/rocketmq_offset/rocketmq.go index 95444d2a7..3574b9f9c 100644 --- a/inputs/rocketmq_offset/rocketmq.go +++ b/inputs/rocketmq_offset/rocketmq.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strings" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "rocketmq_offset" @@ -106,13 +106,13 @@ func (ins *Instance) Init() error { func (ins *Instance) Gather(slist *types.SampleList) { // 判断username是否为空,如果不为空则登录并获取 cookie - log.Printf("console login username: %s", ins.Username) + klog.V(1).InfoS("rocketmq console login", "username", ins.Username, "address", ins.RocketMQConsoleIPAndPort) cookies := "" if ins.Username != "" { loginCookie, err := ins.Login() cookies = loginCookie if err != nil { - log.Printf("E! failed to login: %v", err) + klog.ErrorS(err, "failed to login rocketmq console", "address", ins.RocketMQConsoleIPAndPort, "username", ins.Username) return } } @@ -120,7 +120,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // 获取rocketmq集群中的topicNameList topicNameArray := GetTopicNameList(ins.RocketMQConsoleIPAndPort, cookies) if topicNameArray == nil { - log.Println("E! fail to get topic,please check config!") + klog.ErrorS(nil, "failed to get rocketmq topic list, please check config", "address", ins.RocketMQConsoleIPAndPort) return } @@ -340,14 +340,14 @@ func GetTopicNameList(rocketmqConsoleIPAndPort string, cookies string) []string var url = consoleSchema + rocketmqConsoleIPAndPort + topicNameListPath var content, err = doRequest(url, cookies) if err != nil { - log.Println("E! 
unable to get topic name list", err) + klog.ErrorS(err, "unable to get rocketmq topic name list", "url", url) return nil } var jsonData TopicList err = json.Unmarshal([]byte(content), &jsonData) if err != nil { - log.Println("E! unable to decode topic name list", err) + klog.ErrorS(err, "unable to decode rocketmq topic name list", "url", url) return nil } @@ -358,14 +358,14 @@ func GetConsumerListByTopic(rocketmqConsoleIPAndPort string, topicName string, c var url = consoleSchema + rocketmqConsoleIPAndPort + queryConsumerByTopicPath + topicName var content, err = doRequest(url, cookies) if err != nil { - log.Println("E! unable to get consumer list by topic", err) + klog.ErrorS(err, "unable to get rocketmq consumer list by topic", "url", url, "topic", topicName) return nil } var jsonData *ConsumerListByTopic err = json.Unmarshal([]byte(content), &jsonData) if err != nil { - log.Println("E! unable to decode consumer list by topic", err) + klog.ErrorS(err, "unable to decode rocketmq consumer list by topic", "url", url, "topic", topicName) return nil } @@ -391,7 +391,7 @@ func doRequest(url string, cookies string) ([]byte, error) { body, err := io.ReadAll(res.Body) if err != nil { - log.Println("E! 
fail to read request data", err) + klog.ErrorS(err, "failed to read rocketmq request body", "url", url) return nil, err } diff --git a/inputs/self_metrics/metrics.go b/inputs/self_metrics/metrics.go index 6470b8b4e..59016aac6 100644 --- a/inputs/self_metrics/metrics.go +++ b/inputs/self_metrics/metrics.go @@ -1,8 +1,6 @@ package categraf import ( - "log" - "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" @@ -11,6 +9,7 @@ import ( "flashcat.cloud/categraf/pkg/metrics" "flashcat.cloud/categraf/types" "flashcat.cloud/categraf/writer" + "k8s.io/klog/v2" ) const ( @@ -39,7 +38,7 @@ func (pt *Categraf) Name() string { func (ins *Categraf) Gather(slist *types.SampleList) { mfs, err := prometheus.DefaultGatherer.Gather() if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to gather self metrics") return } vTag := map[string]string{ diff --git a/inputs/smart/instances.go b/inputs/smart/instances.go index f2ba6b862..2d42803ff 100644 --- a/inputs/smart/instances.go +++ b/inputs/smart/instances.go @@ -5,7 +5,6 @@ import ( "context" "errors" "fmt" - "log" "os" "os/exec" "path" @@ -20,6 +19,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) // Instance plugin reads metrics from storage devices supporting S.M.A.R.T. @@ -90,10 +90,7 @@ func (m *Instance) Init() error { if err != nil { m.PathNVMe = "" // without nvme, plugin will not be able to gather vendor specific attributes (but it can work without it) - log.Printf( - "W! 
nvme not found: verify that nvme is installed and it is in your PATH (or specified in config) to gather vendor specific attributes: %s", - err.Error(), - ) + klog.Warningf("nvme not found: verify that nvme is installed and it is in your PATH (or specified in config) to gather vendor specific attributes: %s", err.Error()) } return nil @@ -116,7 +113,7 @@ func (m *Instance) Gather(slist *types.SampleList) { if isVendorExtension && isNVMe { scannedNVMeDevices, _, err = m.scanAllDevices(true) if err != nil { - log.Println("E! error while scanning devices:", err) + klog.ErrorS(err, "error while scanning SMART devices") return } nvmeDevices := distinguishNVMeDevices(devicesFromConfig, scannedNVMeDevices) @@ -127,7 +124,7 @@ func (m *Instance) Gather(slist *types.SampleList) { } scannedNVMeDevices, scannedNonNVMeDevices, err = m.scanAllDevices(false) if err != nil { - log.Println("E! error while scanning all devices:", err) + klog.ErrorS(err, "error while scanning all SMART devices") return } var devicesFromScan []string @@ -278,7 +275,7 @@ func getDeviceInfoForNVMeDisks(slist *types.SampleList, devices []string, nvme s for _, device := range devices { newDevice, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo) if err != nil { - log.Printf("E! cannot find device info for %s device", device) + klog.ErrorS(err, "cannot find NVMe device info", "device", device) continue } nvmeDevices = append(nvmeDevices, newDevice) @@ -344,7 +341,7 @@ func gatherIntelNVMeDisk(slist *types.SampleList, timeout config.Duration, usesu _, er := exitStatus(e) if er != nil { - log.Printf("E! 
failed to run command '%s %s': %v - %s", nvme, strings.Join(args, " "), e, outStr) + klog.ErrorS(e, "failed to run NVMe command", "command", nvme, "args", strings.Join(args, " "), "output", outStr) return } @@ -413,7 +410,7 @@ func (m *Instance) gatherDisk(slist *types.SampleList, device string, wg *sync.W // Ignore all exit statuses except if it is a command line parse error exitStatus, er := exitStatus(e) if er != nil { - log.Printf("E! failed to run command '%s %s': %v - %s", m.PathSmartctl, strings.Join(args, " "), e, outStr) + klog.ErrorS(e, "failed to run smartctl command", "command", m.PathSmartctl, "args", strings.Join(args, " "), "output", outStr) return } @@ -548,7 +545,7 @@ func (m *Instance) gatherDisk(slist *types.SampleList, device string, wg *sync.W } if err := parse(fields, deviceFields, metric, matches[2]); err != nil { - log.Printf("E!error parsing %s: %q: %v", attr.Name, matches[2], err) + klog.ErrorS(err, "error parsing SMART attribute", "attribute", attr.Name, "value", matches[2]) continue } // if the field is classified as an attribute, only add it diff --git a/inputs/snmp/health_check.go b/inputs/snmp/health_check.go index db4152052..c7c53f330 100644 --- a/inputs/snmp/health_check.go +++ b/inputs/snmp/health_check.go @@ -1,10 +1,10 @@ package snmp import ( - "log" "time" coreconfig "flashcat.cloud/categraf/config" + "k8s.io/klog/v2" ) func (ins *Instance) StartHealthMonitor() { @@ -59,20 +59,20 @@ func (ins *Instance) checkAgentHealth(i int, agent string) { gs, err := NewWrapper(clientConfig) if err != nil { - log.Printf("Health check: agent %s connection creation error: %s", agent, err) + klog.ErrorS(err, "health check connection creation error", "agent", agent) ins.markAgentUnhealthy(agent) return } err = gs.SetAgent(agent) if err != nil { - log.Printf("Health check: agent %s set agent error: %s", agent, err) + klog.ErrorS(err, "health check set agent error", "agent", agent) ins.markAgentUnhealthy(agent) return } if err := gs.Connect(); err 
!= nil { - log.Printf("Health check: agent %s connection error: %s", agent, err) + klog.ErrorS(err, "health check connect error", "agent", agent) ins.markAgentUnhealthy(agent) return } @@ -97,14 +97,14 @@ func (ins *Instance) checkAgentHealth(i int, agent string) { // Mark as unhealthy after reaching max fail count if status.failCount >= ins.MaxFailCount { if status.healthy { - log.Printf("Agent %s marked as unhealthy after %d consecutive failures", agent, status.failCount) + klog.Warningf("agent %s marked as unhealthy after %d consecutive failures", agent, status.failCount) status.healthy = false } } } else { // If it was unhealthy before, log recovery if !status.healthy { - log.Printf("Agent %s recovered and marked healthy", agent) + klog.InfoS("agent recovered and marked healthy", "agent", agent) } status.healthy = true status.failCount = 0 @@ -136,7 +136,7 @@ func (ins *Instance) markAgentUnhealthy(agent string) { status.failCount++ if status.failCount >= ins.MaxFailCount { if status.healthy { - log.Printf("Agent %s marked as unhealthy after %d consecutive failures", agent, status.failCount) + klog.Warningf("agent %s marked as unhealthy after %d consecutive failures", agent, status.failCount) status.healthy = false } } diff --git a/inputs/snmp/instances.go b/inputs/snmp/instances.go index b3cd51b24..b1b3be66a 100644 --- a/inputs/snmp/instances.go +++ b/inputs/snmp/instances.go @@ -2,7 +2,6 @@ package snmp import ( "fmt" - "log" "net" "net/url" "strings" @@ -11,6 +10,7 @@ import ( "github.com/freedomkk-qfeng/go-fastping" "github.com/gosnmp/gosnmp" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/types" @@ -229,16 +229,16 @@ func (ins *Instance) Gather(slist *types.SampleList) { gs, err := ins.getConnection(i) if err != nil { - log.Printf("agent %s ins: %s", agent, err) + klog.ErrorS(err, "failed to get snmp connection", "agent", agent) return } if !ins.isAgentHealthy(agent) { - log.Printf("Skipping unhealthy agent %s during 
collection", agent) + klog.Warningf("skipping unhealthy agent %s during collection", agent) return } if err := ins.gatherTable(slist, gs, t, topTags, extraTags, false); err != nil { - log.Printf("agent %s ins: %s", agent, err) + klog.ErrorS(err, "failed to gather snmp root table", "agent", agent) ins.markAgentUnhealthy(agent) } @@ -246,7 +246,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // Now is the real tables. for _, t := range ins.Tables { if err := ins.gatherTable(slist, gs, t, topTags, extraTags, true); err != nil { - log.Printf("agent %s ins: gathering table %s error: %s", agent, t.Name, err) + klog.ErrorS(err, "failed to gather snmp table", "agent", agent, "table", t.Name) markCnt++ } } @@ -351,7 +351,7 @@ func Ping(ip string, timeout int) (up, rttAvg, loss float64) { rtt, err := fastPingRtt(ip, timeout) if err != nil { lost++ - log.Printf("W! snmp ping %s error:%s", ip, err) + klog.Warningf("snmp ping %s error: %s", ip, err) continue } if rtt == -1 { diff --git a/inputs/snmp/netsnmp.go b/inputs/snmp/netsnmp.go index 296cc0570..04e23a7ef 100644 --- a/inputs/snmp/netsnmp.go +++ b/inputs/snmp/netsnmp.go @@ -5,10 +5,11 @@ import ( "bytes" "errors" "fmt" - "log" "os/exec" "strings" "sync" + + "k8s.io/klog/v2" ) // struct that implements the translator interface. This calls existing @@ -44,7 +45,7 @@ func (n *netsnmpTranslator) execCmd(arg0 string, args ...string) ([]byte, error) for _, arg := range args { quoted = append(quoted, fmt.Sprintf("%q", arg)) } - log.Printf("D! 
[inputs.snmp] executing %q %s", arg0, strings.Join(quoted, " ")) + klog.V(1).InfoS("inputs.snmp executing command", "command", arg0, "args", strings.Join(quoted, " ")) } out, err := execCommand(arg0, args...).Output() diff --git a/inputs/snmp/table.go b/inputs/snmp/table.go index 0b52d4433..3c9ffcde4 100644 --- a/inputs/snmp/table.go +++ b/inputs/snmp/table.go @@ -4,7 +4,6 @@ import ( "encoding/binary" "errors" "fmt" - "log" "math" "net" "regexp" @@ -14,6 +13,7 @@ import ( "github.com/Knetic/govaluate" "github.com/gosnmp/gosnmp" + "k8s.io/klog/v2" ) const ( @@ -77,7 +77,7 @@ func (t *Table) Init(tr Translator) error { return nil } if len(t.IncludeFilter) != 0 { - log.Println("W! include_filter is deprecated, please use filters instead") + klog.Warning("include_filter is deprecated, please use filters instead") t.Filters = append(t.Filters, t.IncludeFilter...) } @@ -349,7 +349,7 @@ func (t Table) Build(gs snmpConnection, walk bool, tr Translator) (*RTable, erro } ifv[""] = fv } else { - log.Println("W! no info for oid:", oid, "target:", gs.Host()) + klog.Warningf("no info for oid: %s target: %s", oid, gs.Host()) } } else { err := gs.Walk(oid, func(ent gosnmp.SnmpPDU) error { @@ -386,7 +386,7 @@ func (t Table) Build(gs snmpConnection, walk bool, tr Translator) (*RTable, erro // If no error translating, the original value for ent.Value should be replaced ent.Value = oidText } else { - log.Printf("E! translate error:%s, entOid:%s, oid:%s", err, entOid, oid) + klog.ErrorS(err, "translate error", "ent_oid", entOid, "oid", oid) } } } @@ -407,10 +407,10 @@ func (t Table) Build(gs snmpConnection, walk bool, tr Translator) (*RTable, erro // from the callback var walkErr *walkError if !errors.As(err, &walkErr) { - log.Printf("E! snmp walk error:%s, oid:%s ", err, oid) + klog.ErrorS(err, "snmp walk error", "oid", oid) return nil, fmt.Errorf("performing bulk walk for field %s(%s): %w", f.Name, oid, err) } else { - log.Printf("W! 
snmp walk error:%s(%s), oid:%s", err, walkErr.Unwrap(), oid) + klog.Warningf("snmp walk error: %s(%v), oid:%s", err, walkErr.Unwrap(), oid) } } } @@ -482,7 +482,7 @@ func (t Table) Build(gs snmpConnection, walk bool, tr Translator) (*RTable, erro if len(t.FilterExpression) != 0 { expr, err = govaluate.NewEvaluableExpression(t.FilterExpression) if err != nil { - log.Println("filters_expression err:", err) + klog.ErrorS(err, "filters_expression error") } } strictMode := t.FilterMode == StrictMode @@ -519,7 +519,7 @@ func (t Table) Build(gs snmpConnection, walk bool, tr Translator) (*RTable, erro if len(params) != 0 { result, err := expr.Evaluate(params) if err != nil { - log.Println("filter expression err:", err) + klog.ErrorS(err, "filter expression error") } if match, ok := result.(bool); ok && !match { continue @@ -570,7 +570,7 @@ func fieldConvert(tr Translator, conv string, ent gosnmp.SnmpPDU) (v interface{} var ret float64 floatVal, err := heuristicDataExtract(vt) if err != nil { - log.Printf("E! 
failed to extract float from string: %s, error: %v", vt, err) + klog.ErrorS(err, "failed to extract float from string", "value", vt) vf, _ := strconv.ParseFloat(vt, 64) ret = vf / math.Pow10(d) } else { diff --git a/inputs/snmp_trap/snmp_trap.go b/inputs/snmp_trap/snmp_trap.go index 78549dcf0..29f6ef441 100644 --- a/inputs/snmp_trap/snmp_trap.go +++ b/inputs/snmp_trap/snmp_trap.go @@ -2,7 +2,6 @@ package snmp_trap import ( "fmt" - "log" "net" "strconv" "strings" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/snmp" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "snmp_trap" @@ -103,7 +103,7 @@ func (s *Instance) Init() error { } if err != nil { - log.Printf("Could not get path %v", err) + klog.ErrorS(err, "could not get snmp trap path") } if len(s.ServiceAddress) == 0 { @@ -248,7 +248,7 @@ func (s *Instance) start() error { select { case <-s.listener.Listening(): - log.Printf("Listening on %s", s.ServiceAddress) + klog.InfoS("snmp trap listener started", "service_address", s.ServiceAddress) case err := <-s.errCh: return err } @@ -260,7 +260,7 @@ func (s *Instance) Drop() { s.listener.Close() err := <-s.errCh if nil != err { - log.Printf("Error stopping trap listener %v", err) + klog.ErrorS(err, "error stopping trap listener") } } @@ -273,7 +273,7 @@ func setTrapOid(tags map[string]string, oid string, e snmp.MibEntry) { func makeTrapHandler(s *Instance, slist *types.SampleList) gosnmp.TrapHandlerFunc { return func(packet *gosnmp.SnmpPacket, addr *net.UDPAddr) { if s.DebugMod { - log.Printf("Received Trap from: %s, packet content: %v", addr.IP.String(), packet.SafeString()) + klog.V(1).InfoS("received snmp trap", "source", addr.IP.String(), "packet", packet.SafeString()) } fields := map[string]interface{}{} tags := map[string]string{} @@ -295,7 +295,7 @@ func makeTrapHandler(s *Instance, slist *types.SampleList) gosnmp.TrapHandlerFun if trapOid != "" { e, err := s.transl.lookup(trapOid) if err != nil { - 
log.Printf("Error resolving V1 OID, oid=%s, source=%s: %v", trapOid, tags["source"], err) + klog.ErrorS(err, "error resolving V1 OID", "oid", trapOid, "source", tags["source"]) return } setTrapOid(tags, trapOid, e) @@ -325,7 +325,7 @@ func makeTrapHandler(s *Instance, slist *types.SampleList) gosnmp.TrapHandlerFun case gosnmp.ObjectIdentifier: val, ok := v.Value.(string) if !ok { - log.Println("E! Error getting value OID") + klog.Error("error getting value OID") return } @@ -333,7 +333,7 @@ func makeTrapHandler(s *Instance, slist *types.SampleList) gosnmp.TrapHandlerFun var err error e, err = s.transl.lookup(val) if nil != err { - log.Printf("Error resolving value OID, oid=%s, source=%s: %v", val, tags["source"], err) + klog.ErrorS(err, "error resolving value OID", "oid", val, "source", tags["source"]) return } @@ -351,7 +351,7 @@ func makeTrapHandler(s *Instance, slist *types.SampleList) gosnmp.TrapHandlerFun e, err := s.transl.lookup(v.Name) if nil != err { - log.Printf("Error resolving OID oid=%s, source=%s: %v", v.Name, tags["source"], err) + klog.ErrorS(err, "error resolving OID", "oid", v.Name, "source", tags["source"]) return } diff --git a/inputs/snmp_zabbix/collector.go b/inputs/snmp_zabbix/collector.go index 0fe4f8e5a..6077893ad 100644 --- a/inputs/snmp_zabbix/collector.go +++ b/inputs/snmp_zabbix/collector.go @@ -3,13 +3,13 @@ package snmp_zabbix import ( "context" "fmt" - "log" "strconv" "strings" "sync" "time" "github.com/gosnmp/gosnmp" + "k8s.io/klog/v2" "flashcat.cloud/categraf/types" ) @@ -75,7 +75,7 @@ func (c *SNMPCollector) CollectItems(ctx context.Context, items []MonitorItem, s // 记录错误但继续处理其他结果 // 降低日志级别或增加频率限制,防止日志刷屏 if c.config.DebugMode { - log.Printf("D! 
collected agent: %s, key: %s, error: %v", result.Agent, result.Key, result.Error) + klog.V(1).InfoS("collected agent error", "agent", result.Agent, "key", result.Key, "error", result.Error) } continue } @@ -150,7 +150,7 @@ func (c *SNMPCollector) collectFromAgent(ctx context.Context, agent string, item if err != nil { // 只有在调试模式下才打印详细的批量失败日志 if c.config.DebugMode { - log.Printf("D! bulk request failed for agent %s (size: %d): %v. Falling back to individual collection.", agent, len(oids), err) + klog.V(1).InfoS("bulk request failed, falling back to individual collection", "agent", agent, "size", len(oids), "error", err) } // 降级:尝试单独请求这个批次的OID c.collectIndividually(client, agent, batchItems, resultChan) @@ -243,7 +243,7 @@ func (c *SNMPCollector) processSingleResult(agent string, item MonitorItem, resu ) if err != nil { if !strings.Contains(err.Error(), "no previous value") && c.config.DebugMode { - log.Printf("D! preprocessing failed for %s: %v", item.Key, err) + klog.V(1).InfoS("preprocessing failed", "key", item.Key, "error", err) } processedValue = rawValue } else { @@ -304,7 +304,7 @@ func (c *SNMPCollector) processSingleDependentItem(agent string, item MonitorIte if err != nil { if c.config.DebugMode { - log.Printf("D! 
dependent preprocessing failed for %s: %v", item.Key, err) + klog.V(1).InfoS("dependent preprocessing failed", "key", item.Key, "error", err) } return } diff --git a/inputs/snmp_zabbix/discovery.go b/inputs/snmp_zabbix/discovery.go index 88be304f9..105faeb07 100644 --- a/inputs/snmp_zabbix/discovery.go +++ b/inputs/snmp_zabbix/discovery.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "log" "strconv" "strings" "sync" @@ -12,6 +11,7 @@ import ( "unicode" "github.com/gosnmp/gosnmp" + "k8s.io/klog/v2" ) const ( @@ -124,7 +124,6 @@ func (d *DiscoveryEngine) ExecuteDiscovery(ctx context.Context, agent string, ru } // 预处理的正确输入应该是这个 JSON 字符串 valueForPreprocessing = string(jsonBytes) - // log.Printf("DEBUG: Serialized discovery result for preprocessing: %s", valueForPreprocessing) } processedValue, err := ApplyDiscoveryPreprocessing(valueForPreprocessing, rule.Preprocessing) @@ -168,7 +167,7 @@ func (d *DiscoveryEngine) ExecuteDiscovery(ctx context.Context, agent string, ru // 应用过滤器 filtered := d.applyDiscoveryFilter(discoveries, rule.Filter) - log.Printf("I! 
filtered discovery results: %d items", len(filtered)) + klog.InfoS("filtered discovery results", "count", len(filtered)) ttl := parseZabbixDelay(rule.Delay) if ttl == 0 { ttl = time.Hour // 默认缓存1小时 @@ -264,7 +263,7 @@ func (d *DiscoveryEngine) performZabbixDependentDiscovery(ctx context.Context, c go func() { pdus, err := d.walkOID(client, pair.OID) if err != nil { - log.Printf("W!: %v", err) + klog.Warningf("SNMP walk returned warning: %v", err) } resultChan <- walkResult{pdus: pdus, err: err} }() @@ -275,7 +274,7 @@ func (d *DiscoveryEngine) performZabbixDependentDiscovery(ctx context.Context, c return nil, fmt.Errorf("SNMP walk for OID %s was canceled or timed out: %w", pair.OID, ctx.Err()) case res := <-resultChan: if res.err != nil { - log.Printf("Warning: SNMP walk failed for OID %s: %v", pair.OID, res.err) + klog.Warningf("SNMP walk failed for OID %s: %v", pair.OID, res.err) continue } results = res.pdus @@ -705,6 +704,7 @@ func ParseLLDLifetimes(rule DiscoveryRule) (time.Duration, time.Duration) { // - "DELETE_AFTER", "DISABLE_AFTER" -> 解析 durationStr,得到一个 >0 的延迟时长 // - "" (空字符串) -> 若 durationStr 非空,则按 *AFTER* 处理;否则返回 defaultValue // - 其他未知值 -> 返回 defaultValue +// // durationStr: 期望为 Zabbix 风格的延迟字符串,例如 "7d", "1h", "30m" 等;当 typeStr 为 *_AFTER 或为空且 durationStr 非空时生效 // defaultValue: 当无法从 typeStr 和 durationStr 推导策略时使用的默认时长。 // 返回值含义: diff --git a/inputs/snmp_zabbix/discovery_scheduler.go b/inputs/snmp_zabbix/discovery_scheduler.go index 957cbe422..b89a6eedf 100644 --- a/inputs/snmp_zabbix/discovery_scheduler.go +++ b/inputs/snmp_zabbix/discovery_scheduler.go @@ -3,9 +3,10 @@ package snmp_zabbix import ( "context" "fmt" - "log" "sync" "time" + + "k8s.io/klog/v2" ) // DiscoveryScheduler 管理所有发现规则的调度 @@ -132,7 +133,7 @@ func (s *DiscoveryScheduler) Start(ctx context.Context) { } } - log.Printf("DiscoveryScheduler started with %d intervals", len(s.intervals)) + klog.InfoS("discovery scheduler started", "intervals", len(s.intervals)) } // Stop 停止调度器 @@ -148,11 
+149,11 @@ func (s *DiscoveryScheduler) Stop() { close(s.stopCh) s.runningIntervals = make(map[time.Duration]bool) - log.Println("DiscoveryScheduler stopped") + klog.InfoS("discovery scheduler stopped") } func (s *DiscoveryScheduler) runInterval(ctx context.Context, interval time.Duration) { - log.Printf("Starting discovery runner for interval %v", interval) + klog.InfoS("starting discovery runner", "interval", interval) // 立即执行一次发现 s.mu.RLock() @@ -181,10 +182,10 @@ func (s *DiscoveryScheduler) runInterval(ctx context.Context, interval time.Dura for { select { case <-ctx.Done(): - log.Printf("Discovery runner for interval %v stopped: context done", interval) + klog.InfoS("discovery runner stopped: context done", "interval", interval) return case <-s.stopCh: - log.Printf("Discovery runner for interval %v stopped", interval) + klog.InfoS("discovery runner stopped", "interval", interval) return case now := <-ticker.C: s.mu.RLock() @@ -211,21 +212,19 @@ func (s *DiscoveryScheduler) checkAndExecuteRules(ctx context.Context, now time. 
rule.LastRun = now // 计算下次运行时间,添加少量jitter避免同时执行 rule.NextRun = now.Add(rule.Interval).Add(jitter(rule.Interval)) - log.Printf("Scheduled discovery rule '%s' for agent %s, next run at %v", - rule.Rule.Key, rule.Agent, rule.NextRun) + klog.InfoS("scheduled discovery rule", "rule", rule.Rule.Key, "agent", rule.Agent, "next_run", rule.NextRun) } else { - log.Printf("Discovery rule '%s' for agent %s not ready yet, next run at %v (now: %v)", - rule.Rule.Key, rule.Agent, rule.NextRun, now) + klog.V(1).InfoS("discovery rule not ready yet", "rule", rule.Rule.Key, "agent", rule.Agent, "next_run", rule.NextRun, "now", now) } } s.mu.Unlock() if len(readyRules) == 0 { - log.Printf("No discovery rules ready to execute at %v", now) + klog.V(1).InfoS("no discovery rules ready to execute", "time", now) return } - log.Printf("Executing %d discovery rules at %v", len(readyRules), now) + klog.InfoS("executing discovery rules", "count", len(readyRules), "time", now) // 并发执行发现规则 var wg sync.WaitGroup @@ -246,9 +245,9 @@ func (s *DiscoveryScheduler) checkAndExecuteRules(ctx context.Context, now time. 
select { case <-done: - log.Printf("All discovery rules completed") + klog.InfoS("all discovery rules completed") case <-time.After(5 * time.Minute): // 5分钟超时 - log.Printf("Warning: Discovery execution timeout, some rules may not have completed") + klog.Warning("discovery execution timeout, some rules may not have completed") } } @@ -261,8 +260,7 @@ func (s *DiscoveryScheduler) executeDiscovery(ctx context.Context, scheduled *Sc scheduled.RunCount++ s.mu.Unlock() - log.Printf("Executing discovery rule '%s' for agent %s (run #%d)", - scheduled.Rule.Key, scheduled.Agent, scheduled.RunCount) + klog.InfoS("executing discovery rule", "rule", scheduled.Rule.Key, "agent", scheduled.Agent, "run_count", scheduled.RunCount) // 执行发现 discoveries, err := s.engine.ExecuteDiscovery(ctx, scheduled.Agent, scheduled.Rule) @@ -272,8 +270,7 @@ func (s *DiscoveryScheduler) executeDiscovery(ctx context.Context, scheduled *Sc scheduled.LastError = err scheduled.ErrorCount++ s.mu.Unlock() - log.Printf("Discovery rule '%s' for agent %s failed: %v", - scheduled.Rule.Key, scheduled.Agent, err) + klog.ErrorS(err, "discovery rule failed", "rule", scheduled.Rule.Key, "agent", scheduled.Agent) return } @@ -281,8 +278,7 @@ func (s *DiscoveryScheduler) executeDiscovery(ctx context.Context, scheduled *Sc scheduled.SuccessCount++ s.mu.Unlock() - log.Printf("Discovery rule '%s' for agent %s found %d items (took %v)", - scheduled.Rule.Key, scheduled.Agent, len(discoveries), time.Since(startTime)) + klog.InfoS("discovery rule completed", "rule", scheduled.Rule.Key, "agent", scheduled.Agent, "items", len(discoveries), "duration", time.Since(startTime)) // 应用item prototypes生成监控项 items := s.engine.ApplyItemPrototypes(discoveries, scheduled.Rule) @@ -305,7 +301,7 @@ func (s *DiscoveryScheduler) executeDiscovery(ctx context.Context, scheduled *Sc // LoadFromTemplate 从模板加载所有发现规则 func (s *DiscoveryScheduler) LoadFromTemplate(agents []string, template *ZabbixTemplate) { if template == nil { - log.Printf("W! 
no template provided for discovery scheduler") + klog.Warning("no template provided for discovery scheduler") return } @@ -315,20 +311,17 @@ func (s *DiscoveryScheduler) LoadFromTemplate(agents []string, template *ZabbixT for _, rule := range template.DiscoveryRules { // 只处理SNMP类型的发现规则 if itemType := ConvertZabbixItemType(rule.Type); itemType != "snmp" { - log.Printf("W! skipping non-SNMP discovery rule '%s' (type: %s -> %s)", - rule.Key, rule.Type, itemType) + klog.Warningf("skipping non-SNMP discovery rule '%s' (type: %s -> %s)", rule.Key, rule.Type, itemType) continue } - log.Printf("I! adding SNMP discovery rule '%s' (delay: %s) for agent %s", - rule.Key, rule.Delay, agent) + klog.InfoS("adding SNMP discovery rule", "rule", rule.Key, "delay", rule.Delay, "agent", agent) s.AddDiscoveryRule(agent, rule) addedCount++ } } - log.Printf("I! loaded %d discovery rules from template (skipped %d non-SNMP rules)", - addedCount, skippedCount) + klog.InfoS("loaded discovery rules from template", "added", addedCount, "skipped", skippedCount) } // removeFromIntervalSlice 从interval切片中移除指定的调度项 diff --git a/inputs/snmp_zabbix/preprocessing.go b/inputs/snmp_zabbix/preprocessing.go index aa033fc16..a8d0be768 100644 --- a/inputs/snmp_zabbix/preprocessing.go +++ b/inputs/snmp_zabbix/preprocessing.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "github.com/gosnmp/gosnmp" - "log" "net" "regexp" "strconv" @@ -16,6 +15,7 @@ import ( "github.com/dop251/goja" "github.com/oliveagle/jsonpath" + "k8s.io/klog/v2" ) var scriptCache = NewJSCache() @@ -142,7 +142,7 @@ func applyDiscoveryPreprocessingStep(value interface{}, step PreprocessStep) (in case "JSONPATH", "12": return applyJSONPath(value, step.Parameters) default: - log.Printf("W! 
unsupported preprocessing type in discovery phase: %s, skipping", step.Type) + klog.Warningf("unsupported preprocessing type in discovery phase: %s, skipping", step.Type) return value, nil } } @@ -185,7 +185,7 @@ func applyPreprocessingStep(value interface{}, step PreprocessStep, context *Pre default: // 未实现的预处理类型,记录警告但不中断处理 - log.Printf("W! unsupported preprocessing type: %s at step %d, skipping", step.Type, stepIndex) + klog.Warningf("unsupported preprocessing type: %s at step %d, skipping", step.Type, stepIndex) return value, nil } } @@ -754,15 +754,15 @@ func applyJavaScript(value interface{}, params []string) (interface{}, error) { } console := vm.NewObject() console.Set("log", func(call goja.FunctionCall) goja.Value { - log.Printf("I! JS-LOG: %s", call.Argument(0).String()) + klog.InfoS("JS-LOG", "message", call.Argument(0).String()) return goja.Undefined() }) console.Set("error", func(call goja.FunctionCall) goja.Value { - log.Printf("E! JS-ERROR: %s", call.Argument(0).String()) + klog.ErrorS(nil, "JS-ERROR", "message", call.Argument(0).String()) return goja.Undefined() }) console.Set("warn", func(call goja.FunctionCall) goja.Value { - log.Printf("W! 
JS-WARN: %s", call.Argument(0).String()) + klog.Warningf("JS-WARN: %s", call.Argument(0).String()) return goja.Undefined() }) vm.Set("console", console) @@ -775,4 +775,4 @@ func applyJavaScript(value interface{}, params []string) (interface{}, error) { // 从goja.Value转换回Go原生类型 return result.Export(), nil -} \ No newline at end of file +} diff --git a/inputs/snmp_zabbix/scheduler.go b/inputs/snmp_zabbix/scheduler.go index 0825af329..cc55b34f1 100644 --- a/inputs/snmp_zabbix/scheduler.go +++ b/inputs/snmp_zabbix/scheduler.go @@ -3,13 +3,14 @@ package snmp_zabbix import ( "container/heap" "context" + "fmt" "hash/fnv" - "log" "runtime/debug" "sync" "time" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) type ItemScheduler struct { @@ -174,7 +175,7 @@ func (s *ItemScheduler) runLoop(ctx context.Context) { func (s *ItemScheduler) executeTask(ctx context.Context, agent string, items []MonitorItem) { defer func() { if r := recover(); r != nil { - log.Printf("E! [CRITICAL] collection goroutine for agent %s panicked: %v\n%s", agent, r, debug.Stack()) + klog.ErrorS(fmt.Errorf("panic: %v", r), "collection goroutine panicked", "agent", agent, "stack", string(debug.Stack())) } }() @@ -183,7 +184,7 @@ func (s *ItemScheduler) executeTask(ctx context.Context, agent string, items []M } if err := s.collector.CollectItems(ctx, items, s.slist); err != nil { - log.Printf("Failed to collect items for agent %s: %v\n", agent, err) + klog.ErrorS(err, "failed to collect items", "agent", agent) } } @@ -262,7 +263,7 @@ func (s *ItemScheduler) UpdateDiscoveredDiff(ruleKey string, newItems []MonitorI if !sch.IsLost { sch.IsLost = true sch.LostSince = now - log.Printf("I! 
item marked as lost: %s", id) + klog.InfoS("item marked as lost", "id", id) } sch.DeleteTTL = deleteTTL sch.DisableTTL = disableTTL @@ -270,7 +271,7 @@ func (s *ItemScheduler) UpdateDiscoveredDiff(ruleKey string, newItems []MonitorI if !sch.IsDisabled && disableTTL == 0 { s.removeItemFromTask(sch) sch.IsDisabled = true - log.Printf("I! item disabled immediately: %s", id) + klog.InfoS("item disabled immediately", "id", id) } } } @@ -288,7 +289,7 @@ func (s *ItemScheduler) UpdateDiscoveredDiff(ruleKey string, newItems []MonitorI if sch.IsLost { sch.IsLost = false sch.LostSince = time.Time{} - log.Printf("I! item recovered: %s", id) + klog.InfoS("item recovered", "id", id) } wasDisabled := sch.IsDisabled diff --git a/inputs/snmp_zabbix/snmp.go b/inputs/snmp_zabbix/snmp.go index 683e6e843..4259b2c2e 100644 --- a/inputs/snmp_zabbix/snmp.go +++ b/inputs/snmp_zabbix/snmp.go @@ -3,7 +3,6 @@ package snmp_zabbix import ( "context" "fmt" - "log" "sort" "strings" "sync" @@ -13,6 +12,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" "flashcat.cloud/categraf/writer" + "k8s.io/klog/v2" ) const ( @@ -48,7 +48,7 @@ func (s *SnmpZabbix) GetInstances() []inputs.Instance { ret := make([]inputs.Instance, len(s.Instances)) inputLabels := s.GetLabels() if s.DebugMod { - log.Printf("D!, snmp_zabbix input labels:%+v", inputLabels) + klog.V(1).InfoS("snmp_zabbix input labels", "labels", inputLabels) } for i := 0; i < len(s.Instances); i++ { if len(s.Instances[i].Labels) == 0 { @@ -190,13 +190,13 @@ func (s *Instance) Init() error { if !s.EnableDiscovery { if s.DebugMod { - log.Printf("D! 
snmp_zabbix discovery disabled") + klog.V(1).InfoS("snmp_zabbix discovery disabled") } // return nil } if len(s.TemplateFiles) == 0 && len(s.TemplateFileContents) == 0 && len(s.Items) == 0 { if s.DebugMod { - log.Printf("D!, there are no template files, no template_file_contents, and no items defined") + klog.V(1).InfoS("there are no template files, no template_file_contents, and no items defined") } return types.ErrInstancesEmpty } @@ -252,7 +252,7 @@ func (s *Instance) Init() error { if len(s.TemplateFiles) != 0 { mergedTemplate, err = LoadAndMergeTemplates(s.TemplateFiles) if err != nil { - log.Printf("E! failed to load template file %v: %v", s.TemplateFiles, err) + klog.ErrorS(err, "failed to load template file", "template_files", s.TemplateFiles) } } @@ -268,7 +268,7 @@ func (s *Instance) Init() error { content := s.TemplateFileContents[key] templateToMerge, pErr := ParseTemplateFromContent([]byte(content)) if pErr != nil { - log.Printf("E! failed to parse template content for key '%s': %v", key, pErr) + klog.ErrorS(pErr, "failed to parse template content", "key", key) continue // 跳过解析失败的模板 } @@ -304,19 +304,18 @@ func (s *Instance) handleDiscoveryComplete(agent string, rule DiscoveryRule, ite var filtered []MonitorItem for _, item := range items { if strings.Contains(item.OID, "{#") { - log.Printf("W! item OID contains unexpanded macro: key=%s, oid=%s", item.Key, item.OID) + klog.Warningf("item OID contains unexpanded macro: key=%s, oid=%s", item.Key, item.OID) continue } if item.OID == "" { - log.Printf("W! 
item has empty OID: key=%s", item.Key) + klog.Warningf("item has empty OID: key=%s", item.Key) continue } item.IsDiscovered = true filtered = append(filtered, item) } - log.Printf("Discovery rule '%s' for agent %s produced %d valid items (filtered from %d)", - rule.Key, agent, len(filtered), len(items)) + klog.InfoS("discovery rule produced valid items", "rule", rule.Key, "agent", agent, "valid_items", len(filtered), "total_items", len(items)) // 解析生命周期 (DeleteTTL, DisableTTL) deleteTTL, disableTTL := ParseLLDLifetimes(rule) @@ -410,7 +409,7 @@ func (s *Instance) Start(_ *types.SampleList) error { // 启动发现调度器 s.discoveryScheduler.Start(baseCtx) - log.Printf("Discovery scheduler started with template rules") + klog.InfoS("discovery scheduler started with template rules") } go func(slist *types.SampleList) { @@ -555,7 +554,7 @@ func (s *Instance) getTemplateStaticItems() []MonitorItem { child := agentItemsMap[depTmpl.Key] if _, ok := agentItemsMap[depTmpl.MasterKey]; !ok { if s.DebugMod { - log.Printf("W! dependent item %s missing master %s on agent %s", child.Key, depTmpl.MasterKey, agentAddr) + klog.Warningf("dependent item %s missing master %s on agent %s", child.Key, depTmpl.MasterKey, agentAddr) } } } @@ -570,14 +569,14 @@ func (s *Instance) getTemplateStaticItems() []MonitorItem { ptr, ok := agentItemsMap[key] if !ok || ptr == nil { if s.DebugMod { - log.Printf("E! check failed: item key '%s' not found in map during tree build", key) + klog.ErrorS(nil, "item key not found in map during tree build", "key", key) } return MonitorItem{} } if visited[key] { if s.DebugMod { - log.Printf("E! 
cycle detected in item dependency: %s", key) + klog.ErrorS(nil, "cycle detected in item dependency", "key", key) } return *ptr } diff --git a/inputs/snmp_zabbix/snmp_client.go b/inputs/snmp_zabbix/snmp_client.go index 6fe2a8510..00dca358f 100644 --- a/inputs/snmp_zabbix/snmp_client.go +++ b/inputs/snmp_zabbix/snmp_client.go @@ -11,6 +11,7 @@ import ( "time" "github.com/gosnmp/gosnmp" + "k8s.io/klog/v2" ) type SNMPClientManager struct { @@ -89,7 +90,7 @@ func (m *SNMPClientManager) GetClient(agent string) (*gosnmp.GoSNMP, error) { } // 客户端不健康,尝试重连 - log.Printf("Client for %s is unhealthy, attempting to reconnect", agent) + klog.Warningf("client for %s is unhealthy, attempting to reconnect", agent) if err := m.reconnectClient(agent); err != nil { return nil, fmt.Errorf("failed to reconnect unhealthy client: %w", err) } @@ -147,7 +148,7 @@ func (m *SNMPClientManager) createNewClient(agent string) error { // 执行初始健康检查 if err := m.performHealthCheckNoLock(wrapper); err != nil { - log.Printf("Initial health check failed for %s: %v", agent, err) + klog.Warningf("initial health check failed for %s: %v", agent, err) wrapper.healthy = false wrapper.lastError = err } @@ -193,7 +194,7 @@ func (m *SNMPClientManager) reconnectClient(agent string) error { wrapper.retryCount = 0 wrapper.lastSuccess = time.Now() - log.Printf("Successfully reconnected client for %s", agent) + klog.InfoS("successfully reconnected client", "agent", agent) return nil } @@ -217,7 +218,7 @@ func (m *SNMPClientManager) healthCheckLoop() { case <-ticker.C: m.performAllHealthChecks() case <-m.stopHealthCheck: - log.Println("Stopping health check loop") + klog.InfoS("stopping health check loop") return } } @@ -252,7 +253,7 @@ func (m *SNMPClientManager) performAllHealthChecks() { case <-done: // 所有检查完成 case <-time.After(m.healthCheckTimeout * 2): - log.Println("Health check timeout, some checks may not have completed") + klog.Warning("health check timeout, some checks may not have completed") } } @@ -279,14 
+280,13 @@ func (m *SNMPClientManager) checkClientHealth(agent string) { // 检查是否超过最大重试次数 if wrapper.retryCount >= m.maxRetries { wrapper.healthy = false - log.Printf("Client %s marked unhealthy after %d retries: %v", - agent, wrapper.retryCount, err) + klog.Warningf("client %s marked unhealthy after %d retries: %v", agent, wrapper.retryCount, err) // 尝试重连 go func() { time.Sleep(5 * time.Second) // 延迟重连 if err := m.reconnectClient(agent); err != nil { - log.Printf("Failed to reconnect %s: %v", agent, err) + klog.ErrorS(err, "failed to reconnect client", "agent", agent) } }() } @@ -419,7 +419,7 @@ func (m *SNMPClientManager) Close() { for agent, wrapper := range m.clients { if wrapper.client != nil && wrapper.client.Conn != nil { wrapper.client.Conn.Close() - log.Printf("Closed connection for agent %s", agent) + klog.InfoS("closed connection for agent", "agent", agent) } delete(m.clients, agent) } @@ -427,7 +427,7 @@ func (m *SNMPClientManager) Close() { // ForceHealthCheck 强制执行一次健康检查 func (m *SNMPClientManager) ForceHealthCheck() { - log.Println("Forcing health check on all clients") + klog.InfoS("forcing health check on all clients") go m.performAllHealthChecks() } @@ -557,8 +557,11 @@ func (m *SNMPClientManager) createClient(agent string) (*gosnmp.GoSNMP, error) { agentConfig.Host, agentConfig.Port, err) } - log.Printf("Successfully created SNMP client for %s://%s:%d (version: %d)", - agentConfig.Transport, agentConfig.Host, agentConfig.Port, agentConfig.Version) + klog.InfoS("successfully created SNMP client", + "transport", agentConfig.Transport, + "host", agentConfig.Host, + "port", agentConfig.Port, + "version", agentConfig.Version) return client, nil } @@ -588,7 +591,7 @@ func (m *SNMPClientManager) setAuthProtocol(client *gosnmp.GoSNMP, config *Agent return fmt.Errorf("unsupported auth protocol: %s", config.AuthProtocol) } - log.Printf("Set auth protocol to %s for user %s", config.AuthProtocol, config.Username) + klog.InfoS("set auth protocol", 
"auth_protocol", config.AuthProtocol, "username", config.Username) return nil } @@ -617,7 +620,7 @@ func (m *SNMPClientManager) setPrivProtocol(client *gosnmp.GoSNMP, config *Agent return fmt.Errorf("unsupported priv protocol: %s", config.PrivProtocol) } - log.Printf("Set privacy protocol to %s for user %s", config.PrivProtocol, config.Username) + klog.InfoS("set privacy protocol", "priv_protocol", config.PrivProtocol, "username", config.Username) return nil } @@ -690,4 +693,4 @@ type ClientHealthDetail struct { RetryCount int ErrorCount uint64 SuccessCount uint64 -} \ No newline at end of file +} diff --git a/inputs/snmp_zabbix/template.go b/inputs/snmp_zabbix/template.go index 4e19673f0..7ccefcb14 100644 --- a/inputs/snmp_zabbix/template.go +++ b/inputs/snmp_zabbix/template.go @@ -2,13 +2,13 @@ package snmp_zabbix import ( "fmt" - "log" "os" "regexp" "strings" "github.com/Knetic/govaluate" "gopkg.in/yaml.v3" + "k8s.io/klog/v2" ) // Zabbix 现代YAML模板结构(6.0+版本) @@ -862,7 +862,7 @@ func (t *ZabbixTemplate) evaluateFormula(formula string, results map[string]bool expression, err := govaluate.NewEvaluableExpression(expressionStr) if err != nil { // 如果公式本身有语法错误,记录日志并返回 false - log.Printf("E! failed to parse formula '%s': %v", formula, err) + klog.ErrorS(err, "failed to parse formula", "formula", formula) return false } @@ -877,7 +877,7 @@ func (t *ZabbixTemplate) evaluateFormula(formula string, results map[string]bool result, err := expression.Evaluate(parameters) if err != nil { // 如果执行过程中出错(例如缺少变量),记录日志并返回 false - log.Printf("E! failed to evaluate formula '%s' with params %v: %v", formula, parameters, err) + klog.ErrorS(err, "failed to evaluate formula", "formula", formula, "params", parameters) return false } @@ -885,7 +885,7 @@ func (t *ZabbixTemplate) evaluateFormula(formula string, results map[string]bool // govaluate 的结果是 interface{} 类型,需要进行类型断言 resultBool, ok := result.(bool) if !ok { - log.Printf("E! 
formula '%s' did not return a boolean value", formula) + klog.ErrorS(nil, "formula did not return a boolean value", "formula", formula) return false } diff --git a/inputs/sockstat/sockstat.go b/inputs/sockstat/sockstat.go index 366cc36ef..b88fa00e0 100644 --- a/inputs/sockstat/sockstat.go +++ b/inputs/sockstat/sockstat.go @@ -2,7 +2,6 @@ package sockstat import ( "errors" - "log" "os" "strings" @@ -11,6 +10,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "sockstat" @@ -59,7 +59,7 @@ type NetSockstatProtocol struct { func (ss *SockStat) Gather(slist *types.SampleList) { ns, err := ParseNetSockstat() if err != nil { - log.Println("E! failed to get net sockstat: ", err) + klog.ErrorS(err, "failed to get net sockstat") return } ss.parse(ns, slist) @@ -67,11 +67,11 @@ func (ss *SockStat) Gather(slist *types.SampleList) { ns6, err := ParseNetSockstat6() if err != nil { if ss.DebugMod { - log.Println("D! failed to get net sockstat6: ", err) + klog.V(1).InfoS("failed to get net sockstat6", "error", err) return } if !errors.Is(err, os.ErrNotExist) { - log.Println("E! failed to get net sockstat6: ", err) + klog.ErrorS(err, "failed to get net sockstat6") return } } diff --git a/inputs/sqlserver/sqlserver.go b/inputs/sqlserver/sqlserver.go index 81d6ce9c8..ead81c7df 100644 --- a/inputs/sqlserver/sqlserver.go +++ b/inputs/sqlserver/sqlserver.go @@ -6,7 +6,6 @@ import ( "time" "fmt" - "log" "strings" "sync" @@ -16,6 +15,7 @@ import ( "flashcat.cloud/categraf/types" mssql "github.com/denisenkom/go-mssqldb" + "k8s.io/klog/v2" ) const inputName = "sqlserver" @@ -111,7 +111,7 @@ func (s *Instance) Init() error { } if err := s.initQueries(); err != nil { - log.Println("E! 
initQueries err:", err) + klog.ErrorS(err, "failed to initialize SQL Server queries") return err } @@ -126,7 +126,7 @@ func (s *Instance) Init() error { var err error pool, err = sql.Open("mssql", serv) if err != nil { - log.Println("E! open mssql error:", err) + klog.ErrorS(err, "failed to open mssql connection", "server", serv) continue } default: @@ -141,7 +141,7 @@ func (s *Instance) Init() error { func (s *Instance) initQueries() error { s.queries = make(MapQuery) queries := s.queries - log.Println("Config: database_type: ", s.DatabaseType, " query_version: ", s.QueryVersion) + klog.InfoS("sqlserver query config", "database_type", s.DatabaseType, "query_version", s.QueryVersion) // To prevent query definition conflicts // Constant definitions for type "SQLServer" start with sqlServer @@ -199,7 +199,7 @@ func (s *Instance) initQueries() error { for query := range queries { querylist = append(querylist, query) } - log.Println("Config: Effective Queries: ", querylist) + klog.InfoS("sqlserver effective queries", "queries", querylist) return nil } @@ -252,7 +252,7 @@ func (s *Instance) Gather(slist *types.SampleList) { queryError := s.gatherServer(pool, query, slist, connectionString) if queryError != nil { - log.Println("E! 
queryError is ", queryError) + klog.ErrorS(queryError, "sqlserver query execution failed", "query", query.ScriptName, "connection", connectionString) } if s.HealthMetric { mutex.Lock() diff --git a/inputs/supervisor/supervisor.go b/inputs/supervisor/supervisor.go index db7d47829..8b9668de0 100644 --- a/inputs/supervisor/supervisor.go +++ b/inputs/supervisor/supervisor.go @@ -2,7 +2,6 @@ package supervisor import ( "fmt" - "log" "net" "net/url" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "supervisor" @@ -106,7 +106,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { var rawProcessData []processInfo err := ins.rpcClient.Call("supervisor.getAllProcessInfo", nil, &rawProcessData) if err != nil { - log.Println("failed to get processes info: %w", err) + klog.ErrorS(err, "failed to get supervisor processes info", "rpc", "supervisor.getAllProcessInfo", "url", ins.Url) return } @@ -114,14 +114,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { var status supervisorInfo err = ins.rpcClient.Call("supervisor.getState", nil, &status) if err != nil { - log.Println("failed to get processes info: %w", err) + klog.ErrorS(err, "failed to get supervisor state", "rpc", "supervisor.getState", "url", ins.Url) return } // API call to get identification string err = ins.rpcClient.Call("supervisor.getIdentification", nil, &status.Ident) if err != nil { - log.Println("failed to get instance identification: %w", err) + klog.ErrorS(err, "failed to get supervisor identification", "rpc", "supervisor.getIdentification", "url", ins.Url) return } @@ -129,7 +129,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, process := range rawProcessData { processTags, processFields, err := ins.parseProcessData(process, status) if err != nil { - log.Println("E! 
failed to parse process data: ", err) + klog.ErrorS(err, "failed to parse supervisor process data", "process", process.Name, "group", process.Group, "url", ins.Url) continue } slist.PushSamples("supervisor_processes", processFields, processTags) @@ -138,7 +138,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { // Adding instance info fields to accumulator instanceTags, instanceFields, err := ins.parseInstanceData(status) if err != nil { - log.Println("failed to parse instance data: %w", err) + klog.ErrorS(err, "failed to parse supervisor instance data", "id", status.Ident, "url", ins.Url) return } slist.PushSamples("supervisor_instance", instanceFields, instanceTags) diff --git a/inputs/switch_legacy/switch_legacy.go b/inputs/switch_legacy/switch_legacy.go index 06ed366ad..5b8704e6d 100644 --- a/inputs/switch_legacy/switch_legacy.go +++ b/inputs/switch_legacy/switch_legacy.go @@ -3,7 +3,6 @@ package switch_legacy import ( "errors" "fmt" - "log" "sync" "time" @@ -16,6 +15,7 @@ import ( cmap "github.com/orcaman/concurrent-map" "github.com/toolkits/pkg/concurrent/semaphore" go_snmp "github.com/ulricqin/gosnmp" + "k8s.io/klog/v2" ) const inputName = "switch_legacy" @@ -149,10 +149,10 @@ func (ins *Instance) Gather(slist *types.SampleList) { start := time.Now() defer func() { - log.Println("I! switch gather use:", time.Since(start)) + klog.InfoS("switch gather completed", "duration", time.Since(start)) }() - log.Println("I! switch total ip count:", len(ips)) + klog.InfoS("switch total ip count", "count", len(ips)) if ins.PingEnable { ips = ins.gatherPing(ips, slist) @@ -194,7 +194,7 @@ func (ins *Instance) custstat(wg *sync.WaitGroup, ip string, slist *types.Sample defer func() { if r := recover(); r != nil { - log.Println("E! 
recovered in custstat, ip:", ip, "oid:", cust.OID, "error:", r, "stack:", runtimex.Stack(3)) + klog.ErrorS(fmt.Errorf("panic: %v", r), "recovered in custstat", "ip", ip, "oid", cust.OID, "stack", runtimex.Stack(3)) } }() @@ -208,7 +208,7 @@ func (ins *Instance) custstat(wg *sync.WaitGroup, ip string, slist *types.Sample if err == nil { slist.PushFront(types.NewSample(inputName, cust.Metric, value, cust.Tags, map[string]string{ins.parent.SwitchIdLabel: ins.parent.MappingIP(ip)})) } else { - log.Println("E! failed to convert to float64, ip:", ip, "oid:", cust.OID, "value:", snmpPDUs[0].Value) + klog.ErrorS(err, "failed to convert to float64", "ip", ip, "oid", cust.OID, "value", snmpPDUs[0].Value) } break } @@ -253,7 +253,7 @@ func (ins *Instance) memstat(wg *sync.WaitGroup, sema *semaphore.Semaphore, ip s utilPercent, err := sw.MemUtilization(ip, ins.Community, int(ins.SnmpTimeoutMs), ins.SnmpRetries) if err != nil { - log.Println("E! failed to gather mem, ip:", ip, "error:", err) + klog.ErrorS(err, "failed to gather mem", "ip", ip) return } @@ -298,7 +298,7 @@ func (ins *Instance) cpustat(wg *sync.WaitGroup, sema *semaphore.Semaphore, ip s utilPercent, err := sw.CpuUtilization(ip, ins.Community, int(ins.SnmpTimeoutMs), ins.SnmpRetries) if err != nil { - log.Println("E! failed to gather cpu, ip:", ip, "error:", err) + klog.ErrorS(err, "failed to gather cpu", "ip", ip) return } @@ -389,7 +389,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { slist.PushFront(types.NewSample(inputName, "if_in_speed_percent", 100*IfHCInOctets/float64(ifStat.IfSpeed), tags)) } } else { - log.Println("W! 
if_in out of range, current:", ifStat.IfHCInOctets, "lasttime:", lastifStat.IfHCInOctets, "tags:", tags) + klog.Warningf("if_in out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCInOctets, lastifStat.IfHCInOctets, tags) } if limitCheck(IfHCOutOctets, speedlimit) { @@ -398,7 +398,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { slist.PushFront(types.NewSample(inputName, "if_out_speed_percent", 100*IfHCOutOctets/float64(ifStat.IfSpeed), tags)) } } else { - log.Println("W! if_out out of range, current:", ifStat.IfHCOutOctets, "lasttime:", lastifStat.IfHCOutOctets, "tags:", tags) + klog.Warningf("if_out out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCOutOctets, lastifStat.IfHCOutOctets, tags) } } } @@ -416,13 +416,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfHCInBroadcastPkts, ins.BroadcastPktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_broadcast_pkt", IfHCInBroadcastPkts, tags)) } else { - log.Println("W! if_in_broadcast_pkt out of range, current:", ifStat.IfHCInBroadcastPkts, "lasttime:", lastifStat.IfHCInBroadcastPkts, "tags:", tags) + klog.Warningf("if_in_broadcast_pkt out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCInBroadcastPkts, lastifStat.IfHCInBroadcastPkts, tags) } if limitCheck(IfHCOutBroadcastPkts, ins.BroadcastPktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_broadcast_pkt", IfHCOutBroadcastPkts, tags)) } else { - log.Println("W! 
if_out_broadcast_pkt out of range, current:", ifStat.IfHCOutBroadcastPkts, "lasttime:", lastifStat.IfHCOutBroadcastPkts, "tags:", tags) + klog.Warningf("if_out_broadcast_pkt out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCOutBroadcastPkts, lastifStat.IfHCOutBroadcastPkts, tags) } } } @@ -441,13 +441,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfHCInMulticastPkts, ins.MulticastPktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_multicast_pkt", IfHCInMulticastPkts, tags)) } else { - log.Println("W! if_in_multicast_pkt out of range, current:", ifStat.IfHCInMulticastPkts, "lasttime:", lastifStat.IfHCInMulticastPkts, "tags:", tags) + klog.Warningf("if_in_multicast_pkt out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCInMulticastPkts, lastifStat.IfHCInMulticastPkts, tags) } if limitCheck(IfHCOutMulticastPkts, ins.MulticastPktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_multicast_pkt", IfHCOutMulticastPkts, tags)) } else { - log.Println("W! if_out_multicast_pkt out of range, current:", ifStat.IfHCOutMulticastPkts, "lasttime:", lastifStat.IfHCOutMulticastPkts, "tags:", tags) + klog.Warningf("if_out_multicast_pkt out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCOutMulticastPkts, lastifStat.IfHCOutMulticastPkts, tags) } } } @@ -466,13 +466,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfInDiscards, ins.DiscardsPktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_discards", IfInDiscards, tags)) } else { - log.Println("W! 
if_in_discards out of range, current:", ifStat.IfInDiscards, "lasttime:", lastifStat.IfInDiscards, "tags:", tags) + klog.Warningf("if_in_discards out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfInDiscards, lastifStat.IfInDiscards, tags) } if limitCheck(IfOutDiscards, ins.DiscardsPktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_discards", IfOutDiscards, tags)) } else { - log.Println("W! if_out_discards out of range, current:", ifStat.IfOutDiscards, "lasttime:", lastifStat.IfOutDiscards, "tags:", tags) + klog.Warningf("if_out_discards out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfOutDiscards, lastifStat.IfOutDiscards, tags) } } } @@ -491,13 +491,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfInErrors, ins.ErrorsPktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_errors", IfInErrors, tags)) } else { - log.Println("W! if_in_errors out of range, current:", ifStat.IfInErrors, "lasttime:", lastifStat.IfInErrors, "tags:", tags) + klog.Warningf("if_in_errors out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfInErrors, lastifStat.IfInErrors, tags) } if limitCheck(IfOutErrors, ins.ErrorsPktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_errors", IfOutErrors, tags)) } else { - log.Println("W! if_out_errors out of range, current:", ifStat.IfOutErrors, "lasttime:", lastifStat.IfOutErrors, "tags:", tags) + klog.Warningf("if_out_errors out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfOutErrors, lastifStat.IfOutErrors, tags) } } } @@ -513,7 +513,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfInUnknownProtos, ins.UnknownProtosPktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_unknown_protos", IfInUnknownProtos, tags)) } else { - log.Println("W! 
if_in_unknown_protos out of range, current:", ifStat.IfInUnknownProtos, "lasttime:", lastifStat.IfInUnknownProtos, "tags:", tags) + klog.Warningf("if_in_unknown_protos out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfInUnknownProtos, lastifStat.IfInUnknownProtos, tags) } } } @@ -529,7 +529,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfOutQLen, ins.OutQlenPktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_qlen", IfOutQLen, tags)) } else { - log.Println("W! if_out_qlen out of range, current:", ifStat.IfOutQLen, "lasttime:", lastifStat.IfOutQLen, "tags:", tags) + klog.Warningf("if_out_qlen out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfOutQLen, lastifStat.IfOutQLen, tags) } } } @@ -548,13 +548,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *types.SampleList) { if limitCheck(IfHCInUcastPkts, ins.PktLimit) { slist.PushFront(types.NewSample(inputName, "if_in_pkts", IfHCInUcastPkts, tags)) } else { - log.Println("W! if_in_pkts out of range, current:", ifStat.IfHCInUcastPkts, "lasttime:", lastifStat.IfHCInUcastPkts, "tags:", tags) + klog.Warningf("if_in_pkts out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCInUcastPkts, lastifStat.IfHCInUcastPkts, tags) } if limitCheck(IfHCOutUcastPkts, ins.PktLimit) { slist.PushFront(types.NewSample(inputName, "if_out_pkts", IfHCOutUcastPkts, tags)) } else { - log.Println("W! if_out_pkts out of range, current:", ifStat.IfHCOutUcastPkts, "lasttime:", lastifStat.IfHCOutUcastPkts, "tags:", tags) + klog.Warningf("if_out_pkts out of range, current: %v, lasttime: %v, tags: %v", ifStat.IfHCOutUcastPkts, lastifStat.IfHCOutUcastPkts, tags) } } } @@ -584,11 +584,11 @@ func (ins *Instance) ifstat(wg *sync.WaitGroup, sema *semaphore.Semaphore, ip st } if ins.DebugMod { - log.Println("D! 
switch gather ifstat, ip:", ip, "use:", time.Since(start)) + klog.V(1).InfoS("switch gather ifstat", "ip", ip, "duration", time.Since(start)) } if err != nil { - log.Println("E! failed to gather ifstat, ip:", ip, "error:", err) + klog.ErrorS(err, "failed to gather ifstat", "ip", ip) return } @@ -632,7 +632,7 @@ func (ins *Instance) gatherPing(ips []string, slist *types.SampleList) []string } } - log.Println("I! switch alive ip count:", len(ips)) + klog.InfoS("switch alive ip count", "count", len(ips)) return ips } diff --git a/inputs/system/ps.go b/inputs/system/ps.go index e36076f13..b05065f80 100644 --- a/inputs/system/ps.go +++ b/inputs/system/ps.go @@ -1,7 +1,6 @@ package system import ( - "log" "os" "path/filepath" "strings" @@ -11,6 +10,7 @@ import ( "github.com/shirou/gopsutil/v3/host" "github.com/shirou/gopsutil/v3/mem" "github.com/shirou/gopsutil/v3/net" + "k8s.io/klog/v2" ) type PS interface { @@ -155,7 +155,7 @@ func (s *SystemPS) DiskUsage( } du, err := s.PSDiskUsage(mountpoint) if err != nil { - log.Println("E! failed to get disk usage, mountpoint:", mountpoint, "error:", err) + klog.ErrorS(err, "failed to get disk usage", "mountpoint", mountpoint) dun.DeviceError = 1 du = &disk.UsageStat{} } diff --git a/inputs/system/system.go b/inputs/system/system.go index 0a4de899b..7538e81c4 100644 --- a/inputs/system/system.go +++ b/inputs/system/system.go @@ -1,7 +1,6 @@ package system import ( - "log" "os" "strings" @@ -11,6 +10,7 @@ import ( "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/host" "github.com/shirou/gopsutil/v3/load" + "k8s.io/klog/v2" ) const inputName = "system" @@ -37,13 +37,13 @@ func (s *SystemStats) Name() string { func (s *SystemStats) Gather(slist *types.SampleList) { loadavg, err := load.Avg() if err != nil && !strings.Contains(err.Error(), "not implemented") { - log.Println("E! 
failed to gather system load:", err) + klog.ErrorS(err, "failed to gather system load") return } numCPUs, err := cpu.Counts(true) if err != nil { - log.Println("E! failed to gather cpu number:", err) + klog.ErrorS(err, "failed to gather cpu number") return } @@ -59,7 +59,7 @@ func (s *SystemStats) Gather(slist *types.SampleList) { uptime, err := host.Uptime() if err != nil { - log.Println("E! failed to get host uptime:", err) + klog.ErrorS(err, "failed to get host uptime") } else { fields["uptime"] = uptime } @@ -69,15 +69,15 @@ func (s *SystemStats) Gather(slist *types.SampleList) { if err == nil { fields["n_users"] = len(users) } else if os.IsNotExist(err) { - log.Println("W! reading os users:", err) + klog.Warningf("reading os users: %v", err) } else if os.IsPermission(err) { - log.Println("W! reading os users:", err) + klog.Warningf("reading os users: %v", err) } } hostInfo, err := host.Info() if err != nil { - log.Println("E! failed to gather host info:", err) + klog.ErrorS(err, "failed to gather host info") } else { slist.PushSample(inputName, "info", 1, map[string]string{ "kernel_version": hostInfo.KernelVersion, diff --git a/inputs/systemd/systemd_linux.go b/inputs/systemd/systemd_linux.go index a99db4e73..82e6e27d8 100644 --- a/inputs/systemd/systemd_linux.go +++ b/inputs/systemd/systemd_linux.go @@ -19,7 +19,6 @@ package systemd import ( "context" "fmt" - "log" "math" "regexp" "strconv" @@ -30,6 +29,7 @@ import ( "github.com/coreos/go-systemd/v22/dbus" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) // Init returns a new Collector exposing systemd statistics. 
@@ -68,13 +68,13 @@ func (s *Systemd) Gather(slist *types.SampleList) { systemdVersion, systemdVersionFull := s.getSystemdVersion() if systemdVersion < minSystemdVersionSystemState { - log.Println("msg", "Detected systemd version is lower than minimum, some systemd state and timer metrics will not be available", "current", systemdVersion, "minimum", minSystemdVersionSystemState) + klog.Warningf("detected systemd version is lower than minimum, some systemd state and timer metrics will not be available; current=%d minimum=%d", systemdVersion, minSystemdVersionSystemState) } slist.PushSample(inputName, "version", systemdVersion, map[string]string{"version": systemdVersionFull}) allUnits, err := s.getAllUnits() if err != nil { - log.Println("E! couldn't get units: %w", err) + klog.ErrorS(err, "couldn't get systemd units") return } @@ -82,13 +82,13 @@ func (s *Systemd) Gather(slist *types.SampleList) { summary := summarizeUnits(allUnits) s.collectSummaryMetrics(slist, summary) if s.DebugMod { - log.Println("D!", "collectSummaryMetrics took", "duration_seconds", time.Since(begin).Seconds()) + klog.V(1).InfoS("collectSummaryMetrics took", "duration_seconds", time.Since(begin).Seconds()) } begin = time.Now() units := filterUnits(allUnits, s.unitIncludePattern, s.unitExcludePattern) if s.DebugMod { - log.Println("D!", "filterUnits took", "duration_seconds", time.Since(begin).Seconds()) + klog.V(1).InfoS("filterUnits took", "duration_seconds", time.Since(begin).Seconds()) } var wg sync.WaitGroup @@ -140,7 +140,7 @@ func (s *Systemd) Gather(slist *types.SampleList) { err = s.collectSystemState(slist) } if err != nil { - log.Println("E! 
collect systemd state:", err) + klog.ErrorS(err, "collect systemd state") } } @@ -150,14 +150,14 @@ func (s *Systemd) collectUnitStatusMetrics(slist *types.SampleList, units []unit if strings.HasSuffix(unit.Name, ".service") { serviceTypeProperty, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Service", "Type") if err != nil { - log.Println("E!", "couldn't get unit type", "unit", unit.Name, "err", err) + klog.ErrorS(err, "couldn't get systemd unit type", "unit", unit.Name) } else { serviceType = serviceTypeProperty.Value.Value().(string) } } else if strings.HasSuffix(unit.Name, ".mount") { serviceTypeProperty, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Mount", "Type") if err != nil { - log.Println("E!", "couldn't get unit type", "unit", unit.Name, "err", err) + klog.ErrorS(err, "couldn't get systemd unit type", "unit", unit.Name) } else { serviceType = serviceTypeProperty.Value.Value().(string) } @@ -174,7 +174,7 @@ func (s *Systemd) collectUnitStatusMetrics(slist *types.SampleList, units []unit // NRestarts wasn't added until systemd 235. 
restartsCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Service", "NRestarts") if err != nil { - log.Println("E!", "couldn't get unit NRestarts", "unit", unit.Name, "err", err) + klog.ErrorS(err, "couldn't get systemd unit NRestarts", "unit", unit.Name) } else { slist.PushSample(inputName, "service_restart_total", restartsCount.Value.Value().(uint32), map[string]string{"name": unit.Name}) @@ -192,7 +192,7 @@ func (s *Systemd) collectSockets(slist *types.SampleList, units []unit) { acceptedConnectionCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Socket", "NAccepted") if err != nil { - log.Println("W!", "couldn't get unit NAccepted", "unit", unit.Name, "err", err) + klog.Warningf("couldn't get systemd unit NAccepted; unit=%s err=%v", unit.Name, err) continue } tag["name"] = unit.Name @@ -201,7 +201,7 @@ func (s *Systemd) collectSockets(slist *types.SampleList, units []unit) { currentConnectionCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Socket", "NConnections") if err != nil { - log.Println("W!", "couldn't get unit NConnections", "unit", unit.Name, "err", err) + klog.Warningf("couldn't get systemd unit NConnections; unit=%s err=%v", unit.Name, err) continue } slist.PushSample(inputName, "socket_current_connections", @@ -210,7 +210,7 @@ func (s *Systemd) collectSockets(slist *types.SampleList, units []unit) { // NRefused wasn't added until systemd 239. 
refusedConnectionCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Socket", "NRefused") if err != nil { - log.Printf("couldn't get unit '%s' NRefused: %s", unit.Name, err) + klog.Warningf("couldn't get systemd unit %q NRefused: %v", unit.Name, err) } else { slist.PushSample(inputName, "socket_refused_connections_total", refusedConnectionCount.Value.Value().(uint32), tag) @@ -228,7 +228,7 @@ func (s *Systemd) collectUnitStartTimeMetrics(slist *types.SampleList, units []u } else { timestampValue, err := s.conn.GetUnitPropertyContext(context.TODO(), unit.Name, "ActiveEnterTimestamp") if err != nil { - log.Println("W!", "couldn't get unit StartTimeUsec", "unit", unit.Name, "err", err) + klog.Warningf("couldn't get systemd unit StartTimeUsec; unit=%s err=%v", unit.Name, err) continue } startTimeUsec = timestampValue.Value.Value().(uint64) @@ -249,7 +249,7 @@ func (s *Systemd) collectUnitTasksMetrics(slist *types.SampleList, units []unit) if strings.HasSuffix(unit.Name, ".service") { tasksCurrentCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Service", "TasksCurrent") if err != nil { - log.Println("E!", "couldn't get unit TasksCurrent", "unit", unit.Name, "err", err) + klog.ErrorS(err, "couldn't get systemd unit TasksCurrent", "unit", unit.Name) } else { val = tasksCurrentCount.Value.Value().(uint64) // Don't set if tasksCurrent if dbus reports MaxUint64. @@ -259,7 +259,7 @@ func (s *Systemd) collectUnitTasksMetrics(slist *types.SampleList, units []unit) } tasksMaxCount, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Service", "TasksMax") if err != nil { - log.Println("E!", "couldn't get unit TasksMax", "unit", unit.Name, "err", err) + klog.ErrorS(err, "couldn't get systemd unit TasksMax", "unit", unit.Name) } else { val = tasksMaxCount.Value.Value().(uint64) // Don't set if tasksMax if dbus reports MaxUint64. 
@@ -281,7 +281,7 @@ func (s *Systemd) collectTimers(slist *types.SampleList, units []unit) { lastTriggerValue, err := s.conn.GetUnitTypePropertyContext(context.TODO(), unit.Name, "Timer", "LastTriggerUSec") if err != nil { - log.Println("W!", "couldn't get unit LastTriggerUSec", "unit", unit.Name, "err", err) + klog.Warningf("couldn't get systemd unit LastTriggerUSec; unit=%s err=%v", unit.Name, err) continue } diff --git a/inputs/tengine/tengine.go b/inputs/tengine/tengine.go index a21486d81..a5497dffa 100644 --- a/inputs/tengine/tengine.go +++ b/inputs/tengine/tengine.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "tengine" @@ -136,14 +136,14 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, u := range ins.Urls { addr, err := url.Parse(u) if err != nil { - log.Println("E! failed to parse the url:", u, "error:", err) + klog.ErrorS(err, "failed to parse tengine url", "url", u) continue } wg.Add(1) go func(addr *url.URL) { defer wg.Done() if err := ins.gather(addr, slist); err != nil { - log.Println("E!", err) + klog.ErrorS(err, "failed to gather tengine metrics", "url", addr.String()) } }(addr) } @@ -153,7 +153,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gather(addr *url.URL, slist *types.SampleList) error { if ins.DebugMod { - log.Println("D! tengine... 
url:", addr) + klog.V(1).InfoS("tengine gathering url", "url", addr.String()) } var tengineStatus TengineStatus diff --git a/inputs/tomcat/tomcat.go b/inputs/tomcat/tomcat.go index 81a52dc25..ba3486424 100644 --- a/inputs/tomcat/tomcat.go +++ b/inputs/tomcat/tomcat.go @@ -2,7 +2,6 @@ package tomcat import ( "encoding/xml" - "log" "net/http" "net/url" "strconv" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "tomcat" @@ -169,13 +169,13 @@ func (ins *Instance) Gather(slist *types.SampleList) { resp, err := ins.client.Do(ins.request) if err != nil { slist.PushFront(types.NewSample(inputName, "up", 0, tags)) - log.Println("E! failed to query tomcat url:", err) + klog.ErrorS(err, "failed to query tomcat url", "url", ins.URL) return } if resp.StatusCode != http.StatusOK { slist.PushFront(types.NewSample(inputName, "up", 0, tags)) - log.Println("E! received HTTP status code:", resp.StatusCode, "expected: 200") + klog.ErrorS(nil, "received unexpected HTTP status code", "url", ins.URL, "status_code", resp.StatusCode, "expected", 200) return } @@ -184,7 +184,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { var status TomcatStatus if err := xml.NewDecoder(resp.Body).Decode(&status); err != nil { slist.PushFront(types.NewSample(inputName, "up", 0, tags)) - log.Println("E! 
failed to decode response body:", err) + klog.ErrorS(err, "failed to decode tomcat response body", "url", ins.URL) return } diff --git a/inputs/traffic_server/traffic_server.go b/inputs/traffic_server/traffic_server.go index 1a8fde914..08d3a9f05 100644 --- a/inputs/traffic_server/traffic_server.go +++ b/inputs/traffic_server/traffic_server.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strconv" @@ -15,6 +14,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "traffic_server" @@ -108,7 +108,7 @@ type Data struct { func (ins *Instance) gather(slist *types.SampleList, target string) { if ins.DebugMod { - log.Println("D! traffic_server... target:", target) + klog.V(1).InfoS("traffic_server gathering target", "target", target) } labels := map[string]string{"target": target} @@ -117,7 +117,7 @@ func (ins *Instance) gather(slist *types.SampleList, target string) { err := ins.gatherJSONData(target, data) if err != nil { - log.Println("E! 
failed to gather json data:", err) + klog.ErrorS(err, "failed to gather traffic_server json data", "target", target) return } var fields = make(map[string]interface{}) diff --git a/inputs/vsphere/client.go b/inputs/vsphere/client.go index 129bfd38e..08ede31e4 100644 --- a/inputs/vsphere/client.go +++ b/inputs/vsphere/client.go @@ -4,7 +4,6 @@ import ( "context" "crypto/tls" "fmt" - "log" "net/url" "strconv" "strings" @@ -20,6 +19,7 @@ import ( "github.com/vmware/govmomi/vim25/methods" "github.com/vmware/govmomi/vim25/soap" "github.com/vmware/govmomi/vim25/types" + "k8s.io/klog/v2" ) // The highest number of metrics we can query for, no matter what settings @@ -96,7 +96,7 @@ func (cf *ClientFactory) testClient(ctx context.Context) error { ctx1, cancel1 := context.WithTimeout(ctx, time.Duration(cf.parent.Timeout)) defer cancel1() if _, err := methods.GetCurrentTime(ctx1, cf.client.Client); err != nil { - log.Println("I! Client session seems to have time out. Reauthenticating!") + klog.InfoS("vsphere client session timed out, reauthenticating", "vcenter", cf.vSphereURL.Host) ctx2, cancel2 := context.WithTimeout(ctx, time.Duration(cf.parent.Timeout)) defer cancel2() if err := cf.client.Client.SessionManager.Login(ctx2, url.UserPassword(cf.parent.Username, cf.parent.Password)); err != nil { @@ -124,7 +124,7 @@ func NewClient(ctx context.Context, vSphereURL *url.URL, vs *Instance) (*Client, vSphereURL.User = url.UserPassword(vs.Username, vs.Password) } if vs.DebugMod { - log.Println("D! Creating client: ", vSphereURL.Host) + klog.V(1).InfoS("creating vsphere client", "vcenter", vSphereURL.Host) } soapClient := soap.NewClient(vSphereURL, tlsCfg.InsecureSkipVerify) @@ -199,10 +199,10 @@ func NewClient(ctx context.Context, vSphereURL *url.URL, vs *Instance) (*Client, return nil, err } if vs.DebugMod { - log.Println("D! 
vCenter says max_query_metrics should be ", n) + klog.V(1).InfoS("vcenter reported max_query_metrics", "vcenter", vSphereURL.Host, "max_query_metrics", n) } if n < vs.MaxQueryMetrics { - log.Printf("W! Configured max_query_metrics is %d, but server limits it to %d. Reducing.", vs.MaxQueryMetrics, n) + klog.Warningf("configured max_query_metrics is %d, but server limits it to %d; reducing", vs.MaxQueryMetrics, n) vs.MaxQueryMetrics = n } return client, nil @@ -225,7 +225,7 @@ func (c *Client) close() { defer cancel() if c.Client != nil { if err := c.Client.Logout(ctx); err != nil { - log.Println("E! Logout: ", err.Error()) + klog.ErrorS(err, "vsphere logout failed") } } }) @@ -255,7 +255,7 @@ func (c *Client) GetMaxQueryMetrics(ctx context.Context) (int, error) { v, err := strconv.Atoi(s) if err == nil { if c.DebugMode { - log.Printf("D! vCenter maxQueryMetrics is defined: %d", v) + klog.V(1).InfoS("vcenter maxQueryMetrics is defined", "max_query_metrics", v) } if v == -1 { // Whatever the server says, we never ask for more metrics than this. @@ -267,17 +267,17 @@ func (c *Client) GetMaxQueryMetrics(ctx context.Context) (int, error) { // Fall through version-based inference if value isn't usable } } else { - log.Println("W! Option query for maxQueryMetrics failed. Using default") + klog.Warning("option query for maxQueryMetrics failed; using default") } // No usable maxQueryMetrics setting. Infer based on version ver := c.Client.Client.ServiceContent.About.Version parts := strings.Split(ver, ".") if len(parts) < 2 { - log.Printf("W! vCenter returned an invalid version string: %s. Using default query size=64", ver) + klog.Warningf("vCenter returned an invalid version string: %s; using default query size=64", ver) return 64, nil } - log.Println("I! 
vCenter version is: ", ver) + klog.InfoS("vcenter version detected", "version", ver) major, err := strconv.Atoi(parts[0]) if err != nil { return 0, err diff --git a/inputs/vsphere/endpoint.go b/inputs/vsphere/endpoint.go index 35a4e12bc..3cf6f9705 100644 --- a/inputs/vsphere/endpoint.go +++ b/inputs/vsphere/endpoint.go @@ -2,8 +2,6 @@ package vsphere import ( "context" - "fmt" - "log" "math" "math/rand" "net/url" @@ -18,6 +16,7 @@ import ( "github.com/vmware/govmomi/performance" "github.com/vmware/govmomi/vim25/mo" gtypes "github.com/vmware/govmomi/vim25/types" + "k8s.io/klog/v2" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/types" @@ -255,7 +254,7 @@ func anythingEnabled(ex []string) bool { func newFilterOrPanic(include []string, exclude []string) filter.Filter { f, err := filter.NewIncludeExcludeFilter(include, exclude) if err != nil { - log.Println("E! Include/exclude filters are invalid: ", err) + klog.ErrorS(err, "include/exclude filters are invalid") return nil } return f @@ -281,10 +280,10 @@ func (e *Endpoint) startDiscovery(ctx context.Context) { case <-e.discoveryTicker.C: err := e.discover(ctx) if err != nil && err != context.Canceled { - log.Println(fmt.Sprintf("E! Discovery for %s: %s", e.URL.Host, err.Error())) + klog.ErrorS(err, "vsphere discovery failed", "host", e.URL.Host) } case <-ctx.Done(): - log.Println("D! Exiting discovery goroutine for ", e.URL.Host) + klog.V(1).InfoS("exiting discovery goroutine", "host", e.URL.Host) e.discoveryTicker.Stop() return } @@ -295,7 +294,7 @@ func (e *Endpoint) startDiscovery(ctx context.Context) { func (e *Endpoint) initalDiscovery(ctx context.Context) { err := e.discover(ctx) if err != nil && err != context.Canceled { - log.Println(fmt.Sprintf("E! 
Discovery for %s: %s", e.URL.Host, err.Error())) + klog.ErrorS(err, "initial vsphere discovery failed", "host", e.URL.Host) } e.startDiscovery(ctx) } @@ -310,14 +309,14 @@ func (e *Endpoint) init(ctx context.Context) error { if e.customAttrEnabled { fields, err := client.GetCustomFields(ctx) if err != nil { - log.Println("W! Could not load custom field metadata") + klog.Warning("could not load custom field metadata") } else { e.customFields = fields } } if time.Duration(e.Parent.ObjectDiscoveryInterval) > 0 { - log.Println("D! Running initial discovery") + klog.V(1).InfoS("running initial discovery", "host", e.URL.Host) e.initalDiscovery(ctx) } e.initialized = true @@ -387,7 +386,7 @@ func (e *Endpoint) getAncestorName(ctx context.Context, client *Client, resource defer cancel1() err := o.Properties(ctx1, here, []string{"parent", "name"}, &result) if err != nil { - log.Printf("W! Error while resolving parent. Assuming no parent exists. Error: %s", err.Error()) + klog.Warningf("error while resolving parent; assuming no parent exists: %s", err.Error()) return true } if result.Reference().Type == resourceType { @@ -397,7 +396,7 @@ func (e *Endpoint) getAncestorName(ctx context.Context, client *Client, resource } if result.Parent == nil { if e.debug() { - log.Printf("D! No parent found for %s (ascending from %s)", here.Reference(), r.Reference()) + klog.V(1).InfoS("no parent found while ascending", "current", here.Reference(), "origin", r.Reference()) } return true @@ -433,7 +432,7 @@ func (e *Endpoint) discover(ctx context.Context) error { return err } if e.debug() { - log.Printf("D! Discover new objects for %s", e.URL.Host) + klog.V(1).InfoS("discovering new objects", "host", e.URL.Host) } dcNameCache := make(map[string]string) @@ -443,7 +442,7 @@ func (e *Endpoint) discover(ctx context.Context) error { newObjects := make(map[string]objectMap) for k, res := range e.resourceKinds { if e.debug() { - log.Printf("D! 
Discovering resources for %s", res.name) + klog.V(1).InfoS("discovering resource kind", "resource", res.name) } // Need to do this for all resource types even if they are not enabled if res.enabled || k != "vm" { @@ -480,7 +479,7 @@ func (e *Endpoint) discover(ctx context.Context) error { } newObjects[k] = objects if e.debug() { - log.Printf("D! discovered_objects type is : %s and number is : %d ", res.name, int64(len(objects))) + klog.V(1).InfoS("discovered objects", "resource", res.name, "count", int64(len(objects))) } numRes += int64(len(objects)) } @@ -502,7 +501,7 @@ func (e *Endpoint) discover(ctx context.Context) error { if e.customAttrEnabled { fields, err = client.GetCustomFields(ctx) if err != nil { - log.Printf("W! Could not load custom field metadata ,Error: %s", err.Error()) + klog.Warningf("could not load custom field metadata: %s", err.Error()) fields = nil } } @@ -520,18 +519,18 @@ func (e *Endpoint) discover(ctx context.Context) error { e.customFields = fields } if e.debug() { - log.Printf("D! discovered_objects type is : %s and number is : %d ", "instance-total", numRes) + klog.V(1).InfoS("discovered objects", "resource", "instance-total", "count", numRes) } return nil } func (e *Endpoint) simpleMetadataSelect(ctx context.Context, client *Client, res *resourceKind) { if e.debug() { - log.Printf("D! Using fast metric metadata selection for %s", res.name) + klog.V(1).InfoS("using fast metric metadata selection", "resource", res.name) } m, err := client.CounterInfoByName(ctx) if err != nil { - log.Printf("E! Getting metric metadata. Discovery will be incomplete. Error: %s", err.Error()) + klog.ErrorS(err, "getting metric metadata failed; discovery will be incomplete", "resource", res.name) return } res.metrics = make(performance.MetricList, 0, len(res.include)) @@ -547,7 +546,7 @@ func (e *Endpoint) simpleMetadataSelect(ctx context.Context, client *Client, res } res.metrics = append(res.metrics, cnt) } else { - log.Printf("W! 
Metric name %s is unknown. Will not be collected", s) + klog.Warningf("metric name %s is unknown and will not be collected", s) } } } @@ -576,7 +575,7 @@ func (e *Endpoint) complexMetadataSelect(ctx context.Context, res *resourceKind, instInfoMux := sync.Mutex{} te, err := NewThrottledExecutor(e.Parent.DiscoverConcurrency) if err != nil { - log.Println("E! NewThrottledExecutor", err.Error()) + klog.ErrorS(err, "create throttled executor failed") return } for _, obj := range sampledObjects { @@ -584,7 +583,7 @@ func (e *Endpoint) complexMetadataSelect(ctx context.Context, res *resourceKind, te.Run(ctx, func() { metrics, err := e.getMetadata(ctx, obj, res.sampling) if err != nil { - log.Printf("E! Getting metric metadata. Discovery will be incomplete. Error: %s", err.Error()) + klog.ErrorS(err, "getting metric metadata failed; discovery will be incomplete", "resource", res.name, "object", obj.name) } mMap := make(map[string]gtypes.PerfMetricId) for _, m := range metrics { @@ -598,7 +597,7 @@ func (e *Endpoint) complexMetadataSelect(ctx context.Context, res *resourceKind, } } if e.debug() { - log.Printf("D!Found %d metrics for %s", len(mMap), obj.name) + klog.V(1).InfoS("found metrics for object", "count", len(mMap), "object", obj.name) } instInfoMux.Lock() defer instInfoMux.Unlock() @@ -622,7 +621,7 @@ func getDatacenters(ctx context.Context, e *Endpoint, resourceFilter *ResourceFi defer cancel1() err := resourceFilter.FindAll(ctx1, &resources) if err != nil { - log.Println("E! getDatacenters resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getDatacenters resourceFilter.FindAll error") return nil, err } m := make(objectMap, len(resources)) @@ -644,7 +643,7 @@ func getClusters(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilte defer cancel1() err := resourceFilter.FindAll(ctx1, &resources) if err != nil { - log.Println("E! 
getClusters resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getClusters resourceFilter.FindAll error") return nil, err } cache := make(map[string]*gtypes.ManagedObjectReference) @@ -667,7 +666,7 @@ func getClusters(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilte defer cancel3() err = o.Properties(ctx3, *r.Parent, []string{"parent"}, &folder) if err != nil { - log.Printf("W! Error while getting folder parent: %s", err.Error()) + klog.Warningf("error while getting folder parent: %s", err.Error()) p = nil } else { pp := folder.Parent.Reference() @@ -695,7 +694,7 @@ func getResourcePools(ctx context.Context, e *Endpoint, resourceFilter *Resource var resources []mo.ResourcePool err := resourceFilter.FindAll(ctx, &resources) if err != nil { - log.Println("E! getResourcePools resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getResourcePools resourceFilter.FindAll error") return nil, err } m := make(objectMap) @@ -725,7 +724,7 @@ func getHosts(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) var resources []mo.HostSystem err := resourceFilter.FindAll(ctx, &resources) if err != nil { - log.Println("E! getHosts resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getHosts resourceFilter.FindAll error") return nil, err } m := make(objectMap) @@ -746,7 +745,7 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o defer cancel1() err := resourceFilter.FindAll(ctx1, &resources) if err != nil { - log.Println("E! getVMs resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getVMs resourceFilter.FindAll error") return nil, err } m := make(objectMap) @@ -826,7 +825,7 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o } key, ok := e.customFields[val.Key] if !ok { - log.Printf("W! Metadata for custom field %d not found. 
Skipping", val.Key) + klog.Warningf("metadata for custom field %d not found; skipping", val.Key) continue } if e.customAttrFilter.Match(key) { @@ -854,7 +853,7 @@ func getDatastores(ctx context.Context, e *Endpoint, resourceFilter *ResourceFil defer cancel1() err := resourceFilter.FindAll(ctx1, &resources) if err != nil { - log.Println("E! getDatastores resourceFilter.FindAll Error:", err.Error()) + klog.ErrorS(err, "getDatastores resourceFilter.FindAll error") return nil, err } m := make(objectMap) @@ -885,12 +884,12 @@ func (e *Endpoint) loadCustomAttributes(entity *mo.ManagedEntity) map[string]str for _, v := range entity.CustomValue { cv, ok := v.(*gtypes.CustomFieldStringValue) if !ok { - log.Printf("W! Metadata for custom field %d not of string type. Skipping", cv.Key) + klog.Warning("metadata for custom field is not of string type; skipping") continue } key, ok := e.customFields[cv.Key] if !ok { - log.Printf("W! Metadata for custom field %d not found. Skipping", cv.Key) + klog.Warningf("metadata for custom field %d not found; skipping", cv.Key) continue } if e.customAttrFilter.Match(key) { @@ -958,7 +957,7 @@ func submitChunkJob(ctx context.Context, te *ThrottledExecutor, job queryJob, pq func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Time, latest time.Time, job queryJob) { te, err := NewThrottledExecutor(e.Parent.CollectConcurrency) if err != nil { - log.Println("E! NewThrottledExecutor", err.Error()) + klog.ErrorS(err, "create throttled executor failed") return } maxMetrics := e.Parent.MaxQueryMetrics @@ -983,7 +982,7 @@ func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Tim metricName := e.getMetricNameForID(metric.CounterId) if metricName == "" { if e.debug() { - log.Printf("D! Unable to find metric name for id %d. 
Skipping!", metric.CounterId) + klog.V(1).InfoS("unable to find metric name for id; skipping", "counter_id", metric.CounterId) } continue } @@ -1015,8 +1014,12 @@ func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Tim // OR if we're past the absolute maximum limit if (!res.realTime && len(bucket.MetricId) >= maxMetrics) || len(bucket.MetricId) > maxRealtimeMetrics { if e.debug() { - log.Printf("D! Submitting partial query: %d metrics (%d remaining) of type %s for %s. Total objects %d", - len(bucket.MetricId), len(res.metrics)-metricIdx, res.name, e.URL.Host, len(res.objects)) + klog.V(1).InfoS("submitting partial query", + "metrics", len(bucket.MetricId), + "remaining", len(res.metrics)-metricIdx, + "resource", res.name, + "host", e.URL.Host, + "objects", len(res.objects)) } // Don't send work items if the context has been cancelled. @@ -1035,7 +1038,7 @@ func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Tim numQs += len(bucket.MetricId) if (!res.realTime && numQs > e.Parent.MaxQueryObjects) || numQs > maxRealtimeMetrics { if e.debug() { - log.Printf("D! Submitting final bucket job for %s: %d metrics", res.name, numQs) + klog.V(1).InfoS("submitting final bucket job", "resource", res.name, "metrics", numQs) } submitChunkJob(ctx, te, job, pqs) pqs = make(queryChunk, 0, e.Parent.MaxQueryObjects) @@ -1046,7 +1049,7 @@ func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Tim // Submit any jobs left in the queue if len(pqs) > 0 { if e.debug() { - log.Printf("D! Submitting job for %s: %d objects, %d metrics", res.name, len(pqs), numQs) + klog.V(1).InfoS("submitting job", "resource", res.name, "objects", len(pqs), "metrics", numQs) } submitChunkJob(ctx, te, job, pqs) } @@ -1059,15 +1062,15 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli res := e.resourceKinds[resourceType] client, err := e.clientFactory.GetClient(ctx) if e.debug() { - log.Printf("D! 
collectResource %s ", resourceType) + klog.V(1).InfoS("collecting resource", "resource", resourceType) } if err != nil { - log.Println("E! collectResource Error: ", err.Error()) + klog.ErrorS(err, "collectResource failed", "resource", resourceType) return err } now, err := client.GetServerTime(ctx) if err != nil { - log.Println("E! GetServerTime Error : ", err.Error()) + klog.ErrorS(err, "GetServerTime failed", "resource", resourceType) return err } @@ -1084,11 +1087,11 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli estInterval = s } if e.debug() { - log.Printf("D! resourceType %s Raw interval %s, padded: %s, estimated: %s", resourceType, rawInterval, paddedInterval, estInterval) + klog.V(1).InfoS("resource interval estimated", "resource", resourceType, "raw_interval", rawInterval, "padded_interval", paddedInterval, "estimated_interval", estInterval) } } if e.debug() { - log.Printf("D! resourceType %s Interval estimated to %s", resourceType, estInterval) + klog.V(1).InfoS("resource interval estimated", "resource", resourceType, "estimated_interval", estInterval) } res.lastColl = localNow @@ -1096,13 +1099,12 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli if !latest.IsZero() { elapsed := now.Sub(latest).Seconds() + 5.0 // Allow 5 second jitter. if e.debug() { - log.Printf("D! Latest: %s, elapsed: %f, resource: %s", latest, elapsed, resourceType) + klog.V(1).InfoS("latest sample state", "latest", latest, "elapsed", elapsed, "resource", resourceType) } if !res.realTime && elapsed < float64(res.sampling) { // No new data would be available. We're outta here! if e.debug() { - log.Printf("D! 
Sampling period for %s of %d has not elapsed on %s", - resourceType, res.sampling, e.URL.Host) + klog.V(1).InfoS("sampling period has not elapsed", "resource", resourceType, "sampling", res.sampling, "host", e.URL.Host) } return nil } @@ -1110,8 +1112,7 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli latest = now.Add(time.Duration(-res.sampling) * time.Second) } if e.debug() { - log.Printf("D! Collecting metrics for %d objects of type %s for %s", - len(res.objects), resourceType, e.URL.Host) + klog.V(1).InfoS("collecting metrics", "objects", len(res.objects), "resource", resourceType, "host", e.URL.Host) } count := int64(0) @@ -1125,12 +1126,12 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli n, localLatest, err := e.collectChunk(ctx, chunk, res, slist, estInterval) if err != nil { if e.debug() { - log.Printf("D! CollectChunk for %s returned %d metrics err: %s ", resourceType, n, err) + klog.V(1).InfoS("collectChunk returned error", "resource", resourceType, "metrics", n, "err", err) } return } if e.debug() { - log.Printf("D! CollectChunk for %s returned %d metrics", resourceType, n) + klog.V(1).InfoS("collectChunk completed", "resource", resourceType, "metrics", n) } atomic.AddInt64(&count, int64(n)) tsMux.Lock() @@ -1140,13 +1141,13 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, sli } }) if e.debug() { - log.Printf("D! 
Latest sample for %s set to %s", resourceType, latestSample) + klog.V(1).InfoS("latest sample updated", "resource", resourceType, "latest_sample", latestSample) } if !latestSample.IsZero() { res.latestSample = latestSample } if e.debug() { - log.Printf("discovered_objects type is : %s and number is : %d ", resourceType, count) + klog.V(1).InfoS("collected objects", "resource", resourceType, "count", count) } return nil @@ -1162,7 +1163,7 @@ func (e *Endpoint) alignSamples(info []gtypes.PerfSampleInfo, values []int64, in // data coming back with missing values. Take care of that gracefully! if idx >= len(values) { if e.debug() { - log.Printf("D! len(SampleInfo)>len(Value) %d > %d during alignment", len(info), len(values)) + klog.V(1).InfoS("sample/value length mismatch during alignment", "sample_info_len", len(info), "value_len", len(values)) } break } @@ -1194,14 +1195,14 @@ func (e *Endpoint) alignSamples(info []gtypes.PerfSampleInfo, values []int64, in func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resourceKind, slist *types.SampleList, interval time.Duration) (int, time.Time, error) { if e.debug() { - log.Printf("D! Query for %s has %d QuerySpecs", res.name, len(pqs)) + klog.V(1).InfoS("query chunk specs", "resource", res.name, "query_specs", len(pqs)) } latestSample := time.Time{} count := 0 resourceType := res.name prefix := resourceType if e.debug() { - log.Printf("D! collectChunk for %s", resourceType) + klog.V(1).InfoS("collectChunk start", "resource", resourceType) } client, err := e.clientFactory.GetClient(ctx) if err != nil { @@ -1215,11 +1216,11 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour ems, err := client.QueryMetrics(ctx, pqs) if err != nil { - log.Printf("W! client.QueryMetrics for %s error is %s", resourceType, err.Error()) + klog.Warningf("client.QueryMetrics for %s error: %s", resourceType, err.Error()) return count, latestSample, err } if e.debug() { - log.Printf("D! 
Query for %s returned metrics for %d objects\r\n", resourceType, len(ems)) + klog.V(1).InfoS("query returned metrics", "resource", resourceType, "objects", len(ems)) } // Iterate through results @@ -1227,12 +1228,12 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour moid := em.Entity.Reference().Value instInfo, found := res.objects[moid] if !found { - log.Printf("E! MOID %s not found in cache. Skipping! (This should not happen!)\r\n", moid) + klog.ErrorS(nil, "MOID not found in cache; skipping", "moid", moid) continue } buckets := make(map[string]metricEntry) if e.debug() { - log.Printf("D! Query for %s em.Value len is %d \r\n", resourceType, len(em.Value)) + klog.V(1).InfoS("query returned value count", "resource", resourceType, "value_len", len(em.Value)) } for _, v := range em.Value { name := v.Name @@ -1245,7 +1246,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour // Populate tags objectRef, ok := res.objects[moid] if !ok { - log.Printf("E! MOID %s not found in cache. Skipping", moid) + klog.ErrorS(nil, "MOID not found in cache; skipping", "moid", moid) continue } e.populateTags(objectRef, resourceType, res, t, &v) @@ -1253,14 +1254,14 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour nValues := 0 alignedInfo, alignedValues := e.alignSamples(em.SampleInfo, v.Value, interval) if e.debug() { - log.Printf("D! Query for %s alignedInfo len is %d \r\n", resourceType, len(alignedInfo)) + klog.V(1).InfoS("aligned sample info count", "resource", resourceType, "aligned_info_len", len(alignedInfo)) } for idx, sample := range alignedInfo { // According to the docs, SampleInfo and Value should have the same length, but we've seen corrupted // data coming back with missing values. Take care of that gracefully! if idx >= len(alignedValues) { if e.debug() { - log.Printf("D! 
Len(SampleInfo)>len(Value) %d > %d\r\n", len(alignedInfo), len(alignedValues)) + klog.V(1).InfoS("aligned sample/value length mismatch", "sample_info_len", len(alignedInfo), "value_len", len(alignedValues)) } break } @@ -1273,7 +1274,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour // Organize the metrics into a bucket per measurement. mn, fn := e.makeMetricIdentifier(prefix, name) if e.debug() { - log.Printf("D! makeMetricIdentifier: %s %s\r\n", prefix, name) + klog.V(1).InfoS("makeMetricIdentifier", "prefix", prefix, "name", name) } bKey := mn + " " + v.Instance + " " + strconv.FormatInt(ts.UnixNano(), 10) bucket, found := buckets[bKey] @@ -1285,7 +1286,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour // Percentage values must be scaled down by 100. info, ok := metricInfo[name] if !ok { - log.Printf("E! Could not determine unit for %s. Skipping\r\n", name) + klog.ErrorS(nil, "could not determine unit; skipping", "metric", name) } v := alignedValues[idx] if info.UnitInfo.GetElementDescription().Key == "percent" { @@ -1304,7 +1305,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs queryChunk, res *resour } if nValues == 0 { if e.debug() { - log.Printf("D! Missing value for: %s, %s", name, objectRef.name) + klog.V(1).InfoS("missing value", "metric", name, "object", objectRef.name) } continue } diff --git a/inputs/vsphere/finder.go b/inputs/vsphere/finder.go index 903c8b9aa..3ca8d59df 100644 --- a/inputs/vsphere/finder.go +++ b/inputs/vsphere/finder.go @@ -3,7 +3,6 @@ package vsphere import ( "context" "fmt" - "log" "reflect" "strings" @@ -11,6 +10,7 @@ import ( "github.com/vmware/govmomi/view" "github.com/vmware/govmomi/vim25/mo" "github.com/vmware/govmomi/vim25/types" + "k8s.io/klog/v2" ) var childTypes map[string][]string @@ -75,7 +75,7 @@ func (f *Finder) findResources(ctx context.Context, resType, path string, objs m if err != nil { return err } - log.Printf("I! 
Find(%s, %s) returned %d objects\r\n", resType, path, len(objs)) + klog.InfoS("vsphere find returned objects", "resource_type", resType, "path", path, "count", len(objs)) return nil } diff --git a/inputs/vsphere/tscache.go b/inputs/vsphere/tscache.go index 78d420977..24704a279 100644 --- a/inputs/vsphere/tscache.go +++ b/inputs/vsphere/tscache.go @@ -1,10 +1,10 @@ package vsphere import ( - "fmt" - "log" "sync" "time" + + "k8s.io/klog/v2" ) // TSCache is a cache of timestamps used to determine the validity of datapoints @@ -36,7 +36,7 @@ func (t *TSCache) Purge() { } } if t.debug { - log.Println(fmt.Sprintf("D! purged timestamp cache. %d deleted with %d remaining", n, len(t.table))) + klog.V(1).InfoS("purged vsphere timestamp cache", "deleted", n, "remaining", len(t.table)) } } diff --git a/inputs/vsphere/vsphere.go b/inputs/vsphere/vsphere.go index 9a507bfb7..8cecf6144 100644 --- a/inputs/vsphere/vsphere.go +++ b/inputs/vsphere/vsphere.go @@ -2,10 +2,10 @@ package vsphere import ( "context" - "log" "time" "github.com/vmware/govmomi/vim25/soap" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" @@ -173,12 +173,12 @@ func (ins *Instance) Init() error { // Create endpoints, one for each vCenter we're monitoring u, err := soap.ParseURL(ins.Vcenter) if err != nil { - log.Println("E! soap.ParseURL", err) + klog.ErrorS(err, "soap.ParseURL failed", "vcenter", ins.Vcenter) return err } ep, err := NewEndpoint(ctx, ins, u) if err != nil { - log.Println("E! NewEndpoint", err) + klog.ErrorS(err, "create vsphere endpoint failed", "vcenter", ins.Vcenter) return err } ins.endpoints = ep @@ -186,7 +186,7 @@ func (ins *Instance) Init() error { } func (v *Instance) Drop() { - log.Printf("I! Stopping plugin") + klog.InfoS("stopping vsphere plugin") v.cancel() // Wait for all endpoints to finish. No need to wait for @@ -195,7 +195,7 @@ func (v *Instance) Drop() { // wait for any discovery to complete by trying to grab the // "busy" mutex. 
if v.DebugMod { - log.Printf("D! Waiting for endpoint %q to finish", v.endpoints.URL.Host) + klog.V(1).InfoS("waiting for endpoint to finish", "endpoint", v.endpoints.URL.Host) } func() { v.endpoints.busy.Lock() // Wait until discovery is finished @@ -214,7 +214,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } if err != nil { // acc.AddError(err) - log.Println("E! fail to gather: ", err) + klog.ErrorS(err, "vsphere gather failed") } } diff --git a/inputs/whois/whois.go b/inputs/whois/whois.go index 82b84de2c..77ff7b3a5 100644 --- a/inputs/whois/whois.go +++ b/inputs/whois/whois.go @@ -1,7 +1,6 @@ package whois import ( - "log" "sync" "time" @@ -12,6 +11,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "whois" @@ -142,7 +142,7 @@ func (ins *Instance) queryDomain(domain string, slist *types.SampleList) { if retry > 0 { sleepTime := time.Duration(retry) * 3 * time.Second time.Sleep(sleepTime) - log.Println("W! Retrying", retry+1, "of", maxRetries, "for domain", domain) + klog.Warningf("retrying whois query: attempt=%d max_retries=%d domain=%s", retry+1, maxRetries, domain) } result, err = ins.client.Whois(domain, ins.Server) @@ -150,18 +150,18 @@ func (ins *Instance) queryDomain(domain string, slist *types.SampleList) { break } - log.Println("W! query", domain, "attempt", retry+1, "failed:", err) + klog.Warningf("whois query attempt failed: domain=%s attempt=%d err=%v", domain, retry+1, err) } if err != nil { - log.Println("E! query", ins.Domain, "domain information failed:", err) + klog.ErrorS(err, "failed to query domain information", "domain", domain) return } // 使用 whois-parser 解析结果 parsedResult, err := whoisparser.Parse(result) if err != nil { - log.Println("E! 
parse", ins.Domain, "domain whois result failure:", err) + klog.ErrorS(err, "failed to parse whois result", "domain", domain) return } @@ -172,12 +172,12 @@ func (ins *Instance) queryDomain(domain string, slist *types.SampleList) { if parsedResult.Domain.CreatedDate != "" { CreatedDate, err = ParseTimeToUTCTimestamp(parsedResult.Domain.CreatedDate) if err != nil { - log.Println("E! parsing creation time:", parsedResult.Domain.CreatedDate, "time string failure:", err) + klog.ErrorS(err, "failed to parse domain creation time", "domain", domain, "time", parsedResult.Domain.CreatedDate) return } fields["domain_createddate"] = CreatedDate } else { - log.Println("E! creation time is null") + klog.ErrorS(nil, "domain creation time is empty", "domain", domain) return } @@ -185,22 +185,22 @@ func (ins *Instance) queryDomain(domain string, slist *types.SampleList) { if parsedResult.Domain.UpdatedDate != "" { UpdatedDate, err = ParseTimeToUTCTimestamp(parsedResult.Domain.UpdatedDate) if err != nil { - log.Println("E! parsing update time:", parsedResult.Domain.UpdatedDate, "time string failure:", err) + klog.ErrorS(err, "failed to parse domain update time", "domain", domain, "time", parsedResult.Domain.UpdatedDate) } fields["domain_updateddate"] = UpdatedDate } else { - log.Println("W! update time is null") + klog.Warningf("domain update time is empty: domain=%s", domain) } if parsedResult.Domain.ExpirationDate != "" { ExpirationDate, err = ParseTimeToUTCTimestamp(parsedResult.Domain.ExpirationDate) if err != nil { - log.Println("E! parsing expiration time:", parsedResult.Domain.ExpirationDate, "time string failure:", err) + klog.ErrorS(err, "failed to parse domain expiration time", "domain", domain, "time", parsedResult.Domain.ExpirationDate) return } fields["domain_expirationdate"] = ExpirationDate } else { - log.Println("E! 
expiration time is null") + klog.ErrorS(nil, "domain expiration time is empty", "domain", domain) return } @@ -218,7 +218,7 @@ func (ins *Instance) queryDomain(domain string, slist *types.SampleList) { slist.PushSamples(inputName, fields, tags) } else { - log.Println("E! creation、update、expiration time is all null") + klog.ErrorS(nil, "domain creation, update, and expiration times are all empty", "domain", domain) return } diff --git a/inputs/x509_cert/x509_cert.go b/inputs/x509_cert/x509_cert.go index 7a4ee6bc0..fccc56d38 100644 --- a/inputs/x509_cert/x509_cert.go +++ b/inputs/x509_cert/x509_cert.go @@ -8,7 +8,6 @@ import ( "encoding/pem" "errors" "fmt" - "log" "net" "net/http" "net/smtp" @@ -29,6 +28,7 @@ import ( "flashcat.cloud/categraf/pkg/proxy" commontls "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "x509_cert" @@ -126,7 +126,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { } if err := ins.sourcesToURLs(); err != nil { - log.Printf("E! failed to update sources: %v", err) + klog.ErrorS(err, "failed to update x509 certificate sources") return } @@ -135,7 +135,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { for _, location := range collectedUrls { certs, ocspresp, err := ins.getCert(location, time.Duration(ins.Timeout)) if err != nil { - log.Printf("E! cannot get SSL cert %q: %v", location, err) + klog.ErrorS(err, "failed to get SSL certificate", "target", location.String()) continue } @@ -238,16 +238,19 @@ func (ins *Instance) processCertificate(cert *x509.Certificate, opts x509.Verify chains, err := cert.Verify(opts) if err != nil { if ins.DebugMod { - log.Printf("W! Invalid certificate %v: %v", cert.SerialNumber.Text(16), err) - log.Printf("W! cert DNS names: %v", cert.DNSNames) - log.Printf("W! cert IP addresses: %v", cert.IPAddresses) - log.Printf("W! cert subject: %v", cert.Subject) - log.Printf("W! cert issuer: %v", cert.Issuer) - log.Printf("W! 
opts.DNSName: %v", opts.DNSName) - log.Printf("W! verify options: %v", opts) - log.Printf("W! verify error: %v", err) - log.Printf("W! tlsCfg.ServerName: %v", ins.tlsCfg.ServerName) - log.Printf("W! ServerName: %v", ins.ServerName) + klog.V(1).InfoS( + "invalid x509 certificate during verification", + "serial_number", cert.SerialNumber.Text(16), + "dns_names", cert.DNSNames, + "ip_addresses", cert.IPAddresses, + "subject", cert.Subject, + "issuer", cert.Issuer, + "verify_dns_name", opts.DNSName, + "verify_options", opts, + "tls_server_name", ins.tlsCfg.ServerName, + "server_name", ins.ServerName, + "error", err, + ) } } @@ -534,7 +537,7 @@ func (ins *Instance) collectCertURLs() []*url.URL { for _, path := range ins.globPaths { files := path.Match() if len(files) == 0 { - log.Println("W! could not find file:", path.GetRoots()) + klog.Warningf("could not find file: %v", path.GetRoots()) continue } for _, file := range files { diff --git a/inputs/xskyapi/xskyapi.go b/inputs/xskyapi/xskyapi.go index b9e862fa5..1a225628c 100644 --- a/inputs/xskyapi/xskyapi.go +++ b/inputs/xskyapi/xskyapi.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net" "net/http" "net/url" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const inputName = "xskyapi" @@ -153,7 +153,7 @@ func (ins *Instance) Gather(slist *types.SampleList) { func (ins *Instance) gather(slist *types.SampleList, server string, token string) { if ins.DebugMod { - log.Println("D! xskyapi... server:", server) + klog.V(1).InfoS("xskyapi gather server", "server", server) } pageSize := 500 @@ -172,7 +172,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! 
failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } err = json.Unmarshal(resp, &osUsers) @@ -205,7 +205,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } err = json.Unmarshal(resp, &osBuckets) if err != nil { @@ -238,7 +238,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } er := json.Unmarshal(resp, &dfsQuotas) if er != nil { @@ -270,7 +270,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } er := json.Unmarshal(resp, &blockVolumes) if er != nil { @@ -301,7 +301,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! 
failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } er := json.Unmarshal(resp, &fsFolders) @@ -332,7 +332,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string for { resp, _, err := ins.sendRequest(currentUrl, token, offset, pageSize) if err != nil { - log.Println("E! failed to send request to xskyapi url:", currentUrl, "error:", err) + klog.ErrorS(err, "failed to send request to xskyapi url", "url", currentUrl) } err = json.Unmarshal(resp, &blockVolumes) @@ -340,8 +340,6 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string fmt.Printf("Parsing JSON string exception:%s\n", err) } - // log.Println("D! len(OsUsers):", len(osUsers.OsUser)) - for _, blockVolume := range blockVolumes.BlockVolume { labels["name"] = blockVolume.Name labels["id"] = strconv.Itoa(blockVolume.ID) @@ -355,7 +353,7 @@ func (ins *Instance) gather(slist *types.SampleList, server string, token string } } default: - log.Printf("E! dss_type %s not suppported, expected oss, gfs or eus", ins.DssType) + klog.ErrorS(nil, "dss_type not supported, expected oss, gfs or eus", "dss_type", ins.DssType) } } @@ -363,7 +361,7 @@ func (ins *Instance) sendRequest(serverURL string, token string, offset int, pag // Prepare URL requestURL, _ := url.Parse(serverURL) if ins.DebugMod { - log.Println("D! 
now parseurl:", requestURL) + klog.V(1).InfoS("xskyapi parsed url", "url", requestURL.String()) } // Prepare request query and body diff --git a/inputs/zookeeper/zookeeper.go b/inputs/zookeeper/zookeeper.go index 3da505afb..5d6efcdd9 100644 --- a/inputs/zookeeper/zookeeper.go +++ b/inputs/zookeeper/zookeeper.go @@ -4,9 +4,9 @@ import ( crypto_tls "crypto/tls" "fmt" "io" - "log" "net" "regexp" + "runtime/debug" "strconv" "strings" "sync" @@ -16,6 +16,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tls" "flashcat.cloud/categraf/types" + "k8s.io/klog/v2" ) const ( @@ -116,7 +117,7 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *types.SampleList, defer wg.Done() defer func() { if r := recover(); r != nil { - log.Println("E! Recovered in zookeeper gatherOneHost ", zkHost, r) + klog.ErrorS(fmt.Errorf("panic: %v", r), "recovered in zookeeper gatherOneHost", "zk_host", zkHost, "stack", string(debug.Stack())) } }() @@ -133,7 +134,7 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *types.SampleList, mntrConn, err := ins.ZkConnect(zkHost) if err != nil { slist.PushFront(types.NewSample("", "zk_up", 0, tags)) - log.Println("E! failed to connect zookeeper:", zkHost, "error:", err) + klog.ErrorS(err, "failed to connect zookeeper", "zk_host", zkHost, "command", "mntr") return } @@ -147,7 +148,7 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *types.SampleList, ruokConn, err := ins.ZkConnect(zkHost) if err != nil { slist.PushFront(types.NewSample("", "zk_ruok", 0, tags)) - log.Println("E! failed to connect zookeeper:", zkHost, "error:", err) + klog.ErrorS(err, "failed to connect zookeeper", "zk_host", zkHost, "command", "ruok") return } @@ -160,7 +161,7 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *types.SampleList, srvrConn, err := ins.ZkConnect(zkHost) if err != nil { slist.PushFront(types.NewSample("", "zk_zxid", 0, tags)) - log.Println("E! 
failed to connect zookeeper:", zkHost, "error:", err) + klog.ErrorS(err, "failed to connect zookeeper", "zk_host", zkHost, "command", "srvr") return } @@ -180,7 +181,7 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *types.SampleList, gl // 'mntr' command isn't allowed in zk config, log as warning if strings.Contains(lines[0], cmdNotExecutedSffx) { slist.PushFront(types.NewSample("", "zk_up", 0, globalTags)) - log.Printf(commandNotAllowedTmpl, "mntr", conn.RemoteAddr().String()) + klog.Warningf(commandNotAllowedTmpl, "mntr", conn.RemoteAddr().String()) return } @@ -225,7 +226,7 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *types.SampleList, gl var k string if !isDigit(value) { - log.Printf("warning: skipping metric %q which holds not-digit value: %q", key, value) + klog.Warningf("skipping zookeeper metric %q which holds non-digit value: %q", key, value) continue } k = metricNameReplacer.Replace(key) @@ -245,7 +246,7 @@ func (ins *Instance) gatherRuokResult(conn net.Conn, slist *types.SampleList, gl slist.PushFront(types.NewSample("", "zk_ruok", 1, globalTags)) } else { if strings.Contains(res, cmdNotExecutedSffx) { - log.Printf(commandNotAllowedTmpl, "ruok", conn.RemoteAddr().String()) + klog.Warningf(commandNotAllowedTmpl, "ruok", conn.RemoteAddr().String()) } slist.PushFront(types.NewSample("", "zk_ruok", 0, globalTags)) } @@ -254,7 +255,7 @@ func (ins *Instance) gatherRuokResult(conn net.Conn, slist *types.SampleList, gl func (ins *Instance) gatherSrvrResult(conn net.Conn, slist *types.SampleList, globalTags map[string]string) { res := sendZookeeperCmd(conn, "srvr") if strings.Contains(res, cmdNotExecutedSffx) { - log.Printf(commandNotAllowedTmpl, "srvr", conn.RemoteAddr().String()) + klog.Warningf(commandNotAllowedTmpl, "srvr", conn.RemoteAddr().String()) slist.PushFront(types.NewSample("", "zk_zxid", 0, globalTags)) return } @@ -267,7 +268,7 @@ func (ins *Instance) gatherSrvrResult(conn net.Conn, slist *types.SampleList, gl 
zxidStr := strings.TrimSpace(strings.Split(l, ":")[1]) zxid, err := strconv.ParseUint(zxidStr, 0, 64) if err != nil { - log.Printf("E! failed to parse zxid: %s", err) + klog.ErrorS(err, "failed to parse zookeeper zxid", "remote_addr", conn.RemoteAddr().String(), "value", zxidStr) return } low4Bytes := zxid & 0xFFFFFFFF @@ -278,13 +279,13 @@ func (ins *Instance) gatherSrvrResult(conn net.Conn, slist *types.SampleList, gl func sendZookeeperCmd(conn net.Conn, cmd string) string { _, err := conn.Write([]byte(cmd)) if err != nil { - log.Printf("E! failed to exec Zookeeper command: %s response from '%s': %s", cmd, conn.RemoteAddr().String(), err) + klog.ErrorS(err, "failed to exec zookeeper command", "command", cmd, "remote_addr", conn.RemoteAddr().String()) return "" } res, err := io.ReadAll(conn) if err != nil { - log.Printf("E! failed read Zookeeper command: '%s' response from '%s': %s", cmd, conn.RemoteAddr().String(), err) + klog.ErrorS(err, "failed to read zookeeper command response", "command", cmd, "remote_addr", conn.RemoteAddr().String()) return "" } return string(res) diff --git a/logs/auditor/auditor.go b/logs/auditor/auditor.go index 1f30720a5..4a3585807 100644 --- a/logs/auditor/auditor.go +++ b/logs/auditor/auditor.go @@ -9,7 +9,6 @@ package auditor import ( "encoding/json" - "log" "os" "path/filepath" "sync" @@ -17,6 +16,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/logs/message" + "k8s.io/klog/v2" ) // DefaultRegistryFilename is the default registry filename @@ -88,7 +88,7 @@ func (a *RegistryAuditor) Stop() { a.closeChannels() a.cleanupRegistry() if err := a.flushRegistry(); err != nil { - log.Println("W!", err) + klog.Warning(err) } } @@ -171,10 +171,10 @@ func (a *RegistryAuditor) run() { if err != nil { if os.IsPermission(err) || os.IsNotExist(err) { fileError.Do(func() { - log.Println("W!", err) + klog.Warning(err) }) } else { - log.Println("E!", err) + klog.Error(err) } } } @@ -186,15 +186,15 @@ func (a *RegistryAuditor) 
recoverRegistry() map[string]*RegistryEntry { mr, err := os.ReadFile(a.registryPath) if err != nil { if os.IsNotExist(err) { - log.Printf("I! Could not find state file at %q, will start with default offsets", a.registryPath) + klog.Infof("Could not find state file at %q, will start with default offsets", a.registryPath) } else { - log.Println("E!", err) + klog.Error(err) } return make(map[string]*RegistryEntry) } r, err := a.unmarshalRegistry(mr) if err != nil { - log.Println("E!", err) + klog.Error(err) return make(map[string]*RegistryEntry) } return r diff --git a/logs/client/http/destination.go b/logs/client/http/destination.go index da1ec2662..7fc006ee9 100644 --- a/logs/client/http/destination.go +++ b/logs/client/http/destination.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "io" - "log" "net/http" "net/url" "sync" @@ -19,6 +18,7 @@ import ( "flashcat.cloud/categraf/logs/client" "flashcat.cloud/categraf/pkg/backoff" httputils "flashcat.cloud/categraf/pkg/httpx" + "k8s.io/klog/v2" ) // ContentType options, @@ -108,7 +108,7 @@ func errorToTag(err error) string { // the error returned can be retryable and it is the responsibility of the callee to retry. func (d *Destination) Send(payload []byte) error { if d.blockedUntil.After(time.Now()) { - // log.Printf("%s: sleeping until %v before retrying\n", d.url, d.blockedUntil) + // wait until the backoff window expires before retrying d.waitForBackoff() } @@ -171,7 +171,7 @@ func (d *Destination) unconditionalSend(payload []byte) (err error) { return err } if resp.StatusCode >= 400 { - log.Printf("W! failed to post http payload. code=%d host=%s response=%s\n", resp.StatusCode, d.host, string(response)) + klog.Warningf("failed to post http payload. 
code=%d host=%s response=%s", resp.StatusCode, d.host, string(response)) } if resp.StatusCode == 429 || resp.StatusCode >= 500 { // the server could not serve the request, most likely because of an @@ -265,18 +265,18 @@ func buildContentEncoding(endpoint logsconfig.Endpoint) ContentEncoding { // CheckConnectivity check if sending logs through HTTP works func CheckConnectivity(endpoint logsconfig.Endpoint) logsconfig.HTTPConnectivity { - log.Println("I! Checking HTTP connectivity...") + klog.Info("Checking HTTP connectivity...") ctx := client.NewDestinationsContext() ctx.Start() defer ctx.Stop() // Lower the timeout to 5s because HTTP connectivity test is done synchronously during the agent bootstrap sequence destination := newDestination(endpoint, JSONContentType, ctx, time.Second*5, 0) - log.Println("I! Sending HTTP connectivity request to", destination.url) + klog.Info("Sending HTTP connectivity request to", destination.url) err := destination.unconditionalSend(emptyPayload) if err != nil { - log.Println("E! HTTP connectivity failure:", err) + klog.ErrorS(err, "HTTP connectivity failure") } else { - log.Println("HTTP connectivity successful") + klog.Info("HTTP connectivity successful") } return err == nil } diff --git a/logs/client/kafka/destination.go b/logs/client/kafka/destination.go index a5480b2ed..d1c1b3ffa 100644 --- a/logs/client/kafka/destination.go +++ b/logs/client/kafka/destination.go @@ -5,7 +5,6 @@ package kafka import ( "context" "errors" - "log" "strings" "sync" "time" @@ -18,6 +17,7 @@ import ( "flashcat.cloud/categraf/logs/client" "flashcat.cloud/categraf/logs/util" "flashcat.cloud/categraf/pkg/backoff" + "k8s.io/klog/v2" ) // ContentType options, @@ -119,7 +119,7 @@ func newDestination(endpoint logsconfig.Endpoint, contentType string, destinatio typ = AsyncProducer } if util.Debug() { - log.Println("D! 
producer type:", typ, coreconfig.Config.Logs.ChannelBufferSize, coreconfig.Config.Logs.Net.MaxOpenRequests) + klog.V(1).Info("producer type:", typ, coreconfig.Config.Logs.ChannelBufferSize, coreconfig.Config.Logs.Net.MaxOpenRequests) } coreconfig.Config.Logs.Config.Producer.Timeout = timeout @@ -160,7 +160,7 @@ func newDestination(endpoint logsconfig.Endpoint, contentType string, destinatio } } if util.Debug() { - log.Printf("D! saram config: %+v", coreconfig.Config.Logs.Config) + klog.V(1).Infof("saram config: %+v", coreconfig.Config.Logs.Config) } brokers := strings.Split(endpoint.Addr, ",") @@ -230,7 +230,7 @@ func (d *Destination) unconditionalSend(payload []byte) (err error) { data := &Data{} err = json.Unmarshal(payload, data) if err != nil { - log.Println("E! get topic from payload, ", err) + klog.ErrorS(err, "get topic from payload") } if data.Topic != "" { topic = data.Topic @@ -241,7 +241,7 @@ func (d *Destination) unconditionalSend(payload []byte) (err error) { } err = NewBuilder().WithMessage(msgKey, encodedPayload).WithTopic(topic).Send(d.client) if err != nil { - log.Printf("W! send message to kafka error %s, topic:%s", err, topic) + klog.Warningf("send message to kafka error %v, topic:%s", err, topic) if errors.Is(ctx.Err(), context.Canceled) { return ctx.Err() } diff --git a/logs/client/kafka/producer.go b/logs/client/kafka/producer.go index 2d323e28a..5dd168e32 100644 --- a/logs/client/kafka/producer.go +++ b/logs/client/kafka/producer.go @@ -2,11 +2,11 @@ package kafka import ( "fmt" - "log" "github.com/IBM/sarama" "flashcat.cloud/categraf/logs/util" + "k8s.io/klog/v2" ) const ( @@ -70,7 +70,7 @@ func (p *AsyncProducerWrapper) errorWorker() { for { select { case err := <-p.asyncProducer.Errors(): - log.Println("E! 
kafka producer error", err) + klog.ErrorS(err, "kafka producer error") case <-p.stop: return } @@ -83,7 +83,7 @@ func (p *AsyncProducerWrapper) successWorker() { case <-p.asyncProducer.Successes(): p.counter++ if util.Debug() { - log.Printf("D! kafka producer message success, total:%d", p.counter) + klog.V(1).Infof("kafka producer message success, total:%d", p.counter) } case <-p.stop: return @@ -96,7 +96,7 @@ func (p *SyncProducerWrapper) Send(msg *sarama.ProducerMessage) error { if err == nil { p.counter++ if util.Debug() { - log.Printf("D! kafka producer message success, total:%d", p.counter) + klog.V(1).Infof("kafka producer message success, total:%d", p.counter) } } return err diff --git a/logs/client/tcp/connection_manager.go b/logs/client/tcp/connection_manager.go index 846153a93..fb15b2131 100644 --- a/logs/client/tcp/connection_manager.go +++ b/logs/client/tcp/connection_manager.go @@ -12,7 +12,6 @@ import ( "crypto/tls" "fmt" "io" - "log" "math/rand" "net" "strconv" @@ -21,6 +20,7 @@ import ( "time" "golang.org/x/net/proxy" + "k8s.io/klog/v2" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/status" @@ -60,9 +60,9 @@ func (cm *ConnectionManager) NewConnection(ctx context.Context) (net.Conn, error cm.firstConn.Do(func() { if cm.endpoint.ProxyAddress != "" { - log.Printf("Connecting to the backend: %v, via socks5: %v, with SSL: %v\n", cm.address(), cm.endpoint.ProxyAddress, cm.endpoint.UseSSL) + klog.Infof("Connecting to the backend: %v, via socks5: %v, with SSL: %v", cm.address(), cm.endpoint.ProxyAddress, cm.endpoint.UseSSL) } else { - log.Printf("Connecting to the backend: %v, with SSL: %v\n", cm.address(), cm.endpoint.UseSSL) + klog.Infof("Connecting to the backend: %v, with SSL: %v", cm.address(), cm.endpoint.UseSSL) } }) @@ -91,7 +91,7 @@ func (cm *ConnectionManager) NewConnection(ctx context.Context) (net.Conn, error var dialer proxy.Dialer dialer, err = proxy.SOCKS5("tcp", cm.endpoint.ProxyAddress, nil, proxy.Direct) if 
err != nil { - log.Println("E!", err) + klog.Error(err) continue } // TODO: handle timeouts with ctx. @@ -103,10 +103,10 @@ func (cm *ConnectionManager) NewConnection(ctx context.Context) (net.Conn, error conn, err = dialer.DialContext(dctx, "tcp", cm.address()) } if err != nil { - log.Println("W!", err) + klog.Warning(err) continue } - log.Println("I! connected to", cm.address()) + klog.Info("connected to", cm.address()) if cm.endpoint.UseSSL { sslConn := tls.Client(conn, &tls.Config{ @@ -114,10 +114,10 @@ func (cm *ConnectionManager) NewConnection(ctx context.Context) (net.Conn, error }) err = cm.handshakeWithTimeout(sslConn, connectionTimeout) if err != nil { - log.Println("E!", err) + klog.Error(err) continue } - log.Println("SSL handshake successful") + klog.Info("SSL handshake successful") conn = sslConn } @@ -152,7 +152,7 @@ func (cm *ConnectionManager) ShouldReset(connCreationTime time.Time) bool { // CloseConnection closes a connection on the client side func (cm *ConnectionManager) CloseConnection(conn net.Conn) { conn.Close() - log.Println("Connection closed") + klog.Info("Connection closed") } // handleServerClose lets the connection manager detect when a connection @@ -173,7 +173,7 @@ func (cm *ConnectionManager) handleServerClose(conn net.Conn) { cm.CloseConnection(conn) return default: - log.Println("E!", err) + klog.Error(err) return } } diff --git a/logs/decoder/auto_multiline_handler.go b/logs/decoder/auto_multiline_handler.go index 1b3cc9b79..8654c5eb1 100644 --- a/logs/decoder/auto_multiline_handler.go +++ b/logs/decoder/auto_multiline_handler.go @@ -8,13 +8,13 @@ package decoder import ( - "log" "regexp" "sort" "sync" "time" logsconfig "flashcat.cloud/categraf/config/logs" + "k8s.io/klog/v2" ) type scoredPattern struct { @@ -156,7 +156,7 @@ func (h *AutoMultilineHandler) processAndTry(message *Message) { timeout := false select { case <-h.timeoutTimer.C: - log.Println("Multiline auto detect timed out before reaching line test threshold") + 
klog.Info("Multiline auto detect timed out before reaching line test threshold") timeout = true break default: @@ -169,11 +169,11 @@ func (h *AutoMultilineHandler) processAndTry(message *Message) { matchRatio := float64(topMatch.score) / float64(h.linesTested) if matchRatio >= h.matchThreshold { - log.Printf("Pattern %v matched %d lines with a ratio of %f\n", topMatch.regexp.String(), topMatch.score, matchRatio) + klog.Infof("Pattern %v matched %d lines with a ratio of %f", topMatch.regexp.String(), topMatch.score, matchRatio) h.detectedPattern.Set(topMatch.regexp) h.switchToMultilineHandler(topMatch.regexp) } else { - log.Println("No pattern met the line match threshold during multiline autosensing - using single line handler") + klog.Info("No pattern met the line match threshold during multiline autosensing - using single line handler") // Stay with the single line handler and no longer attempt to detect multiline matches. h.processsingFunc = h.singleLineHandler.process } diff --git a/logs/decoder/decoder.go b/logs/decoder/decoder.go index 75fab38fe..487754bb2 100644 --- a/logs/decoder/decoder.go +++ b/logs/decoder/decoder.go @@ -9,13 +9,13 @@ package decoder import ( "bytes" - "log" "regexp" "sync/atomic" "time" config "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/parser" + "k8s.io/klog/v2" ) // defaultContentLenLimit represents the max size for a line, @@ -124,10 +124,10 @@ func NewDecoderWithEndLineMatcher(source *config.LogSource, parser parser.Parser if lineHandler == nil { // TODO configure multiline if source.Config.AutoMultiLine { - log.Println("Auto multi line log detection enabled") + klog.Info("Auto multi line log detection enabled") if multiLinePattern != nil { - log.Println("Found a previously detected pattern - using multiline handler") + klog.Info("Found a previously detected pattern - using multiline handler") // Save the pattern again for the next rotation detectedPattern.Set(multiLinePattern) @@ -165,7 +165,7 @@ func 
buildAutoMultilineHandlerFromConfig(outputChan chan *Message, lineLimit int for _, p := range additionalPatterns { compiled, err := regexp.Compile("^" + p) if err != nil { - log.Println("logs_config.auto_multi_line_extra_patterns containing value: ", p, " is not a valid regular expression") + klog.Warningf("logs_config.auto_multi_line_extra_patterns containing value %q is not a valid regular expression", p) continue } additionalPatternsCompiled = append(additionalPatternsCompiled, compiled) diff --git a/logs/decoder/line_parser.go b/logs/decoder/line_parser.go index 03e8720a7..25604f9dc 100644 --- a/logs/decoder/line_parser.go +++ b/logs/decoder/line_parser.go @@ -9,10 +9,10 @@ package decoder import ( "bytes" - "log" "time" "flashcat.cloud/categraf/logs/parser" + "k8s.io/klog/v2" ) // LineParser e @@ -67,7 +67,7 @@ func (p *SingleLineParser) process(input *DecodedInput) { // Just parse an pass to the next step content, status, timestamp, _, err := p.parser.Parse(input.content) if err != nil { - log.Println(err) + klog.Error(err) } p.lineHandler.Handle(NewMessage(content, status, input.rawDataLen, timestamp)) } @@ -155,7 +155,7 @@ func (p *MultiLineParser) run() { func (p *MultiLineParser) process(input *DecodedInput) { content, status, timestamp, partial, err := p.parser.Parse(input.content) if err != nil { - log.Println(err, string(input.content)) + klog.ErrorS(err, "failed to parse multiline input", "content", string(input.content)) } // track the raw data length and the timestamp so that the agent tails // from the right place at restart diff --git a/logs/input/container/launcher.go b/logs/input/container/launcher.go index d04188f4f..54cbdd434 100644 --- a/logs/input/container/launcher.go +++ b/logs/input/container/launcher.go @@ -8,12 +8,12 @@ package container import ( - "log" "sync" "time" "flashcat.cloud/categraf/logs/restart" "flashcat.cloud/categraf/pkg/retry" + "k8s.io/klog/v2" ) // Launchable is a retryable wrapper for a restartable @@ -60,11 +60,11 @@ 
func (l *Launcher) shouldRetry() (bool, time.Duration) { } } if retryer == nil { - log.Println("Nothing to retry - stopping") + klog.Info("Nothing to retry - stopping") return false, 0 } nextRetry := time.Until(retryer.NextRetry()) - log.Printf("Could not find an available a container launcher - will try again in %s", nextRetry.Truncate(time.Second)) + klog.Infof("Could not find an available a container launcher - will try again in %s", nextRetry.Truncate(time.Second)) return true, nextRetry } diff --git a/logs/input/file/file_provider.go b/logs/input/file/file_provider.go index b661ff649..74b0eced9 100644 --- a/logs/input/file/file_provider.go +++ b/logs/input/file/file_provider.go @@ -9,13 +9,13 @@ package file import ( "fmt" - "log" "os" "path/filepath" "sort" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/status" + "k8s.io/klog/v2" ) // OpenFilesLimitWarningType is the key of the message generated when too many @@ -84,7 +84,7 @@ func (p *Provider) FilesToTail(sources []*logsconfig.LogSource) []*File { source.Messages.AddMessage(source.Config.Path, fmt.Sprintf("%d files tailed out of %d files matching", tailedFileCounter, len(files))) } if shouldLogErrors { - log.Println("W! Could not collect files:", err) + klog.Warning("Could not collect files:", err) } continue } @@ -112,7 +112,7 @@ func (p *Provider) FilesToTail(sources []*logsconfig.LogSource) []*File { } if len(filesToTail) == p.filesLimit { - log.Println("W! 
Reached the limit on the maximum number of files in use: ", p.filesLimit) + klog.Warning("Reached the limit on the maximum number of files in use: ", p.filesLimit) return filesToTail } @@ -171,10 +171,10 @@ func (p *Provider) searchFiles(pattern string, source *logsconfig.LogSource) ([] return nil, fmt.Errorf("malformed exclusion pattern: %s, %s", excludePattern, err) } for _, excludedPath := range excludedGlob { - log.Println("Adding excluded path:", excludedPath) + klog.Info("Adding excluded path:", excludedPath) excludedPaths[excludedPath]++ if excludedPaths[excludedPath] > 1 { - log.Println("Overlapping excluded path:", excludedPath) + klog.Warning("Overlapping excluded path:", excludedPath) } } } diff --git a/logs/input/file/scanner.go b/logs/input/file/scanner.go index 34fcec109..e54d60cfa 100644 --- a/logs/input/file/scanner.go +++ b/logs/input/file/scanner.go @@ -8,7 +8,6 @@ package file import ( - "log" "os" "path/filepath" "regexp" @@ -22,6 +21,7 @@ import ( "flashcat.cloud/categraf/logs/pipeline" "flashcat.cloud/categraf/logs/restart" "flashcat.cloud/categraf/logs/util" + "k8s.io/klog/v2" ) // rxContainerID is used in the shouldIgnore func to do a best-effort validation @@ -204,7 +204,7 @@ func (s *Scanner) launchTailers(source *logsconfig.LogSource) { files, err := s.fileProvider.CollectFiles(source) if err != nil { source.Status.Error(err) - log.Println("W! Could not collect files: ", err) + klog.Warning("Could not collect files: ", err) return } for _, file := range files { @@ -260,15 +260,15 @@ func (s *Scanner) startNewTailer(file *File, m logsconfig.TailingMode) bool { offset, whence, err := Position(s.registry, tailer.Identifier(), mode) if err != nil { - log.Println("W! 
Could not recover offset for file with path", file.Path, err) + klog.Warning("Could not recover offset for file with path", file.Path, err) } if util.Debug() { - log.Printf("Starting a new tailer for: %s (offset: %d, whence: %d) for tailer key %s\n", file.Path, offset, whence, file.GetScanKey()) + klog.V(1).Infof("Starting a new tailer for: %s (offset: %d, whence: %d) for tailer key %s", file.Path, offset, whence, file.GetScanKey()) } err = tailer.Start(offset, whence) if err != nil { - log.Println(err) + klog.Error(err) return false } @@ -346,7 +346,7 @@ func (s *Scanner) handleTailingModeChange(tailerID string, currentTailingMode lo } previousMode, _ := logsconfig.TailingModeFromString(s.registry.GetTailingMode(tailerID)) if previousMode != currentTailingMode { - log.Printf("Tailing mode changed for %v. Was: %v: Now: %v\n", tailerID, previousMode, currentTailingMode) + klog.Infof("Tailing mode changed for %v. Was: %v: Now: %v", tailerID, previousMode, currentTailingMode) if currentTailingMode == logsconfig.Beginning { // end -> beginning, the offset will be honored if it exists return logsconfig.Beginning @@ -366,13 +366,13 @@ func (s *Scanner) stopTailer(tailer *Tailer) { // restartTailer safely stops tailer and starts a new one // returns true if the new tailer is up and running, false if an error occurred func (s *Scanner) restartTailerAfterFileRotation(tailer *Tailer, file *File) bool { - log.Println("Log rotation happened to ", file.Path) + klog.Info("Log rotation happened to ", file.Path) tailer.StopAfterFileRotation() tailer = s.createRotatedTailer(file, tailer.outputChan, tailer.GetDetectedPattern()) // force reading file from beginning since it has been log-rotated err := tailer.StartFromBeginning() if err != nil { - log.Println(err) + klog.Error(err) return false } s.tailers[file.GetScanKey()] = tailer diff --git a/logs/input/file/tailer.go b/logs/input/file/tailer.go index fb7d0e19a..167c6fa35 100644 --- a/logs/input/file/tailer.go +++ 
b/logs/input/file/tailer.go @@ -11,7 +11,6 @@ import ( "context" "fmt" "io" - "log" "os" "path/filepath" "regexp" @@ -26,6 +25,7 @@ import ( "flashcat.cloud/categraf/logs/message" "flashcat.cloud/categraf/logs/parser" "flashcat.cloud/categraf/logs/tag" + "k8s.io/klog/v2" ) // DefaultSleepDuration represents the amount of time the tailer waits before reading new data when no data is received @@ -180,7 +180,7 @@ func (t *Tailer) readForever() { select { case <-t.stop: if n != 0 && atomic.LoadInt32(&t.didFileRotate) == 1 { - log.Println("W! Tailer stopped after rotation close timeout with remaining unread data") + klog.Warning("Tailer stopped after rotation close timeout with remaining unread data") } // stop reading data from file return @@ -237,7 +237,7 @@ func (t *Tailer) startStopTimer() { func (t *Tailer) onStop() { t.osFile.Close() t.decoder.Stop() - log.Println("Closed", t.file.Path, "for tailer key", t.file.GetScanKey(), "read", t.bytesRead, "bytes and", t.decoder.GetLineCount(), "lines") + klog.Info("Closed", t.file.Path, "for tailer key", t.file.GetScanKey(), "read", t.bytesRead, "bytes and", t.decoder.GetLineCount(), "lines") } // forwardMessages lets the Tailer forward log messages to the output channel diff --git a/logs/input/file/tailer_nix.go b/logs/input/file/tailer_nix.go index 7e858036a..31d99c4ce 100644 --- a/logs/input/file/tailer_nix.go +++ b/logs/input/file/tailer_nix.go @@ -10,11 +10,11 @@ package file import ( "fmt" "io" - "log" "path/filepath" "flashcat.cloud/categraf/logs/decoder" "flashcat.cloud/categraf/logs/util" + "k8s.io/klog/v2" ) // setup sets up the file tailer @@ -29,7 +29,7 @@ func (t *Tailer) setup(offset int64, whence int) error { t.tags = t.buildTailerTags() if util.Debug() { - log.Println("I! 
Opening", t.file.Path, "for tailer key", t.file.GetScanKey()) + klog.V(1).Info("Opening", t.file.Path, "for tailer key", t.file.GetScanKey()) } f, err := openFile(fullpath) if err != nil { diff --git a/logs/input/file/tailer_windows.go b/logs/input/file/tailer_windows.go index 7f7f6a8bd..dd6a01336 100644 --- a/logs/input/file/tailer_windows.go +++ b/logs/input/file/tailer_windows.go @@ -10,11 +10,11 @@ package file import ( "fmt" "io" - "log" "os" "path/filepath" "flashcat.cloud/categraf/logs/decoder" + "k8s.io/klog/v2" ) // setup sets up the file tailer @@ -28,7 +28,7 @@ func (t *Tailer) setup(offset int64, whence int) error { // adds metadata to enable users to filter logs by filename t.tags = t.buildTailerTags() - log.Println("Opening ", t.fullpath) + klog.Info("Opening ", t.fullpath) f, err := openFile(t.fullpath) if err != nil { return err @@ -51,18 +51,18 @@ func (t *Tailer) readAvailable() (int, error) { st, err := f.Stat() if err != nil { - log.Println("Error stat()ing file", err) + klog.ErrorS(err, "Error stat()ing file") return 0, err } sz := st.Size() offset := t.GetReadOffset() if sz == 0 { - log.Println("File size now zero, resetting offset") + klog.Info("File size now zero, resetting offset") t.SetReadOffset(0) t.SetDecodedOffset(0) } else if sz < offset { - log.Println("Offset off end of file, resetting") + klog.Info("Offset off end of file, resetting") t.SetReadOffset(0) t.SetDecodedOffset(0) } diff --git a/logs/input/journald/launcher.go b/logs/input/journald/launcher.go index 155d21fc5..24aef70c4 100644 --- a/logs/input/journald/launcher.go +++ b/logs/input/journald/launcher.go @@ -8,12 +8,11 @@ package journald import ( - "log" - config "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/auditor" "flashcat.cloud/categraf/logs/pipeline" "flashcat.cloud/categraf/logs/restart" + "k8s.io/klog/v2" ) // Launcher is in charge of starting and stopping new journald tailers @@ -53,7 +52,7 @@ func (l *Launcher) run() { } tailer, err := 
l.setupTailer(source) if err != nil { - log.Println("Could not set up journald tailer: ", err) + klog.ErrorS(err, "Could not set up journald tailer") } else { l.tailers[identifier] = tailer } diff --git a/logs/input/journald/tailer.go b/logs/input/journald/tailer.go index 69ae0027d..125f285f3 100644 --- a/logs/input/journald/tailer.go +++ b/logs/input/journald/tailer.go @@ -11,13 +11,13 @@ import ( "encoding/json" "fmt" "io" - "log" "time" "github.com/coreos/go-systemd/sdjournal" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/message" + "k8s.io/klog/v2" ) // defaultWaitDuration represents the delay before which we try to collect a new log from the journal @@ -58,14 +58,14 @@ func (t *Tailer) Start(cursor string) error { } t.source.Status.Success() t.source.AddInput(t.journalPath()) - log.Println("Start tailing journal ", t.journalPath()) + klog.Info("Start tailing journal ", t.journalPath()) go t.tail() return nil } // Stop stops the tailer func (t *Tailer) Stop() { - log.Println("Stop tailing journal ", t.journalPath()) + klog.Info("Stop tailing journal ", t.journalPath()) t.stop <- struct{}{} t.source.RemoveInput(t.journalPath()) <-t.done @@ -139,7 +139,7 @@ func (t *Tailer) tail() { if err != nil && err != io.EOF { err := fmt.Errorf("cant't tail journal %s: %s", t.journalPath(), err) t.source.Status.Error(err) - log.Println(err) + klog.Error(err) return } if n < 1 { @@ -149,7 +149,7 @@ func (t *Tailer) tail() { } entry, err := t.journal.GetEntry() if err != nil { - log.Printf("Could not retrieve journal entry: %s\n", err) + klog.Warningf("Could not retrieve journal entry: %v", err) continue } if t.shouldDrop(entry) { diff --git a/logs/input/kubernetes/launcher.go b/logs/input/kubernetes/launcher.go index 54e1d4708..5b8cc7062 100644 --- a/logs/input/kubernetes/launcher.go +++ b/logs/input/kubernetes/launcher.go @@ -11,7 +11,6 @@ import ( "context" "encoding/json" "fmt" - "log" "os" "path/filepath" "strings" @@ -26,6 +25,7 @@ import 
( "flashcat.cloud/categraf/logs/util/kubernetes/kubelet" "flashcat.cloud/categraf/pkg/kubernetes" "flashcat.cloud/categraf/pkg/retry" + "k8s.io/klog/v2" ) const ( @@ -68,10 +68,10 @@ func IsAvailable() (bool, *retry.Retrier) { util, retrier := kubelet.GetKubeUtilWithRetrier() if util != nil { - log.Println("Kubernetes launcher is available") + klog.Info("Kubernetes launcher is available") return true, nil } - log.Println("Kubernetes launcher is not available: ", retrier.LastError()) + klog.Warning("Kubernetes launcher is not available: ", retrier.LastError()) return false, retrier } @@ -79,7 +79,7 @@ func IsAvailable() (bool, *retry.Retrier) { func NewLauncher(sources *logsconfig.LogSources, services *service.Services, collectAll bool) *Launcher { kubeutil, err := kubelet.GetKubeUtil() if err != nil { - log.Println("KubeUtil not available, failed to create launcher: ", err) + klog.ErrorS(err, "KubeUtil not available, failed to create launcher") return nil } launcher := &Launcher{ @@ -99,13 +99,13 @@ func NewLauncher(sources *logsconfig.LogSources, services *service.Services, col // Start starts the launcher func (l *Launcher) Start() { - log.Println("Starting Kubernetes launcher") + klog.Info("Starting Kubernetes launcher") go l.run() } // Stop stops the launcher func (l *Launcher) Stop() { - log.Println("Stopping Kubernetes launcher") + klog.Info("Stopping Kubernetes launcher") l.stopped <- struct{}{} } @@ -121,7 +121,7 @@ func (l *Launcher) run() { case ops := <-l.retryOperations: l.addSource(ops.service) case <-l.stopped: - log.Println("Kubernetes launcher stopped") + klog.Info("Kubernetes launcher stopped") return } } @@ -153,7 +153,7 @@ func (l *Launcher) scheduleServiceForRetry(svc *service.Service) { func (l *Launcher) delayRetry(ops *retryOps) { delay := ops.backoff.NextBackOff() if delay == backoff.Stop { - log.Println("Unable to add source for container ", ops.service.GetEntityID()) + klog.Warning("Unable to add source for container ", 
ops.service.GetEntityID()) delete(l.pendingRetries, ops.service.GetEntityID()) return } @@ -169,7 +169,7 @@ func (l *Launcher) addSource(svc *service.Service) { // If the container is already tailed, we don't do anything // That shouldn't happen if _, exists := l.sourcesByContainer[svc.GetEntityID()]; exists { - log.Printf("A source already exist for container %v", svc.GetEntityID()) + klog.Warningf("A source already exist for container %v", svc.GetEntityID()) return } @@ -177,22 +177,22 @@ func (l *Launcher) addSource(svc *service.Service) { if err != nil { if errors.IsRetriable(err) { // Attempt to reschedule the source later - log.Printf("Failed to fetch pod info for container %v, will retry: %v", svc.Identifier, err) + klog.Warningf("Failed to fetch pod info for container %v, will retry: %v", svc.Identifier, err) l.scheduleServiceForRetry(svc) return } - log.Printf("Could not add source for container %v: %v", svc.Identifier, err) + klog.ErrorS(err, "Could not add source for container", "container", svc.Identifier) return } container, err := l.kubeutil.GetStatusForContainerID(pod, svc.GetEntityID()) if err != nil { - log.Println(err) + klog.Error(err) return } source, err := l.getSource(pod, container) if err != nil { if err != errCollectAllDisabled { - log.Printf("Invalid configuration for pod %v, container %v: %v", pod.Metadata.Name, container.Name, err) + klog.Warningf("Invalid configuration for pod %v, container %v: %v", pod.Metadata.Name, container.Name, err) } return } @@ -246,7 +246,7 @@ func (l *Launcher) getSource(pod *kubernetes.Pod, container kubernetes.Container logsSource := "" shortImageName, err := l.getShortImageName(pod, container.Name) if err != nil { - log.Printf("Couldn't get short image for container '%s': %v", container.Name, err) + klog.Warningf("Couldn't get short image for container '%s': %v", container.Name, err) // Fallback and use `kubernetes` as source name logsSource = kubernetesIntegration } else { @@ -257,7 +257,7 @@ func (l 
*Launcher) getSource(pod *kubernetes.Pod, container kubernetes.Container if ok { err = json.Unmarshal([]byte(ruleStr), &rules) if err != nil { - log.Printf("pod rule %s unmarshal error %s", ruleStr, err) + klog.Warningf("pod rule %s unmarshal error %v", ruleStr, err) } } topic := pod.Metadata.Annotations[AnnotationTopicKey] @@ -333,7 +333,7 @@ func buildTags(pod *kubernetes.Pod, container kubernetes.ContainerStatus) []stri func getTaggerEntityID(ctrID string) string { taggerEntityID, err := kubelet.KubeContainerIDToTaggerEntityID(ctrID) if err != nil { - log.Printf("Could not get tagger entity ID: %v", err) + klog.Warningf("Could not get tagger entity ID: %v", err) return ctrID } return taggerEntityID @@ -380,28 +380,28 @@ func (l *Launcher) getPath(basePath string, pod *kubernetes.Pod, container kuber v110Dir := filepath.Join(oldDirectory, container.Name) _, err := os.Stat(v110Dir) if err == nil { - log.Printf("Logs path found for container %s, v1.13 >= kubernetes version >= v1.10", container.Name) + klog.V(1).Infof("Logs path found for container %s, v1.13 >= kubernetes version >= v1.10", container.Name) return filepath.Join(v110Dir, anyLogFile) } if !os.IsNotExist(err) { - log.Printf("Cannot get file info for %s: %v", v110Dir, err) + klog.Warningf("Cannot get file info for %s: %v", v110Dir, err) } v19Files := filepath.Join(oldDirectory, fmt.Sprintf(anyV19LogFile, container.Name)) files, err := filepath.Glob(v19Files) if err == nil && len(files) > 0 { - log.Printf("Logs path found for container %s, kubernetes version <= v1.9", container.Name) + klog.V(1).Infof("Logs path found for container %s, kubernetes version <= v1.9", container.Name) return v19Files } if err != nil { - log.Printf("Cannot get file info for %s: %v", v19Files, err) + klog.Warningf("Cannot get file info for %s: %v", v19Files, err) } if len(files) == 0 { - log.Printf("Files matching %s not found", v19Files) + klog.V(1).Infof("Files matching %s not found", v19Files) } } - log.Printf("Using the 
latest kubernetes logs path for container %s", container.Name) + klog.V(1).Infof("Using the latest kubernetes logs path for container %s", container.Name) return filepath.Join(basePath, l.getPodDirectorySince1_14(pod), container.Name, anyLogFile) } @@ -423,7 +423,7 @@ func (l *Launcher) getShortImageName(pod *kubernetes.Pod, containerName string) } _, shortName, _, err := containers.SplitImageName(containerSpec.Image) if err != nil { - log.Printf("Cannot parse image name: %v", err) + klog.Warningf("Cannot parse image name: %v", err) } return shortName, err } diff --git a/logs/input/kubernetes/scanner.go b/logs/input/kubernetes/scanner.go index 71caa9559..ea94596d1 100644 --- a/logs/input/kubernetes/scanner.go +++ b/logs/input/kubernetes/scanner.go @@ -4,7 +4,6 @@ package kubernetes import ( "context" - "log" "strings" "sync" "time" @@ -14,6 +13,7 @@ import ( "flashcat.cloud/categraf/logs/util/kubernetes/kubelet" "flashcat.cloud/categraf/pkg/checksum" "flashcat.cloud/categraf/pkg/set" + "k8s.io/klog/v2" ) type ( @@ -39,7 +39,7 @@ func (s *Scanner) Scan() { if s.kubelet == nil { s.kubelet, err = kubelet.GetKubeUtil() if err != nil { - log.Printf("connect kubelet error %s", err) + klog.Warningf("connect kubelet error %v", err) return } } @@ -52,7 +52,7 @@ func (s *Scanner) Scan() { case <-ticker.C: pods, err := s.kubelet.GetLocalPodList(ctx) if err != nil { - log.Printf("get local pod list error %s", err) + klog.Warningf("get local pod list error %v", err) continue } fetched := make(map[string]checksum.Checksum) diff --git a/logs/input/listener/tailer.go b/logs/input/listener/tailer.go index bf5399630..8f4c1fc54 100644 --- a/logs/input/listener/tailer.go +++ b/logs/input/listener/tailer.go @@ -9,13 +9,13 @@ package listener import ( "io" - "log" "net" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/decoder" "flashcat.cloud/categraf/logs/message" "flashcat.cloud/categraf/logs/parser" + "k8s.io/klog/v2" ) // Tailer reads data from a 
connection @@ -88,7 +88,7 @@ func (t *Tailer) readForever() { } if err != nil { // an error occurred, stop from reading new data - log.Printf("Couldn't read message from connection: %v\n", err) + klog.Warningf("Couldn't read message from connection: %v", err) return } t.source.BytesRead.Add(int64(len(data))) diff --git a/logs/input/listener/tcp.go b/logs/input/listener/tcp.go index bda99700d..439276075 100644 --- a/logs/input/listener/tcp.go +++ b/logs/input/listener/tcp.go @@ -9,7 +9,6 @@ package listener import ( "fmt" - "log" "net" "sync" "time" @@ -17,6 +16,7 @@ import ( logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/pipeline" "flashcat.cloud/categraf/logs/restart" + "k8s.io/klog/v2" ) // A TCPListener listens and accepts TCP connections and delegates the read operations to a tailer. @@ -38,7 +38,7 @@ func NewTCPListener(pipelineProvider pipeline.Provider, source *logsconfig.LogSo var err error idleTimeout, err = time.ParseDuration(source.Config.IdleTimeout) if err != nil { - log.Printf("Error parsing log's idle_timeout as a duration: %s\n", err) + klog.Warningf("Error parsing log's idle_timeout as a duration: %v", err) idleTimeout = 0 } } @@ -55,10 +55,10 @@ func NewTCPListener(pipelineProvider pipeline.Provider, source *logsconfig.LogSo // Start starts the listener to accepts new incoming connections. 
func (l *TCPListener) Start() { - log.Printf("Starting TCP forwarder on port %d, with read buffer size: %d\n", l.source.Config.Port, l.frameSize) + klog.Infof("Starting TCP forwarder on port %d, with read buffer size: %d", l.source.Config.Port, l.frameSize) err := l.startListener() if err != nil { - log.Printf("Can't start TCP forwarder on port %d: %v\n", l.source.Config.Port, err) + klog.Errorf("Can't start TCP forwarder on port %d: %v", l.source.Config.Port, err) l.source.Status.Error(err) return } @@ -68,7 +68,7 @@ func (l *TCPListener) Start() { // Stop stops the listener from accepting new connections and all the activer tailers. func (l *TCPListener) Stop() { - log.Printf("Stopping TCP forwarder on port %d\n", l.source.Config.Port) + klog.Infof("Stopping TCP forwarder on port %d", l.source.Config.Port) l.mu.Lock() defer l.mu.Unlock() l.stop <- struct{}{} @@ -95,11 +95,11 @@ func (l *TCPListener) run() { return case err != nil: // an error occurred, restart the listener. - log.Printf("Can't listen on port %d, restarting a listener: %v\n", l.source.Config.Port, err) + klog.Warningf("Can't listen on port %d, restarting a listener: %v", l.source.Config.Port, err) l.listener.Close() err := l.startListener() if err != nil { - log.Printf("Can't restart listener on port %d: %v\n", l.source.Config.Port, err) + klog.Errorf("Can't restart listener on port %d: %v", l.source.Config.Port, err) l.source.Status.Error(err) return } diff --git a/logs/input/listener/udp.go b/logs/input/listener/udp.go index 3a79d9b74..1890de407 100644 --- a/logs/input/listener/udp.go +++ b/logs/input/listener/udp.go @@ -9,11 +9,11 @@ package listener import ( "fmt" - "log" "net" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/pipeline" + "k8s.io/klog/v2" ) // The UDP listener is limited by the size of its read buffer, @@ -46,10 +46,10 @@ func NewUDPListener(pipelineProvider pipeline.Provider, source *logsconfig.LogSo // Start opens a new UDP connection and starts 
a tailer. func (l *UDPListener) Start() { - log.Printf("Starting UDP forwarder on port: %d, with read buffer size: %d\n", l.source.Config.Port, l.frameSize) + klog.Infof("Starting UDP forwarder on port: %d, with read buffer size: %d", l.source.Config.Port, l.frameSize) err := l.startNewTailer() if err != nil { - log.Printf("Can't start UDP forwarder on port %d: %v\n", l.source.Config.Port, err) + klog.Errorf("Can't start UDP forwarder on port %d: %v", l.source.Config.Port, err) l.source.Status.Error(err) return } @@ -58,7 +58,7 @@ func (l *UDPListener) Start() { // Stop stops the tailer. func (l *UDPListener) Stop() { - log.Printf("Stopping UDP forwarder on port: %d\n", l.source.Config.Port) + klog.Infof("Stopping UDP forwarder on port: %d", l.source.Config.Port) l.tailer.Stop() } @@ -109,11 +109,11 @@ func (l *UDPListener) read(tailer *Tailer) ([]byte, error) { // resetTailer creates a new tailer. func (l *UDPListener) resetTailer() { - log.Printf("Resetting the UDP connection on port: %d\n", l.source.Config.Port) + klog.Infof("Resetting the UDP connection on port: %d", l.source.Config.Port) l.tailer.Stop() err := l.startNewTailer() if err != nil { - log.Printf("Could not reset the UDP connection on port %d: %v\n", l.source.Config.Port, err) + klog.Errorf("Could not reset the UDP connection on port %d: %v", l.source.Config.Port, err) l.source.Status.Error(err) return } diff --git a/logs/message/origin.go b/logs/message/origin.go index d2419dec2..1bad36f17 100644 --- a/logs/message/origin.go +++ b/logs/message/origin.go @@ -10,10 +10,10 @@ package message import ( "encoding/json" "fmt" - "log" "strings" logsconfig "flashcat.cloud/categraf/config/logs" + "k8s.io/klog/v2" ) // Origin represents the Origin of a message @@ -104,7 +104,7 @@ func (o *Origin) TagsToJsonString() string { if len(tagsMap) != 0 { data, err := json.Marshal(tagsMap) if err != nil { - log.Println("marshal tags error:", err) + klog.ErrorS(err, "marshal tags error") return ret } ret = string(data) 
diff --git a/logs/processor/processor.go b/logs/processor/processor.go index 7268728ac..e984a9d98 100644 --- a/logs/processor/processor.go +++ b/logs/processor/processor.go @@ -9,13 +9,13 @@ package processor import ( "context" - "log" "sync" logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/diagnostic" "flashcat.cloud/categraf/logs/message" "flashcat.cloud/categraf/logs/util" + "k8s.io/klog/v2" ) // A Processor updates messages from an inputChan and pushes @@ -92,11 +92,11 @@ func (p *Processor) processMessage(msg *message.Message) { // Encode the message to its final format content, err := p.encoder.Encode(msg, redactedMsg) if err != nil { - log.Println("unable to encode msg ", err) + klog.ErrorS(err, "unable to encode msg") return } if util.Debug() { - log.Println("D! log item:", string(content)) + klog.V(1).Info("log item:", string(content)) } msg.Content = content p.outputChan <- msg diff --git a/logs/sender/batch_strategy.go b/logs/sender/batch_strategy.go index 0e5bfa94a..1db17f265 100644 --- a/logs/sender/batch_strategy.go +++ b/logs/sender/batch_strategy.go @@ -9,11 +9,11 @@ package sender import ( "context" - "log" "sync" "time" "flashcat.cloud/categraf/logs/message" + "k8s.io/klog/v2" ) // batchStrategy contains all the logic to send logs in batch. @@ -109,7 +109,7 @@ func (s *batchStrategy) processMessage(m *message.Message, outputChan chan *mess added := s.buffer.AddMessage(m) if !added || s.buffer.IsFull() { if s.buffer.IsFull() { - log.Printf("I! 
buffer full len: %d, size: %d", + klog.Infof("buffer full len: %d, size: %d", len(s.buffer.messageBuffer), s.buffer.contentSize) } s.flushBuffer(outputChan, send) @@ -118,7 +118,7 @@ func (s *batchStrategy) processMessage(m *message.Message, outputChan chan *mess // it's possible that the m could not be added because the buffer was full // so we need to retry once again if !s.buffer.AddMessage(m) { - log.Printf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d\n", s.pipelineName, len(m.Content), s.buffer.ContentSizeLimit()) + klog.Warningf("dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, len(m.Content), s.buffer.ContentSizeLimit()) } } } @@ -151,7 +151,7 @@ func (s *batchStrategy) sendMessages(messages []*message.Message, outputChan cha if shouldStopSending(err) { return } - log.Printf("Could not send payload: %v\n", err) + klog.Warningf("could not send payload: %v", err) } for _, message := range messages { diff --git a/logs/sender/stream_strategy.go b/logs/sender/stream_strategy.go index 210c8b67d..8d162b494 100644 --- a/logs/sender/stream_strategy.go +++ b/logs/sender/stream_strategy.go @@ -9,9 +9,9 @@ package sender import ( "context" - "log" "flashcat.cloud/categraf/logs/message" + "k8s.io/klog/v2" ) // StreamStrategy is a shared stream strategy. 
@@ -35,7 +35,7 @@ func (s *streamStrategy) Send(inputChan chan *message.Message, outputChan chan * if shouldStopSending(err) { return } - log.Printf("Could not send payload: %v\n", err) + klog.Warningf("could not send payload: %v", err) } outputChan <- message } diff --git a/logs/tag/provider.go b/logs/tag/provider.go index 506589b83..cf23a26bd 100644 --- a/logs/tag/provider.go +++ b/logs/tag/provider.go @@ -50,7 +50,6 @@ func (p *provider) GetTags() []string { // tags, err := tagger.Tag(p.entityID, collectors.HighCardinality) // if err != nil { - // log.Printf("Cannot tag container %s: %v\n", p.entityID, err) // return []string{} // } diff --git a/logs/util/containers/filter.go b/logs/util/containers/filter.go index cdfdebece..dbaf5ecec 100644 --- a/logs/util/containers/filter.go +++ b/logs/util/containers/filter.go @@ -10,12 +10,12 @@ package containers import ( "errors" "fmt" - "log" "regexp" "strings" coreconfig "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/logs/util" + "k8s.io/klog/v2" ) const ( @@ -112,7 +112,7 @@ func parseFilters(filters []string) (imageFilters, nameFilters, namespaceFilters namespaceFilters = append(namespaceFilters, r) default: warnmsg := fmt.Sprintf("Container filter %q is unknown, ignoring it. 
The supported filters are 'image', 'name' and 'kube_namespace'", filter) - log.Println(warnmsg) + klog.Warning(warnmsg) filterWarnings = append(filterWarnings, warnmsg) } @@ -278,7 +278,7 @@ func (cf Filter) IsExcluded(containerName, containerImage, podNamespace string) for _, r := range cf.ImageExcludeList { match := r.MatchString(containerImage) if util.Debug() { - log.Printf("D!, exclude item :%+v, container image:%s, %t\n", r, containerImage, match) + klog.V(1).Infof("exclude item :%+v, container image:%s, %t", r, containerImage, match) } if match { return true diff --git a/logs/util/containers/providers/provider.go b/logs/util/containers/providers/provider.go index 5993e1e12..80d5b2511 100644 --- a/logs/util/containers/providers/provider.go +++ b/logs/util/containers/providers/provider.go @@ -8,9 +8,8 @@ package providers import ( - "log" - "flashcat.cloud/categraf/logs/util/containers" + "k8s.io/klog/v2" ) // ContainerImpl without implementation @@ -31,7 +30,7 @@ func Register(impl containers.ContainerImplementation) { if containerImpl == nil { containerImpl = impl } else { - log.Printf("Trying to set multiple ContainerImplementation") + klog.Warning("Trying to set multiple ContainerImplementation") } } diff --git a/logs/util/docker/containers.go b/logs/util/docker/containers.go index 68f1265e2..aa009e953 100644 --- a/logs/util/docker/containers.go +++ b/logs/util/docker/containers.go @@ -13,7 +13,6 @@ import ( "fmt" "github.com/docker/docker/api/types/container" "io" - "log" "net" "regexp" "strings" @@ -25,6 +24,7 @@ import ( coreconfig "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/logs/util/containers" "flashcat.cloud/categraf/logs/util/containers/providers" + "k8s.io/klog/v2" ) var healthRe = regexp.MustCompile(`\(health: (\w+)\)`) @@ -64,13 +64,13 @@ func (d *DockerUtil) ListContainers(ctx context.Context, cfg *ContainerListConfi // the inspect should be in the cache already so this is not a problem inspect, err := d.Inspect(ctx, container.ID, 
false) if err != nil { - log.Printf("Error inspecting container %s: %s", container.ID, err) + klog.Warningf("Error inspecting container %s: %v", container.ID, err) continue } networkMode, err := GetContainerNetworkMode(ctx, container.ID) - log.Printf("container %s network mode: %s", container.Name, networkMode) + klog.V(1).Infof("container %s network mode: %s", container.Name, networkMode) if err != nil { - log.Printf("Failed to get network mode for container %s. Network info will be missing. Error: %s", container.ID, err) + klog.Warningf("Failed to get network mode for container %s. Network info will be missing. Error: %v", container.ID, err) continue } // in awsvpc, and host mode, we assume that those ports are listening to all ip addresses @@ -84,7 +84,7 @@ func (d *DockerUtil) ListContainers(ctx context.Context, cfg *ContainerListConfi if networkMode == containers.HostNetworkMode { ips := GetDockerHostIPs() if len(ips) == 0 { - log.Printf("Failed to get host IPs. Container %s will be missing network info: %s", container.Name, err) + klog.Warningf("Failed to get host IPs. 
Container %s will be missing network info: %v", container.Name, err) continue } ipAddr := []containers.NetworkAddress{} @@ -123,7 +123,7 @@ func (d *DockerUtil) getContainerDetails(ctn *containers.Container) { var err error ctn.StartedAt, err = providers.ContainerImpl().GetContainerStartTime(ctn.ID) if err != nil { - log.Printf("ContainerImplementation cannot get StartTime for container %s, err: %s", ctn.ID[:12], err) + klog.Warningf("ContainerImplementation cannot get StartTime for container %s, err: %v", ctn.ID[:12], err) return } } @@ -152,7 +152,7 @@ func (d *DockerUtil) dockerContainers(ctx context.Context, cfg *ContainerListCon i, err := d.Inspect(ctx, c.ID, false) if err != nil { d.Unlock() - log.Printf("Error inspecting container %s: %s", c.ID, err) + klog.Warningf("Error inspecting container %s: %v", c.ID, err) continue } d.networkMappings[c.ID] = findDockerNetworks(c.ID, i.State.Pid, c) @@ -162,7 +162,7 @@ func (d *DockerUtil) dockerContainers(ctx context.Context, cfg *ContainerListCon image, err := d.ResolveImageName(ctx, c.Image) if err != nil { - log.Printf("Can't resolve image name %s: %s", c.Image, err) + klog.Warningf("Can't resolve image name %s: %v", c.Image, err) } pauseContainerExcluded := containers.IsPauseContainer(c.Labels) @@ -229,14 +229,14 @@ func (d *DockerUtil) parseContainerNetworkAddresses(cID string, ports []types.Po addrList := []containers.NetworkAddress{} tempAddrList := []containers.NetworkAddress{} if netSettings == nil || len(netSettings.Networks) == 0 { - log.Println("No network settings available from docker") + klog.Warning("No network settings available from docker") return addrList } for _, port := range ports { if isExposed(port) { IP := net.ParseIP(port.IP) if IP == nil { - log.Printf("Unable to parse IP: %v for container: %s", port.IP, container) + klog.Warningf("Unable to parse IP: %v for container: %s", port.IP, container) continue } addrList = append(addrList, containers.NetworkAddress{ @@ -254,12 +254,12 @@ func (d 
*DockerUtil) parseContainerNetworkAddresses(cID string, ports []types.Po // Retieve IPs from network settings for the cached ports for _, network := range netSettings.Networks { if network.IPAddress == "" { - log.Printf("No IP found for container %s in network %s", container, network.NetworkID) + klog.Warningf("No IP found for container %s in network %s", container, network.NetworkID) continue } IP := net.ParseIP(network.IPAddress) if IP == nil { - log.Printf("Unable to parse IP: %v for container: %s", network.IPAddress, container) + klog.Warningf("Unable to parse IP: %v for container: %s", network.IPAddress, container) continue } for _, addr := range tempAddrList { @@ -375,4 +375,4 @@ func GetDockerHostIPs() []string { return []string{coreconfig.Config.GetHostname()} } return []string{ip.String()} -} \ No newline at end of file +} diff --git a/logs/util/docker/docker.go b/logs/util/docker/docker.go index f3fd5fd43..c202b1cd0 100644 --- a/logs/util/docker/docker.go +++ b/logs/util/docker/docker.go @@ -12,7 +12,6 @@ import ( "encoding/json" "errors" "fmt" - "log" "sort" "strings" "sync" @@ -26,6 +25,7 @@ import ( "flashcat.cloud/categraf/logs/util/containers/providers" "flashcat.cloud/categraf/pkg/cache" "flashcat.cloud/categraf/pkg/retry" + "k8s.io/klog/v2" ) // DockerUtil wraps interactions with a local docker API. 
@@ -91,7 +91,7 @@ func ConnectToDocker(ctx context.Context) (*client.Client, error) { return nil, err } - log.Println("Successfully connected to Docker server") + klog.Info("Successfully connected to Docker server") return cli, nil } @@ -193,7 +193,7 @@ func (d *DockerUtil) ResolveImageName(ctx context.Context, image string) (string sp := strings.SplitN(r.RepoDigests[0], "@", 2) d.imageNameBySha[image] = sp[0] } else { - log.Printf("No information in image/inspect to resolve: %s", image) + klog.Warningf("No information in image/inspect to resolve: %s", image) d.imageNameBySha[image] = image } } @@ -226,7 +226,7 @@ func (d *DockerUtil) Inspect(ctx context.Context, id string, withSize bool) (typ if hit { container, ok := cached.(types.ContainerJSON) if !ok { - log.Println("Invalid inspect cache format, forcing a cache miss") + klog.Warning("Invalid inspect cache format, forcing a cache miss") } else { return container, nil } @@ -311,4 +311,4 @@ func (d *DockerUtil) GetContainerStats(ctx context.Context, containerID string) return nil, fmt.Errorf("error listing containers: %s", err) } return containerStats, nil -} \ No newline at end of file +} diff --git a/logs/util/docker/event_pull.go b/logs/util/docker/event_pull.go index 8d3e462b2..784b633b0 100644 --- a/logs/util/docker/event_pull.go +++ b/logs/util/docker/event_pull.go @@ -12,12 +12,12 @@ import ( "encoding/json" "fmt" "io" - "log" "strings" "time" "github.com/docker/docker/api/types/events" "github.com/docker/docker/api/types/filters" + "k8s.io/klog/v2" ) // openEventChannel just wraps the client.Event call with saner argument types. @@ -63,11 +63,11 @@ func (d *DockerUtil) processContainerEvent(ctx context.Context, msg events.Messa var err error imageName, err = d.ResolveImageName(ctx, imageName) if err != nil { - log.Printf("I! 
can't resolve image name %s: %s", imageName, err) + klog.Infof("can't resolve image name %s: %v", imageName, err) } } if d.cfg.filter.IsExcluded(containerName, imageName, "") { - log.Printf("I! events from %s are skipped as the image is excluded for the event collection", containerName) + klog.Infof("events from %s are skipped as the image is excluded for the event collection", containerName) return nil, nil } @@ -117,7 +117,7 @@ func (d *DockerUtil) LatestContainerEvents(ctx context.Context, since time.Time) case msg := <-msgChan: event, err := d.processContainerEvent(ctx, msg) if err != nil { - log.Println("W! error parsing docker message: ", err) + klog.Warning("error parsing docker message: ", err) continue } else if event == nil { continue @@ -134,4 +134,4 @@ func (d *DockerUtil) LatestContainerEvents(ctx context.Context, since time.Time) } } } -} \ No newline at end of file +} diff --git a/logs/util/docker/event_stream.go b/logs/util/docker/event_stream.go index 71fe830df..edffaf6a5 100644 --- a/logs/util/docker/event_stream.go +++ b/logs/util/docker/event_stream.go @@ -10,12 +10,12 @@ package docker import ( "context" "io" - "log" "strconv" "time" "github.com/docker/docker/api/types/events" "github.com/docker/docker/api/types/filters" + "k8s.io/klog/v2" ) // // eventStreamState logic unit tested in event_stream_test.go @@ -108,10 +108,10 @@ CONNECT: // Outer loop handles re-connecting in case the docker daemon closes th case err := <-errs: if err == io.EOF { // Silently ignore io.EOF that happens on http connection reset - log.Println("D! Got EOF, re-connecting") + klog.V(1).Info("Got EOF, re-connecting") } else { // Else, let's wait 10 seconds and try reconnecting - log.Println("W! 
Got error from docker, waiting for 10 seconds: ", err) + klog.Warning("Got error from docker, waiting for 10 seconds: ", err) time.Sleep(10 * time.Second) } cancelFunc() @@ -120,7 +120,7 @@ CONNECT: // Outer loop handles re-connecting in case the docker daemon closes th latestTimestamp = msg.Time event, err := d.processContainerEvent(ctx, msg) if err != nil { - log.Println("D! Skipping event: ", err) + klog.V(1).Info("Skipping event: ", err) continue } if event == nil { @@ -136,4 +136,4 @@ CONNECT: // Outer loop handles re-connecting in case the docker daemon closes th cancelFunc() close(sub.errorChan) close(sub.eventChan) -} \ No newline at end of file +} diff --git a/logs/util/docker/global.go b/logs/util/docker/global.go index c0c5dabe0..d94abb487 100644 --- a/logs/util/docker/global.go +++ b/logs/util/docker/global.go @@ -9,12 +9,12 @@ package docker import ( "context" - "log" "sync" "time" "flashcat.cloud/categraf/logs/util/containers" "flashcat.cloud/categraf/pkg/retry" + "k8s.io/klog/v2" ) var ( @@ -38,7 +38,7 @@ func GetDockerUtilWithRetrier() (*DockerUtil, *retry.Retrier) { }) } if err := globalDockerUtil.initRetry.TriggerRetry(); err != nil { - log.Printf("Docker init error: %s", err) + klog.Warningf("Docker init error: %v", err) return nil, &globalDockerUtil.initRetry } return globalDockerUtil, nil diff --git a/logs/util/docker/network.go b/logs/util/docker/network.go index 1bf98f43e..59364fbc6 100644 --- a/logs/util/docker/network.go +++ b/logs/util/docker/network.go @@ -12,7 +12,6 @@ import ( "encoding/binary" "errors" "fmt" - "log" "net" "sort" "strings" @@ -22,6 +21,7 @@ import ( "flashcat.cloud/categraf/logs/util/containers" "flashcat.cloud/categraf/logs/util/containers/providers" + "k8s.io/klog/v2" ) type dockerNetwork struct { @@ -52,14 +52,14 @@ func findDockerNetworks(containerID string, pid int, container types.Container) // Check the known network modes that require specific handling. 
// Other network modes will look at the docker NetworkSettings. if netMode == containers.HostNetworkMode { - log.Printf("Container %s is in network host mode, its network metrics are for the whole host", containerID) + klog.Infof("Container %s is in network host mode, its network metrics are for the whole host", containerID) return []dockerNetwork{hostNetwork} } else if netMode == containers.NoneNetworkMode { - log.Printf("Container %s is in network mode 'none', we will collect metrics for the whole host", containerID) + klog.Infof("Container %s is in network mode 'none', we will collect metrics for the whole host", containerID) return []dockerNetwork{hostNetwork} } else if strings.HasPrefix(netMode, "container:") { netContainerID := strings.TrimPrefix(netMode, "container:") - log.Printf("Container %s uses the network namespace of container:%s", containerID, netContainerID) + klog.Infof("Container %s uses the network namespace of container:%s", containerID, netContainerID) return []dockerNetwork{{routingContainerID: netContainerID}} } @@ -67,7 +67,7 @@ func findDockerNetworks(containerID string, pid int, container types.Container) // not provide the network settings in container inspect. 
netSettings := container.NetworkSettings if netSettings == nil || netSettings.Networks == nil || len(netSettings.Networks) == 0 { - log.Println("No network settings available from docker, defaulting to host network") + klog.Warning("No network settings available from docker, defaulting to host network") return []dockerNetwork{hostNetwork} } @@ -75,7 +75,7 @@ func findDockerNetworks(containerID string, pid int, container types.Container) interfaces := make(map[string]uint64) for netName, netConf := range netSettings.Networks { if netName == "host" { - log.Printf("Container %s is in network host mode, its network metrics are for the whole host", containerID) + klog.Infof("Container %s is in network host mode, its network metrics are for the whole host", containerID) return []dockerNetwork{hostNetwork} } @@ -85,13 +85,13 @@ func findDockerNetworks(containerID string, pid int, container types.Container) if strings.Contains(ipString, "/") { ip, _, err = net.ParseCIDR(ipString) if err != nil { - log.Printf("Malformed IP %s for container id %s: %s, skipping", ipString, containerID, err) + klog.Warningf("Malformed IP %s for container id %s: %v, skipping", ipString, containerID, err) continue } } else { ip = net.ParseIP(ipString) if ip == nil { - log.Printf("Malformed IP %s for container id %s: %s, skipping", ipString, containerID, err) + klog.Warningf("Malformed IP %s for container id %s: %v, skipping", ipString, containerID, err) continue } } @@ -102,7 +102,7 @@ func findDockerNetworks(containerID string, pid int, container types.Container) destinations, err := providers.ContainerImpl().DetectNetworkDestinations(pid) if err != nil { - log.Printf("Cannot list interfaces for container id %s: %s, skipping", containerID, err) + klog.Warningf("Cannot list interfaces for container id %s: %v, skipping", containerID, err) return nil } @@ -132,7 +132,7 @@ func resolveDockerNetworks(containerNetworks map[string][]dockerNetwork) { if cnw, ok := 
containerNetworks[nw.routingContainerID]; ok { containerNetworks[cid] = cnw } else { - log.Printf("Unable to resolve network for c:%s that uses namespace of c:%s", cid, nw.routingContainerID) + klog.Warningf("Unable to resolve network for c:%s that uses namespace of c:%s", cid, nw.routingContainerID) containerNetworks[cid] = nil } } diff --git a/logs/util/docker/rancher.go b/logs/util/docker/rancher.go index 2333871d4..08b56c8dd 100644 --- a/logs/util/docker/rancher.go +++ b/logs/util/docker/rancher.go @@ -8,8 +8,9 @@ package docker import ( - "log" "net" + + "k8s.io/klog/v2" ) const rancherIPLabel = "io.rancher.container.ip" @@ -21,7 +22,7 @@ func FindRancherIPInLabels(labels map[string]string) (string, bool) { if found { ipv4Addr, _, err := net.ParseCIDR(cidr) if err != nil { - log.Printf("error while retrieving Rancher IP: %q is not valid", cidr) + klog.Warningf("error while retrieving Rancher IP: %q is not valid", cidr) return "", false } return ipv4Addr.String(), true diff --git a/logs/util/docker/storage.go b/logs/util/docker/storage.go index 8ffba4ffd..3cfe9ff5c 100644 --- a/logs/util/docker/storage.go +++ b/logs/util/docker/storage.go @@ -11,11 +11,12 @@ import ( "errors" "fmt" "github.com/docker/docker/api/types/system" - "log" "math" "regexp" "strconv" "strings" + + "k8s.io/klog/v2" ) var ( @@ -55,7 +56,7 @@ func (s *StorageStats) GetPercentUsed() float64 { total := s.Total if s.Total != nil && s.Used != nil && s.Free != nil { if *s.Total < *s.Used+*s.Free { - log.Println("total lower than free+used, re-computing total") + klog.Info("total lower than free+used, re-computing total") totalValue := *s.Used + *s.Free total = &totalValue } @@ -88,12 +89,12 @@ func parseStorageStatsFromInfo(info system.Info) ([]*StorageStats, error) { valueString := entry[1] fields := strings.Fields(key) if len(fields) != 3 || strings.ToLower(fields[1]) != "space" { - log.Println("ignoring invalid storage stat: ", key) + klog.Warning("ignoring invalid storage stat: ", key) 
continue } valueInt, err := parseDiskQuantity(valueString) if err != nil { - log.Printf("ignoring invalid value %s for stat %s: %s", valueString, key, err) + klog.Warningf("ignoring invalid value %s for stat %s: %v", valueString, key, err) continue } storageType := strings.ToLower(fields[0]) @@ -135,4 +136,4 @@ func parseDiskQuantity(text string) (uint64, error) { } return uint64(value * float64(multi)), nil -} \ No newline at end of file +} diff --git a/logs/util/kubernetes/kubelet/containers.go b/logs/util/kubernetes/kubelet/containers.go index 4839ecac1..c88697fb5 100644 --- a/logs/util/kubernetes/kubelet/containers.go +++ b/logs/util/kubernetes/kubelet/containers.go @@ -10,13 +10,13 @@ package kubelet import ( "context" "fmt" - "log" "net" "time" "flashcat.cloud/categraf/logs/util/containers" "flashcat.cloud/categraf/logs/util/containers/providers" "flashcat.cloud/categraf/pkg/kubernetes" + "k8s.io/klog/v2" ) // ListContainers lists all non-excluded running containers, and retrieves their performance metrics @@ -40,7 +40,7 @@ func (ku *KubeUtil) ListContainers(ctx context.Context) ([]*containers.Container } container, err := parseContainerInPod(c, pod) if err != nil { - log.Printf("Cannot parse container %s in pod %s: %s", c.ID, pod.Metadata.Name, err) + klog.Warningf("Cannot parse container %s in pod %s: %v", c.ID, pod.Metadata.Name, err) continue } if container == nil { @@ -48,7 +48,7 @@ func (ku *KubeUtil) ListContainers(ctx context.Context) ([]*containers.Container continue } if !providers.ContainerImpl().ContainerExists(container.ID) { - log.Printf("No ContainerImplementation found for container %s in pod %s, skipping", container.ID, pod.Metadata.Name) + klog.Warningf("No ContainerImplementation found for container %s in pod %s, skipping", container.ID, pod.Metadata.Name) continue } ctrList = append(ctrList, container) @@ -76,7 +76,7 @@ func (ku *KubeUtil) getContainerDetails(ctn *containers.Container) { var err error ctn.StartedAt, err = 
providers.ContainerImpl().GetContainerStartTime(ctn.ID) if err != nil { - log.Printf("ContainerImplementation cannot get StartTime for container %s, err: %s", ctn.ID[:12], err) + klog.Warningf("ContainerImplementation cannot get StartTime for container %s, err: %v", ctn.ID[:12], err) return } } @@ -99,7 +99,7 @@ func parseContainerInPod(status kubernetes.ContainerStatus, pod *kubernetes.Pod) switch { case status.State.Waiting != nil: // We don't display waiting containers - log.Printf("Skipping waiting container %s", c.ID) + klog.V(1).Infof("Skipping waiting container %s", c.ID) return nil, nil case status.State.Running != nil: c.State = containers.ContainerRunningState @@ -124,12 +124,12 @@ func parseContainerNetworkAddresses(status kubernetes.ContainerStatus, pod *kube addrList := []containers.NetworkAddress{} podIP := net.ParseIP(pod.Status.PodIP) if podIP == nil { - log.Printf("Unable to parse pod IP: %v for pod: %s", pod.Status.PodIP, pod.Metadata.Name) + klog.Warningf("Unable to parse pod IP: %v for pod: %s", pod.Status.PodIP, pod.Metadata.Name) return addrList } hostIP := net.ParseIP(pod.Status.HostIP) if hostIP == nil { - log.Printf("Unable to parse host IP: %v for pod: %s", pod.Status.HostIP, pod.Metadata.Name) + klog.Warningf("Unable to parse host IP: %v for pod: %s", pod.Status.HostIP, pod.Metadata.Name) return addrList } // Look for the ports in container spec diff --git a/logs/util/kubernetes/kubelet/kubelet.go b/logs/util/kubernetes/kubelet/kubelet.go index b5da08bc0..d16cf09ca 100644 --- a/logs/util/kubernetes/kubelet/kubelet.go +++ b/logs/util/kubernetes/kubelet/kubelet.go @@ -10,7 +10,6 @@ package kubelet import ( "context" "fmt" - "log" "net/http" "strings" "sync" @@ -23,6 +22,7 @@ import ( "flashcat.cloud/categraf/pkg/cache" "flashcat.cloud/categraf/pkg/kubernetes" "flashcat.cloud/categraf/pkg/retry" + "k8s.io/klog/v2" ) const ( @@ -127,7 +127,7 @@ func GetKubeUtilWithRetrier() (KubeUtilInterface, *retry.Retrier) { } err := 
globalKubeUtil.initRetry.TriggerRetry() if err != nil { - log.Printf("Kube util init error: %s", err) + klog.Warningf("Kube util init error: %v", err) return nil, &globalKubeUtil.initRetry } return globalKubeUtil, nil @@ -186,7 +186,7 @@ func (ku *KubeUtil) GetLocalPodList(ctx context.Context) ([]*kubernetes.Pod, err if cached, hit := cache.Cache.Get(podListCacheKey); hit { pods, ok = cached.(kubernetes.PodList) if !ok { - log.Printf("Invalid pod list cache format, forcing a cache miss") + klog.Warning("Invalid pod list cache format, forcing a cache miss") } else { return pods.Items, nil } @@ -215,7 +215,7 @@ func (ku *KubeUtil) GetLocalPodList(ctx context.Context) ([]*kubernetes.Pod, err pod.Status.AllContainers = allContainers if !ku.filterPod(pod) { if util.Debug() { - log.Printf("D! filter include, pod name: %s, pod namespace: %s. pod image:[%v]", pod.Metadata.Name, pod.Metadata.Namespace, pod.Spec.Containers) + klog.V(1).Infof("filter include, pod name: %s, pod namespace: %s. pod image:[%v]", pod.Metadata.Name, pod.Metadata.Namespace, pod.Spec.Containers) } tmpSlice = append(tmpSlice, pod) } @@ -233,7 +233,7 @@ func (ku *KubeUtil) filterPod(pod *kubernetes.Pod) bool { for _, c := range pod.Status.GetAllContainers() { if ku.filter.IsExcluded(c.Name, c.Image, pod.Metadata.Namespace) { if util.Debug() { - log.Printf("D! 
container name:%s image:%s, ns:%s, exclude:true", c.Name, c.Image, pod.Metadata.Namespace) + klog.V(1).Infof("container name:%s image:%s, ns:%s, exclude:true", c.Name, c.Image, pod.Metadata.Namespace) } return true } @@ -263,7 +263,7 @@ func (ku *KubeUtil) GetPodForContainerID(ctx context.Context, containerID string // Retry with cache invalidation if err != nil && errors.IsNotFound(err) { - log.Printf("Cannot get container %q: %s, retrying without cache...", containerID, err) + klog.Warningf("Cannot get container %q: %v, retrying without cache...", containerID, err) pods, err = ku.ForceGetLocalPodList(ctx) if err != nil { return nil, err @@ -277,7 +277,7 @@ func (ku *KubeUtil) GetPodForContainerID(ctx context.Context, containerID string // On some kubelet versions, containers can take up to a second to // register in the podlist, retry a few times before failing if ku.waitOnMissingContainer == 0 { - log.Printf("Still cannot get container %q, wait disabled", containerID) + klog.Warningf("Still cannot get container %q, wait disabled", containerID) return pod, err } timeout := time.NewTimer(ku.waitOnMissingContainer) @@ -285,7 +285,7 @@ func (ku *KubeUtil) GetPodForContainerID(ctx context.Context, containerID string retryTicker := time.NewTicker(250 * time.Millisecond) defer retryTicker.Stop() for { - log.Printf("Still cannot get container %q: %s, retrying in 250ms", containerID, err) + klog.Warningf("Still cannot get container %q: %v, retrying in 250ms", containerID, err) select { case <-retryTicker.C: pods, err = ku.ForceGetLocalPodList(ctx) @@ -359,7 +359,7 @@ func (ku *KubeUtil) GetPodFromUID(ctx context.Context, podUID string) (*kubernet return pod, nil } } - log.Printf("cannot get the pod uid %q: %s, retrying without cache...", podUID, err) + klog.Warningf("cannot get the pod uid %q: %v, retrying without cache...", podUID, err) pods, err = ku.ForceGetLocalPodList(ctx) if err != nil { diff --git a/logs/util/kubernetes/kubelet/kubelet_client.go 
b/logs/util/kubernetes/kubelet/kubelet_client.go index f14c30cdb..6a3ef6597 100644 --- a/logs/util/kubernetes/kubelet/kubelet_client.go +++ b/logs/util/kubernetes/kubelet/kubelet_client.go @@ -14,7 +14,6 @@ import ( "expvar" "fmt" "io" - "log" "net/http" "os" "strconv" @@ -24,6 +23,7 @@ import ( coreconfig "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/logs/util" "flashcat.cloud/categraf/logs/util/kubernetes" + "k8s.io/klog/v2" ) var ( @@ -124,19 +124,19 @@ func (kc *kubeletClient) query(ctx context.Context, path string) ([]byte, int, e response, err := kc.client.Do(req) kubeletExpVar.Add(1) if err != nil { - log.Printf("Cannot request %s: %s", req.URL.String(), err) + klog.Warningf("Cannot request %s: %v", req.URL.String(), err) return nil, 0, err } defer response.Body.Close() b, err := io.ReadAll(response.Body) if err != nil { - log.Printf("Fail to read request %s body: %s", req.URL.String(), err) + klog.Warningf("Fail to read request %s body: %v", req.URL.String(), err) return nil, 0, err } if util.Debug() { - log.Printf("Successfully queried %s, status code: %d, body len: %d", req.URL.String(), response.StatusCode, len(b)) + klog.V(1).Infof("Successfully queried %s, status code: %d, body len: %d", req.URL.String(), response.StatusCode, len(b)) } return b, response.StatusCode, nil } @@ -211,7 +211,7 @@ func getKubeletClient(ctx context.Context) (*kubeletClient, error) { potentialHosts = &connectionInfo{ hostnames: []string{apiServerHost}, } - log.Printf("EKS on Fargate mode detected, will proxy calls to the Kubelet through the APIServer at %s:%d%s", apiServerHost, kubeletHTTPSPort, kubeletPathPrefix) + klog.Infof("EKS on Fargate mode detected, will proxy calls to the Kubelet through the APIServer at %s:%d%s", apiServerHost, kubeletHTTPSPort, kubeletPathPrefix) } else { return nil, errors.New("kubelet proxy mode enabled but nodename is empty - unable to query") } @@ -225,7 +225,7 @@ func getKubeletClient(ctx context.Context) (*kubeletClient, error) { 
if kubeletHTTPSPort > 0 { httpsErr = checkKubeletConnection(ctx, "https", kubeletHTTPSPort, kubeletPathPrefix, potentialHosts, &clientConfig) if httpsErr != nil { - log.Println("Impossible to reach Kubelet through HTTPS") + klog.Warning("Impossible to reach Kubelet through HTTPS") if kubeletHTTPPort <= 0 { return nil, httpsErr } @@ -239,12 +239,12 @@ func getKubeletClient(ctx context.Context) (*kubeletClient, error) { if kubeletHTTPPort > 0 { httpErr = checkKubeletConnection(ctx, "http", kubeletHTTPPort, kubeletPathPrefix, potentialHosts, &clientConfig) if httpErr != nil { - log.Println("Impossible to reach Kubelet through HTTP") + klog.Warning("Impossible to reach Kubelet through HTTP") return nil, fmt.Errorf("impossible to reach Kubelet with host: %s. Please check if your setup requires kubelet_tls_verify = false. Activate debug logs to see all attempts made", kubeletHost) } if httpsErr != nil { - log.Println("Unable to access Kubelet through HTTPS - Using HTTP connection instead. Please check if your setup requires kubelet_tls_verify = false") + klog.Warning("Unable to access Kubelet through HTTPS - Using HTTP connection instead. 
Please check if your setup requires kubelet_tls_verify = false") } return newForConfig(clientConfig, kubeletTimeout) @@ -257,16 +257,16 @@ func checkKubeletConnection(ctx context.Context, scheme string, port int, prefix var err error var kubeClient *kubeletClient - log.Printf("Trying to reach Kubelet with scheme: %s", scheme) + klog.V(1).Infof("Trying to reach Kubelet with scheme: %s", scheme) clientConfig.scheme = scheme for _, ip := range hosts.ips { clientConfig.baseURL = fmt.Sprintf("%s:%d", ip, port) - log.Printf("Trying to reach Kubelet at: %s", clientConfig.baseURL) + klog.V(1).Infof("Trying to reach Kubelet at: %s", clientConfig.baseURL) kubeClient, err = newForConfig(*clientConfig, time.Second) if err != nil { - log.Printf("Failed to create Kubelet client for host: %s - error: %v", clientConfig.baseURL, err) + klog.Warningf("Failed to create Kubelet client for host: %s - error: %v", clientConfig.baseURL, err) continue } @@ -276,17 +276,17 @@ func checkKubeletConnection(ctx context.Context, scheme string, port int, prefix continue } - log.Printf("Successful configuration found for Kubelet, using URL: %s", kubeClient.kubeletURL) + klog.Infof("Successful configuration found for Kubelet, using URL: %s", kubeClient.kubeletURL) return nil } for _, host := range hosts.hostnames { clientConfig.baseURL = fmt.Sprintf("%s:%d%s", host, port, prefix) - log.Printf("Trying to reach Kubelet at: %s", clientConfig.baseURL) + klog.V(1).Infof("Trying to reach Kubelet at: %s", clientConfig.baseURL) kubeClient, err = newForConfig(*clientConfig, time.Second) if err != nil { - log.Printf("Failed to create Kubelet client for host: %s - error: %v", clientConfig.baseURL, err) + klog.Warningf("Failed to create Kubelet client for host: %s - error: %v", clientConfig.baseURL, err) continue } @@ -296,7 +296,7 @@ func checkKubeletConnection(ctx context.Context, scheme string, port int, prefix continue } - log.Printf("Successful configuration found for Kubelet, using URL: %s", 
kubeClient.kubeletURL) + klog.Infof("Successful configuration found for Kubelet, using URL: %s", kubeClient.kubeletURL) return nil } @@ -306,10 +306,10 @@ func checkKubeletConnection(ctx context.Context, scheme string, port int, prefix func logConnectionError(clientConfig *kubeletClientConfig, err error) { switch { case strings.Contains(err.Error(), "x509: certificate is valid for"): - log.Printf(`Invalid x509 settings, the kubelet server certificate is not valid for this subject alternative name: %s, %v, Please check the SAN of the kubelet server certificate with "openssl x509 -in ${KUBELET_CERTIFICATE} -text -noout". `, clientConfig.baseURL, err) + klog.Warningf(`Invalid x509 settings, the kubelet server certificate is not valid for this subject alternative name: %s, %v, Please check the SAN of the kubelet server certificate with "openssl x509 -in ${KUBELET_CERTIFICATE} -text -noout". `, clientConfig.baseURL, err) case strings.Contains(err.Error(), "x509: certificate signed by unknown authority"): - log.Printf(`The kubelet server certificate is signed by unknown authority, the current cacert is %s. Is the kubelet issuing self-signed certificates? Please validate the kubelet certificate with "openssl verify -CAfile %s ${KUBELET_CERTIFICATE}" to avoid this error: %v`, clientConfig.caPath, clientConfig.caPath, err) + klog.Warningf(`The kubelet server certificate is signed by unknown authority, the current cacert is %s. Is the kubelet issuing self-signed certificates? 
Please validate the kubelet certificate with "openssl verify -CAfile %s ${KUBELET_CERTIFICATE}" to avoid this error: %v`, clientConfig.caPath, clientConfig.caPath, err) default: - log.Printf("Failed to reach Kubelet at: %s - error: %v", clientConfig.baseURL, err) + klog.Warningf("Failed to reach Kubelet at: %s - error: %v", clientConfig.baseURL, err) } } diff --git a/logs/util/kubernetes/kubelet/kubelet_hosts.go b/logs/util/kubernetes/kubelet/kubelet_hosts.go index e7b953798..2c8e08d90 100644 --- a/logs/util/kubernetes/kubelet/kubelet_hosts.go +++ b/logs/util/kubernetes/kubelet/kubelet_hosts.go @@ -9,11 +9,11 @@ package kubelet import ( "context" - "log" "net" "time" "flashcat.cloud/categraf/logs/util/docker" + "k8s.io/klog/v2" ) // connectionInfo contains potential kubelet's ips and hostnames @@ -31,13 +31,13 @@ func getPotentialKubeletHosts(kubeletHost string) *connectionInfo { configIps, configHostnames := getKubeletHostFromConfig(ctx, kubeletHost) hosts.ips = append(hosts.ips, configIps...) hosts.hostnames = append(hosts.hostnames, configHostnames...) - log.Printf("Got potential kubelet connection info from config, ips: %v, hostnames: %v", configIps, configHostnames) + klog.V(1).Infof("Got potential kubelet connection info from config, ips: %v, hostnames: %v", configIps, configHostnames) } dockerIps, dockerHostnames := getKubeletHostFromDocker(ctx) hosts.ips = append(hosts.ips, dockerIps...) hosts.hostnames = append(hosts.hostnames, dockerHostnames...) 
- log.Printf("Got potential kubelet connection info from docker, ips: %v, hostnames: %v", dockerIps, dockerHostnames) + klog.V(1).Infof("Got potential kubelet connection info from docker, ips: %v, hostnames: %v", dockerIps, dockerHostnames) dedupeConnectionInfo(&hosts) @@ -48,32 +48,32 @@ func getKubeletHostFromConfig(ctx context.Context, kubeletHost string) ([]string var ips []string var hostnames []string if kubeletHost == "" { - log.Printf("kubernetes_kubelet_host is not set") + klog.V(1).Info("kubernetes_kubelet_host is not set") return ips, hostnames } - log.Printf("Trying to parse kubernetes_kubelet_host: %s", kubeletHost) + klog.V(1).Infof("Trying to parse kubernetes_kubelet_host: %s", kubeletHost) kubeletIP := net.ParseIP(kubeletHost) if kubeletIP == nil { - log.Printf("Parsing kubernetes_kubelet_host: %s is a hostname, cached, trying to resolve it to ip...", kubeletHost) + klog.V(1).Infof("Parsing kubernetes_kubelet_host: %s is a hostname, cached, trying to resolve it to ip...", kubeletHost) hostnames = append(hostnames, kubeletHost) ipAddrs, err := net.DefaultResolver.LookupIPAddr(ctx, kubeletHost) if err != nil { - log.Printf("Cannot LookupIP hostname %s: %v", kubeletHost, err) + klog.Warningf("Cannot LookupIP hostname %s: %v", kubeletHost, err) } else { - log.Printf("kubernetes_kubelet_host: %s is resolved to: %v", kubeletHost, ipAddrs) + klog.V(1).Infof("kubernetes_kubelet_host: %s is resolved to: %v", kubeletHost, ipAddrs) for _, ipAddr := range ipAddrs { ips = append(ips, ipAddr.IP.String()) } } } else { - log.Printf("Parsed kubernetes_kubelet_host: %s is an address: %v, cached, trying to resolve it to hostname", kubeletHost, kubeletIP) + klog.V(1).Infof("Parsed kubernetes_kubelet_host: %s is an address: %v, cached, trying to resolve it to hostname", kubeletHost, kubeletIP) ips = append(ips, kubeletIP.String()) addrs, err := net.DefaultResolver.LookupAddr(ctx, kubeletHost) if err != nil { - log.Printf("Cannot LookupHost ip %s: %v", kubeletHost, err) 
+ klog.Warningf("Cannot LookupHost ip %s: %v", kubeletHost, err) } else { - log.Printf("kubernetes_kubelet_host: %s is resolved to: %v", kubeletHost, addrs) + klog.V(1).Infof("kubernetes_kubelet_host: %s is resolved to: %v", kubeletHost, addrs) for _, addr := range addrs { hostnames = append(hostnames, addr) } @@ -88,17 +88,17 @@ func getKubeletHostFromDocker(ctx context.Context) ([]string, []string) { var hostnames []string dockerHost, err := docker.HostnameProvider(ctx, nil) if err != nil { - log.Printf("unable to get hostname from docker, make sure to set the kubernetes_kubelet_host option: %s", err) + klog.Warningf("unable to get hostname from docker, make sure to set the kubernetes_kubelet_host option: %v", err) return ips, hostnames } - log.Printf("Trying to resolve host name %s provided by docker to ip...", dockerHost) + klog.V(1).Infof("Trying to resolve host name %s provided by docker to ip...", dockerHost) hostnames = append(hostnames, dockerHost) ipAddrs, err := net.DefaultResolver.LookupIPAddr(ctx, dockerHost) if err != nil { - log.Printf("Cannot resolve host name %s, cached, provided by docker to ip: %s", dockerHost, err) + klog.Warningf("Cannot resolve host name %s, cached, provided by docker to ip: %v", dockerHost, err) } else { - log.Printf("Resolved host name %s provided by docker to %v", dockerHost, ipAddrs) + klog.V(1).Infof("Resolved host name %s provided by docker to %v", dockerHost, ipAddrs) for _, ipAddr := range ipAddrs { ips = append(ips, ipAddr.IP.String()) } diff --git a/logs/util/kubernetes/kubelet/podwatcher.go b/logs/util/kubernetes/kubelet/podwatcher.go index f0caf69fb..f76c0a005 100644 --- a/logs/util/kubernetes/kubelet/podwatcher.go +++ b/logs/util/kubernetes/kubelet/podwatcher.go @@ -10,13 +10,13 @@ package kubelet import ( "context" "hash/fnv" - "log" "sort" "strconv" "sync" "time" "flashcat.cloud/categraf/pkg/kubernetes" + "k8s.io/klog/v2" ) const unreadinessTimeout = 30 * time.Second @@ -153,7 +153,7 @@ func (w *PodWatcher) 
computeChanges(podList []*kubernetes.Pod) ([]*kubernetes.Po updatedPods = append(updatedPods, pod) } } - log.Printf("Found %d changed pods out of %d", len(updatedPods), len(podList)) + klog.V(1).Infof("Found %d changed pods out of %d", len(updatedPods), len(podList)) return updatedPods, nil } diff --git a/logs/util/kubernetes/tags/builder.go b/logs/util/kubernetes/tags/builder.go index c3a33f250..670c615e6 100644 --- a/logs/util/kubernetes/tags/builder.go +++ b/logs/util/kubernetes/tags/builder.go @@ -8,8 +8,9 @@ package tags import ( - "log" "strings" + + "k8s.io/klog/v2" ) // newTagListBuilder returns a tagListBuilder. @@ -42,7 +43,7 @@ func (tlb *tagListBuilder) tags() []string { // It returns an empty string if one of the arguments is empty. func (tlb *tagListBuilder) buildTag(k, v string) string { if k == "" || v == "" { - log.Printf("Cannot build tag with empty key or value: key %q - value %q", k, v) + klog.Warningf("Cannot build tag with empty key or value: key %q - value %q", k, v) return "" } diff --git a/main.go b/main.go index 47059d278..7238a4ab7 100644 --- a/main.go +++ b/main.go @@ -3,11 +3,11 @@ package main import ( "flag" "fmt" - "log" _ "net/http/pprof" "os" "os/signal" "path/filepath" + "runtime" "strings" "syscall" @@ -15,7 +15,7 @@ import ( "github.com/kardianos/service" "github.com/toolkits/pkg/net/tcpx" "github.com/toolkits/pkg/runner" - "gopkg.in/natefinch/lumberjack.v2" + "k8s.io/klog/v2" "flashcat.cloud/categraf/agent" agentInstall "flashcat.cloud/categraf/agent/install" @@ -23,6 +23,7 @@ import ( "flashcat.cloud/categraf/api" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/heartbeat" + "flashcat.cloud/categraf/pkg/logging" "flashcat.cloud/categraf/pkg/osx" "flashcat.cloud/categraf/writer" ) @@ -47,36 +48,62 @@ var ( ) func init() { + logging.RegisterFlags(flag.CommandLine) + // change to current dir var err error if appPath, err = winsvc.GetAppPath(); err != nil { - log.Fatal(err) + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } if 
err := os.Chdir(filepath.Dir(appPath)); err != nil { - log.Fatal(err) + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } } func initLog(output string) { - switch { - case output == "stdout": - log.SetOutput(os.Stdout) - case output == "stderr": - log.SetOutput(os.Stderr) - case len(output) != 0: - log.SetOutput(&lumberjack.Logger{ - Filename: output, - MaxSize: config.Config.Log.MaxSize, - MaxAge: config.Config.Log.MaxAge, - MaxBackups: config.Config.Log.MaxBackups, - LocalTime: config.Config.Log.LocalTime, - Compress: config.Config.Log.Compress, - }) - default: - log.SetOutput(os.Stdout) + if output == "" { + output = config.Config.Log.FileName + if config.Config.Log.FileName == "stdout" || config.Config.Log.FileName == "stderr" || config.Config.Log.FileName == "" { + if runtime.GOOS == "windows" && !winsvc.IsAnInteractiveSession() { + output = "categraf.log" + } + } + } + + if err := logging.Configure( + output, + config.Config.Log.MaxSize, + config.Config.Log.MaxAge, + config.Config.Log.MaxBackups, + config.Config.Log.LocalTime, + config.Config.Log.Compress, + config.Config.DebugMode, + config.Config.DebugLevel, + ); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func initServiceCommandLog() { + if err := logging.Configure("stderr", 0, 0, 0, false, false, *debugMode, *debugLevel); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } } +func flagWasSet(name string) bool { + set := false + flag.CommandLine.Visit(func(f *flag.Flag) { + if f.Name == name { + set = true + } + }) + return set +} + func main() { flag.Parse() @@ -84,19 +111,35 @@ func main() { fmt.Println(config.Version) os.Exit(0) } + if *install || *remove || *start || *stop || *status || *update { - err := serviceProcess() - if err != nil { - log.Println("E!", err) + initServiceCommandLog() + defer logging.Sync() + + if err := serviceProcess(); err != nil { + klog.ErrorS(err, "service command failed") } return } // init configs - if err := config.InitConfig(*configDir, 
*debugLevel, *debugMode, *testMode, *interval, *inputFilters); err != nil { - log.Fatalln("F! failed to init config:", err) + if err := config.InitConfig( + *configDir, + *debugLevel, + *debugMode, + *testMode, + flagWasSet("debug-level"), + flagWasSet("debug"), + *interval, + *inputFilters, + ); err != nil { + fmt.Fprintf(os.Stderr, "failed to init config: %v\n", err) + os.Exit(1) } + initLog("") + defer logging.Sync() + doOSsvc() printEnv() @@ -108,15 +151,18 @@ func main() { tcpx.WaitHosts() ag, err := agent.NewAgent() if err != nil { - fmt.Println("F! failed to init agent:", err) - os.Exit(-1) + klog.ErrorS(err, "failed to init agent") + logging.Sync() + os.Exit(1) } runAgent(ag) } func initWriters() { if err := writer.InitWriters(); err != nil { - log.Fatalln("F! failed to init writer:", err) + klog.ErrorS(err, "failed to init writer") + logging.Sync() + os.Exit(1) } } @@ -131,10 +177,10 @@ EXIT: sig := <-sc switch sig { case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: - log.Println("I! received signal:", sig.String()) + klog.InfoS("received signal", "signal", sig.String()) break EXIT case syscall.SIGHUP: - log.Println("I! received signal:", sig.String()) + klog.InfoS("received signal", "signal", sig.String()) ag.Reload() case syscall.SIGPIPE: // https://pkg.go.dev/os/signal#hdr-SIGPIPE @@ -143,15 +189,13 @@ EXIT: } ag.Stop() - log.Println("I! exited") + logging.Sync() + klog.InfoS("exited") } func printEnv() { runner.Init() - log.Println("I! runner.binarydir:", runner.Cwd) - log.Println("I! runner.hostname:", runner.Hostname) - log.Println("I! runner.fd_limits:", runner.FdLimits()) - log.Println("I! 
runner.vm_limits:", runner.VMLimits()) + klog.InfoS("runner environment", "binarydir", runner.Cwd, "hostname", runner.Hostname, "fd_limits", runner.FdLimits(), "vm_limits", runner.VMLimits()) } type program struct{} @@ -169,109 +213,109 @@ func serviceProcess() error { prg := &program{} s, err := service.New(prg, svcConfig) if err != nil { - fmt.Println("generate categraf service error " + err.Error()) + klog.ErrorS(err, "generate categraf service error") return nil } if *stop { if sts, err := s.Status(); err != nil { - log.Println("W! show categraf service status failed:", err) + klog.Warningf("show categraf service status failed: %v", err) } else { switch sts { case service.StatusRunning: - log.Println("I! categraf service status: running") + klog.InfoS("categraf service status", "status", "running") case service.StatusStopped: - log.Println("I! categraf service status: stopped") + klog.InfoS("categraf service status", "status", "stopped") default: - log.Println("I! categraf service status: unknown") + klog.InfoS("categraf service status", "status", "unknown") } } if err := s.Stop(); err != nil { - log.Println("E! stop categraf service failed:", err) + klog.ErrorS(err, "stop categraf service failed") } else { - log.Println("I! stop categraf service ok") + klog.InfoS("stop categraf service ok") } return nil } if *remove { if sts, err := s.Status(); err != nil { - log.Println("W! show categraf service status failed:", err) + klog.Warningf("show categraf service status failed: %v", err) } else { switch sts { case service.StatusRunning: - log.Println("I! categraf service status: running") + klog.InfoS("categraf service status", "status", "running") case service.StatusStopped: - log.Println("I! categraf service status: stopped") + klog.InfoS("categraf service status", "status", "stopped") default: - log.Println("I! categraf service status: unknown") + klog.InfoS("categraf service status", "status", "unknown") } } if err := s.Stop(); err != nil { - log.Println("W! 
stop categraf service failed:", err) + klog.ErrorS(err, "stop categraf service failed") } else { - log.Println("I! stop categraf service ok") + klog.InfoS("stop categraf service ok") } if err := s.Uninstall(); err != nil { - log.Println("E! remove categraf service failed:", err) + klog.ErrorS(err, "remove categraf service failed") } else { - log.Println("I! remove categraf service ok") + klog.InfoS("remove categraf service ok") } return nil } if *install { if sts, err := s.Status(); err != nil { - log.Println("W! show categraf service status failed:", err) + klog.Warningf("show categraf service status failed: %v", err) } else { switch sts { case service.StatusRunning: - log.Println("I! categraf service status: running") + klog.InfoS("categraf service status", "status", "running") case service.StatusStopped: - log.Println("I! categraf service status: stopped") + klog.InfoS("categraf service status", "status", "stopped") default: - log.Println("I! categraf service status: unknown") + klog.InfoS("categraf service status", "status", "unknown") } } if err := s.Install(); err != nil { - log.Println("E! install categraf service failed:", err) + klog.ErrorS(err, "install categraf service failed") } else { - log.Println("I! install categraf service ok") + klog.InfoS("install categraf service ok") } return nil } if *start { if sts, err := s.Status(); err != nil { - log.Println("W! show categraf service status failed:", err) + klog.Warningf("show categraf service status failed: %v", err) } else { switch sts { case service.StatusRunning: - log.Println("I! categraf service status: running") + klog.InfoS("categraf service status", "status", "running") case service.StatusStopped: - log.Println("I! categraf service status: stopped") + klog.InfoS("categraf service status", "status", "stopped") default: - log.Println("I! categraf service status: unknown") + klog.InfoS("categraf service status", "status", "unknown") } } if err := s.Start(); err != nil { - log.Println("E! 
start categraf service failed:", err) + klog.ErrorS(err, "start categraf service failed") } else { - log.Println("I! start categraf service ok") + klog.InfoS("start categraf service ok") } return nil } if *status { if sts, err := s.Status(); err != nil { - log.Println("E! show categraf service status failed:", err) + klog.ErrorS(err, "show categraf service status failed") } else { switch sts { case service.StatusRunning: - log.Println("I! show categraf service status: running") + klog.InfoS("show categraf service status", "status", "running") case service.StatusStopped: - log.Println("I! show categraf service status: stopped") + klog.InfoS("show categraf service status", "status", "stopped") default: - log.Println("I! show categraf service status: unknown") + klog.InfoS("show categraf service status", "status", "unknown") } } @@ -283,30 +327,30 @@ func serviceProcess() error { } if sts, err := s.Status(); err != nil { if strings.Contains(err.Error(), "not installed") { - log.Println("E! update only support mode that running in service mode") + klog.Warningf("update only support mode that running in service mode") } return nil } else { switch sts { case service.StatusRunning: - log.Println("I! categraf service status: running, version:", config.Version) + klog.InfoS("categraf service status", "status", "running", "version", config.Version) case service.StatusStopped: - log.Println("I! categraf service status: stopped, version:", config.Version) + klog.InfoS("categraf service status", "status", "stopped", "version", config.Version) default: - log.Println("I! categraf service status: unknown, version:", config.Version) + klog.InfoS("categraf service status", "status", "unknown", "version", config.Version) } } err := agentUpdate.Update(*updateFile) if err != nil { - log.Println("E! update categraf failed:", err) + klog.ErrorS(err, "update categraf failed") return nil } err = s.Restart() if err != nil { - log.Println("E! 
restart categraf failed:", err) + klog.ErrorS(err, "restart categraf failed") return nil } - log.Println("I! update categraf success") + klog.InfoS("update categraf success") } return nil } diff --git a/main_posix.go b/main_posix.go index c5c24154f..a97976c17 100644 --- a/main_posix.go +++ b/main_posix.go @@ -3,20 +3,18 @@ package main import ( - "log" "os" "os/signal" "syscall" "golang.org/x/sys/unix" + "k8s.io/klog/v2" "flashcat.cloud/categraf/agent" - "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/pkg/pprof" ) func runAgent(ag *agent.Agent) { - initLog(config.Config.Log.FileName) ag.Start() go profile() go reapDaemon() @@ -85,11 +83,11 @@ func reapDaemon() { case unix.SIGCHLD: exits, err := reap() if err != nil { - log.Printf("E! reaping children failed: %v", err) + klog.ErrorS(err, "reaping children failed") continue } for _, e := range exits { - log.Printf("I! reaped pid: %d, status: %d", e.pid, e.status) + klog.InfoS("reaped child process", "pid", e.pid, "status", e.status) } } } diff --git a/main_windows.go b/main_windows.go index 1ceb8ae54..de3014ea9 100644 --- a/main_windows.go +++ b/main_windows.go @@ -5,7 +5,6 @@ package main import ( "flag" "fmt" - "log" "os" "path/filepath" "runtime" @@ -13,9 +12,11 @@ import ( "time" "github.com/chai2010/winsvc" + "k8s.io/klog/v2" "flashcat.cloud/categraf/agent" "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/pkg/logging" "flashcat.cloud/categraf/pkg/pprof" ) @@ -31,15 +32,10 @@ var ( func runAgent(ag *agent.Agent) { if !winsvc.IsAnInteractiveSession() { - if config.Config.Log.FileName == "stdout" || config.Config.Log.FileName == "stderr" || - config.Config.Log.FileName == "" { - initLog("categraf.log") - } else { - initLog(config.Config.Log.FileName) - } - if err := winsvc.RunAsService(*flagWinSvcName, ag.Start, ag.Stop, false); err != nil { - log.Fatalln("F! 
failed to run windows service:", err) + klog.ErrorS(err, "failed to run windows service") + logging.Sync() + os.Exit(1) } return } @@ -53,7 +49,9 @@ func doOSsvc() { // install service if *flagWinSvcInstall { if err := winsvc.InstallService(appPath, *flagWinSvcName, *flagWinSvcDesc); err != nil { - log.Fatalln("F! failed to install service:", *flagWinSvcName, "error:", err) + klog.ErrorS(err, "failed to install service", "service", *flagWinSvcName) + logging.Sync() + os.Exit(1) } fmt.Println("done") os.Exit(0) @@ -62,7 +60,9 @@ func doOSsvc() { // uninstall service if *flagWinSvcUninstall { if err := winsvc.RemoveService(*flagWinSvcName); err != nil { - log.Fatalln("F! failed to uninstall service:", *flagWinSvcName, "error:", err) + klog.ErrorS(err, "failed to uninstall service", "service", *flagWinSvcName) + logging.Sync() + os.Exit(1) } fmt.Println("done") os.Exit(0) @@ -71,7 +71,9 @@ func doOSsvc() { // start service if *flagWinSvcStart { if err := winsvc.StartService(*flagWinSvcName); err != nil { - log.Fatalln("F! failed to start service:", *flagWinSvcName, "error:", err) + klog.ErrorS(err, "failed to start service", "service", *flagWinSvcName) + logging.Sync() + os.Exit(1) } fmt.Println("done") os.Exit(0) @@ -80,7 +82,9 @@ func doOSsvc() { // stop service if *flagWinSvcStop && runtime.GOOS == "windows" { if err := winsvc.StopService(*flagWinSvcName); err != nil { - log.Fatalln("F! 
failed to stop service:", *flagWinSvcName, "error:", err) + klog.ErrorS(err, "failed to stop service", "service", *flagWinSvcName) + logging.Sync() + os.Exit(1) } fmt.Println("done") os.Exit(0) diff --git a/parser/influx/parser.go b/parser/influx/parser.go index f75d68d76..774dd182f 100644 --- a/parser/influx/parser.go +++ b/parser/influx/parser.go @@ -1,13 +1,13 @@ package influx import ( - "log" "strings" "time" "flashcat.cloud/categraf/types" "flashcat.cloud/categraf/types/metric" "github.com/influxdata/line-protocol/v2/lineprotocol" + "k8s.io/klog/v2" ) // Parser is an InfluxDB Line Protocol parser that implements the @@ -34,7 +34,7 @@ func (p *Parser) Parse(input []byte, slist *types.SampleList) error { for decoder.Next() { m, err := nextMetric(decoder, p.precision, p.defaultTime) if err != nil { - log.Println("E! failed to parse influx line:", string(input), err) + klog.ErrorS(err, "failed to parse influx line", "input", string(input)) continue } metrics = append(metrics, m) diff --git a/parser/prometheus/parser.go b/parser/prometheus/parser.go index b0ff5676a..143afb7de 100644 --- a/parser/prometheus/parser.go +++ b/parser/prometheus/parser.go @@ -3,12 +3,12 @@ package prometheus import ( "bytes" "io" - "log" "math" "mime" "net/http" dto "github.com/prometheus/client_model/go" + "k8s.io/klog/v2" "flashcat.cloud/categraf/pkg/filter" util "flashcat.cloud/categraf/pkg/metrics" @@ -158,7 +158,7 @@ func (p *Parser) Parse(buf []byte, slist *types.SampleList) error { } if err := p.parse(reader, slist); err != nil { - log.Println("E! 
parse metrics failed, error:", err) + klog.ErrorS(err, "parse metrics failed") } } } diff --git a/pkg/aop/logger.go b/pkg/aop/logger.go index 57d664385..d74954653 100644 --- a/pkg/aop/logger.go +++ b/pkg/aop/logger.go @@ -3,13 +3,13 @@ package aop import ( "fmt" "io" - "log" "net/http" "os" "time" "github.com/gin-gonic/gin" "github.com/mattn/go-isatty" + "k8s.io/klog/v2" ) type consoleColorModeValue int @@ -275,7 +275,7 @@ func LoggerWithConfig(conf LoggerConfig) gin.HandlerFunc { param.Path = path // fmt.Fprint(out, formatter(param)) - log.Println("I!", formatter(param)) + klog.Info(formatter(param)) // if c.Request.Method != "GET" { // logger.Debug(readBody(rdr1)) diff --git a/pkg/httpx/client.go b/pkg/httpx/client.go index d70e91230..1ca97527f 100644 --- a/pkg/httpx/client.go +++ b/pkg/httpx/client.go @@ -7,12 +7,13 @@ package httpx import ( "crypto/tls" - "log" "net" "net/http" "net/url" "sync" "time" + + "k8s.io/klog/v2" ) // ResetClient wraps (http.Client).Do and resets the underlying connections at the @@ -62,7 +63,7 @@ func (c *ResetClient) checkReset() { return } - log.Println("W! Resetting HTTP client's connections") + klog.Warning("resetting HTTP client's connections") c.lastReset = time.Now() // Close idle connections on underlying client. Safe to do while other goroutines use the client. // This is a best effort: if other goroutine(s) are currently using the client, diff --git a/pkg/httpx/transport.go b/pkg/httpx/transport.go index 7719c34d9..64f1a4b54 100644 --- a/pkg/httpx/transport.go +++ b/pkg/httpx/transport.go @@ -7,12 +7,13 @@ package httpx import ( "crypto/tls" - "log" "net" "net/http" "net/url" "sync" "time" + + "k8s.io/klog/v2" ) var ( @@ -41,7 +42,7 @@ func warnOnce(warnMap map[string]bool, key string, format string, params ...inte defer NoProxyMapMutex.Unlock() if _, ok := warnMap[key]; !ok { warnMap[key] = true - log.Printf(format, params...) + klog.Warningf(format, params...) 
} } diff --git a/pkg/kubernetes/pod.go b/pkg/kubernetes/pod.go index 5587a48c6..7e1a8fb58 100644 --- a/pkg/kubernetes/pod.go +++ b/pkg/kubernetes/pod.go @@ -7,7 +7,8 @@ package kubernetes import ( "encoding/json" - "log" + + "k8s.io/klog/v2" ) type creatorRef struct { @@ -35,11 +36,11 @@ func (p *Pod) Owners() []PodOwner { // Error handling if err != nil { - log.Printf("Cannot parse created-by field for pod %q: %s", p.Metadata.Name, err) + klog.Warningf("cannot parse created-by field for pod %q: %s", p.Metadata.Name, err) return nil } if ref.Kind != "SerializedReference" { - log.Printf("Cannot parse created-by field for pod %q: unknown kind %q", p.Metadata.Name, ref.Kind) + klog.Warningf("cannot parse created-by field for pod %q: unknown kind %q", p.Metadata.Name, ref.Kind) return nil } diff --git a/pkg/logging/logging.go b/pkg/logging/logging.go new file mode 100644 index 000000000..503c16b42 --- /dev/null +++ b/pkg/logging/logging.go @@ -0,0 +1,75 @@ +package logging + +import ( + "flag" + "io" + stdlog "log" + "os" + "strconv" + "time" + + "gopkg.in/natefinch/lumberjack.v2" + "k8s.io/klog/v2" +) + +// RegisterFlags registers klog flags on the provided flag set. +func RegisterFlags(fs *flag.FlagSet) { + klog.InitFlags(fs) +} + +// Configure initializes logging with the configured output target and klog flags. 
+func Configure(output string, maxSize, maxAge, maxBackups int, localTime, compress, debug bool, debugLevel int) error { + return configureWithWriter(newWriter(output, maxSize, maxAge, maxBackups, localTime, compress), flag.CommandLine, debug, debugLevel) +} + +func configureWithWriter(writer io.Writer, fs *flag.FlagSet, debug bool, debugLevel int) error { + verbosity := debugLevel + if debug && verbosity == 0 { + verbosity = 1 + } + + sets := []struct { + name string + value string + }{ + {name: "logtostderr", value: "false"}, + {name: "alsologtostderr", value: "false"}, + {name: "stderrthreshold", value: "FATAL"}, + {name: "one_output", value: "true"}, + {name: "v", value: strconv.Itoa(verbosity)}, + } + for _, set := range sets { + if err := fs.Set(set.name, set.value); err != nil { + return err + } + } + + stdlog.SetFlags(0) + klog.SetOutput(writer) + klog.CopyStandardLogTo("INFO") + klog.StartFlushDaemon(5 * time.Second) + return nil +} + +func newWriter(output string, maxSize, maxAge, maxBackups int, localTime, compress bool) io.Writer { + switch output { + case "", "stdout": + return os.Stdout + case "stderr": + return os.Stderr + default: + return &lumberjack.Logger{ + Filename: output, + MaxSize: maxSize, + MaxAge: maxAge, + MaxBackups: maxBackups, + LocalTime: localTime, + Compress: compress, + } + } +} + +// Sync flushes pending log I/O. 
+func Sync() {
+	klog.Flush()
+}
diff --git a/pkg/logging/logging_test.go b/pkg/logging/logging_test.go new file mode 100644 index 000000000..c8c396e3e --- /dev/null +++ b/pkg/logging/logging_test.go @@ -0,0 +1,116 @@ +package logging + +import ( + "bytes" + "flag" + "log" + "strings" + "testing" + + "k8s.io/klog/v2" +)
+
+// TestConfigureMapsDebugToVerbosity checks that debug=true with a zero debug
+// level still lets a V(1) message reach the configured writer.
+func TestConfigureMapsDebugToVerbosity(t *testing.T) {
+	klogState := klog.CaptureState()
+	defer klogState.Restore()
+
+	// configureWithWriter touches the standard logger, so snapshot it and put
+	// it back (prefix, then flags, then output) when the test returns.
+	prevOutput, prevFlags, prevPrefix := log.Writer(), log.Flags(), log.Prefix()
+	defer func() {
+		log.SetPrefix(prevPrefix)
+		log.SetFlags(prevFlags)
+		log.SetOutput(prevOutput)
+	}()
+
+	flags := flag.NewFlagSet("logging", flag.ContinueOnError)
+	RegisterFlags(flags)
+	if err := flags.Parse(nil); err != nil {
+		t.Fatalf("parse flags: %v", err)
+	}
+
+	captured := new(bytes.Buffer)
+	if err := configureWithWriter(captured, flags, true, 0); err != nil {
+		t.Fatalf("configureWithWriter: %v", err)
+	}
+
+	klog.V(1).InfoS("debug enabled")
+	klog.Flush()
+
+	if got := captured.String(); !strings.Contains(got, "debug enabled") {
+		t.Fatalf("expected buffer to contain debug message, got %q", got)
+	}
+}
+
+// TestConfigureBridgesStandardLibraryLog checks that a message written through
+// a standard library logger built on the current standard writer ends up in
+// the writer handed to configureWithWriter.
+func TestConfigureBridgesStandardLibraryLog(t *testing.T) {
+	klogState := klog.CaptureState()
+	defer klogState.Restore()
+
+	prevOutput, prevFlags, prevPrefix := log.Writer(), log.Flags(), log.Prefix()
+	defer func() {
+		log.SetPrefix(prevPrefix)
+		log.SetFlags(prevFlags)
+		log.SetOutput(prevOutput)
+	}()
+
+	flags := flag.NewFlagSet("logging", flag.ContinueOnError)
+	RegisterFlags(flags)
+	if err := flags.Parse(nil); err != nil {
+		t.Fatalf("parse flags: %v", err)
+	}
+
+	captured := new(bytes.Buffer)
+	if err := configureWithWriter(captured, flags, false, 0); err != nil {
+		t.Fatalf("configureWithWriter: %v", err)
+	}
+
+	// A dedicated logger value is used on purpose: the repository policy test
+	// scans this file for direct package-level standard log print calls.
+	bridged := log.New(log.Writer(), "", log.Flags())
+	bridged.Println("legacy bridge message")
+	klog.Flush()
+
+	if got := captured.String(); !strings.Contains(got, "legacy bridge message") {
+		t.Fatalf("expected buffer to contain bridged message, got %q", got)
+	}
+}
+
+func
TestConfigureUsesConfiguredVerbosity(t *testing.T) {
+	// Checks that an explicit debugLevel of 2 lets a V(2) message reach the
+	// configured writer even though debug is false.
+	state := klog.CaptureState()
+	defer state.Restore()
+	// configureWithWriter also modifies the standard library logger (flags and
+	// output), which klog's captured state is not shown to cover. Save and
+	// restore it here exactly as the other configureWithWriter tests in this
+	// file do, so the redirection does not leak past this test.
+	oldOutput := log.Writer()
+	oldFlags := log.Flags()
+	oldPrefix := log.Prefix()
+	defer log.SetOutput(oldOutput)
+	defer log.SetFlags(oldFlags)
+	defer log.SetPrefix(oldPrefix)
+
+	fs := flag.NewFlagSet("logging", flag.ContinueOnError)
+	RegisterFlags(fs)
+	if err := fs.Parse(nil); err != nil {
+		t.Fatalf("parse flags: %v", err)
+	}
+
+	var buf bytes.Buffer
+	if err := configureWithWriter(&buf, fs, false, 2); err != nil {
+		t.Fatalf("configureWithWriter: %v", err)
+	}
+
+	klog.V(2).InfoS("verbosity two enabled")
+	klog.Flush()
+
+	if !strings.Contains(buf.String(), "verbosity two enabled") {
+		t.Fatalf("expected buffer to contain v=2 message, got %q", buf.String())
+	}
+}
+
+// TestConfigureDoesNotExceedConfiguredVerbosity checks that with debugLevel 1
+// a V(2) message does not reach the configured writer.
+func TestConfigureDoesNotExceedConfiguredVerbosity(t *testing.T) {
+	state := klog.CaptureState()
+	defer state.Restore()
+	// Restore the standard library logger as well; see the note in
+	// TestConfigureUsesConfiguredVerbosity.
+	oldOutput := log.Writer()
+	oldFlags := log.Flags()
+	oldPrefix := log.Prefix()
+	defer log.SetOutput(oldOutput)
+	defer log.SetFlags(oldFlags)
+	defer log.SetPrefix(oldPrefix)
+
+	fs := flag.NewFlagSet("logging", flag.ContinueOnError)
+	RegisterFlags(fs)
+	if err := fs.Parse(nil); err != nil {
+		t.Fatalf("parse flags: %v", err)
+	}
+
+	var buf bytes.Buffer
+	if err := configureWithWriter(&buf, fs, false, 1); err != nil {
+		t.Fatalf("configureWithWriter: %v", err)
+	}
+
+	klog.V(2).InfoS("verbosity two should stay hidden")
+	klog.Flush()
+
+	if strings.Contains(buf.String(), "verbosity two should stay hidden") {
+		t.Fatalf("expected buffer to exclude v=2 message, got %q", buf.String())
+	}
+}
diff --git a/pkg/logging/repository_policy_test.go b/pkg/logging/repository_policy_test.go new file mode 100644 index 000000000..5cd5f594e --- /dev/null +++ b/pkg/logging/repository_policy_test.go @@ -0,0 +1,295 @@ +package logging + +import ( + "os" + "path/filepath" + "regexp" + "testing" +) + +var forbiddenStdLog = regexp.MustCompile(`\blog\.(Print|Println|Printf|Panic|Panicf|Panicln|Fatal|Fatalf|Fatalln)\b`) +var forbiddenDebugBranch = regexp.MustCompile(`if\s+(config\.Config\.DebugMode|Config\.DebugMode)\s*\{`) + +func TestCoreRuntimeDoesNotUseStandardLogOrDebugBranches(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + files := []string{ + "main.go", + "main_posix.go", + "main_windows.go", +
"agent/agent.go", + "agent/ibex_agent.go", + "agent/logs_agent.go", + "agent/metrics_agent.go", + "agent/metrics_reader.go", + "agent/prometheus_agent.go", + "agent/install/service_linux.go", + "agent/update/update_linux.go", + "agent/update/update_windows.go", + "api/router_falcon.go", + "api/router_opentsdb.go", + "api/server.go", + "config/config.go", + "config/hostname.go", + "config/urllabel.go", + "ibex/heartbeat.go", + "ibex/task.go", + "ibex/tasks.go", + "ibex/client/cli.go", + "inputs/http_provider.go", + "inputs/collector.go", + "inputs/haproxy/haproxy.go", + "inputs/haproxy/exporter.go", + "inputs/http_response/http_response.go", + "inputs/sockstat/sockstat.go", + "inputs/self_metrics/metrics.go", + "inputs/system/system.go", + "inputs/disk/disk.go", + "inputs/cpu/cpu.go", + "inputs/cloudwatch/cloudwatch.go", + "inputs/kernel/kernel.go", + "inputs/ldap/ldap.go", + "inputs/conntrack/conntrack.go", + "inputs/diskio/diskio.go", + "inputs/ethtool/command_linux.go", + "inputs/mem/mem.go", + "inputs/net/net.go", + "inputs/kernel_vmstat/kernel_vmstat.go", + "inputs/nats/nats.go", + "inputs/nfsclient/nfsclient.go", + "inputs/nsq/nsq.go", + "inputs/system/ps.go", + "inputs/ethtool/ethtool_notlinux.go", + "inputs/ethtool/ethtool_linux.go", + "inputs/filecount/filecount.go", + "inputs/gnmi/gnmi.go", + "inputs/gnmi/handler.go", + "inputs/googlecloud/instances.go", + "inputs/greenplum/greenplum.go", + "inputs/jenkins/jenkins.go", + "inputs/kafka/kafka.go", + "inputs/provider_manager.go", + "inputs/redis/redis.go", + "inputs/redis_sentinel/redis_sentinel.go", + "inputs/snmp/table.go", + "inputs/snmp/health_check.go", + "inputs/snmp/instances.go", + "inputs/jolokia_agent/jolokia_agent.go", + "inputs/mongodb/mongodb.go", + "inputs/mongodb/mongodb_server.go", + "inputs/mysql/engine_innodb.go", + "inputs/mysql/global_status.go", + "inputs/mysql/global_variables.go", + "inputs/mysql/binlog.go", + "inputs/mysql/custom_queries.go", + "inputs/mysql/mysql.go", + 
"inputs/mysql/processlist.go", + "inputs/mysql/processlist_by_user.go", + "inputs/mysql/schema_size.go", + "inputs/mysql/slave_status.go", + "inputs/mysql/table_size.go", + "inputs/clickhouse/clickhouse.go", + "inputs/bind/bind.go", + "inputs/chrony/chrony.go", + "inputs/consul/consul.go", + "inputs/dns_query/dns_query.go", + "inputs/dmesg/dmesg.go", + "inputs/docker/docker.go", + "inputs/aliyun/cloud.go", + "inputs/aliyun/internal/manager/cms.go", + "inputs/amd_rocm_smi/amd_rocm_smi.go", + "inputs/appdynamics/instances.go", + "inputs/arp_packet/arp_packet.go", + "inputs/cadvisor/instances.go", + "inputs/dcgm/exporter.go", + "inputs/emc_unity/emc_unity.go", + "inputs/exec/exec.go", + "inputs/hadoop/hadoop.go", + "inputs/huatuo/huatuo.go", + "inputs/iptables/iptables.go", + "inputs/ipmi/instances.go", + "inputs/ipmi/exporter/collector_bmc.go", + "inputs/ipmi/exporter/collector_bmc_watchdog.go", + "inputs/ipmi/exporter/collector_chassis.go", + "inputs/ipmi/exporter/collector_dcmi.go", + "inputs/ipmi/exporter/collector_ipmi.go", + "inputs/ipmi/exporter/collector_notwindows.go", + "inputs/ipmi/exporter/collector_sel.go", + "inputs/ipmi/exporter/collector_sm_lan_mode.go", + "inputs/ipmi/exporter/freeipmi/freeipmi.go", + "inputs/kubernetes/kubernetes.go", + "inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go", + "inputs/net_response/net_response.go", + "inputs/nginx/nginx.go", + "inputs/netstat_filter/netstat_filter.go", + "inputs/ipvs/ipvs_linux_amd64.go", + "inputs/ethtool/namespace_linux.go", + "inputs/jolokia/gatherer.go", + "inputs/jolokia_proxy/jolokia_proxy.go", + "inputs/redfish/redfish.go", + "inputs/tengine/tengine.go", + "inputs/tomcat/tomcat.go", + "inputs/ntp/ntp.go", + "inputs/nvidia_smi/builder.go", + "inputs/nvidia_smi/nvidia_smi.go", + "inputs/node_exporter/exporter.go", + "inputs/node_exporter/collector/buddyinfo.go", + "inputs/node_exporter/collector/collector.go", + "inputs/node_exporter/collector/cpu_linux.go", + 
"inputs/node_exporter/collector/diskstats_common.go", + "inputs/node_exporter/collector/diskstats_linux.go", + "inputs/node_exporter/collector/ethtool_linux.go", + "inputs/node_exporter/collector/filesystem_common.go", + "inputs/node_exporter/collector/netclass_rtnl_linux.go", + "inputs/node_exporter/collector/netdev_common.go", + "inputs/node_exporter/collector/ntp.go", + "inputs/node_exporter/collector/perf_linux.go", + "inputs/node_exporter/collector/qdisc_linux.go", + "inputs/node_exporter/collector/runit.go", + "inputs/node_exporter/collector/supervisord.go", + "inputs/node_exporter/collector/systemd_linux.go", + "inputs/node_exporter/collector/textfile.go", + "inputs/oracle/oracle.go", + "inputs/phpfpm/phpfpm.go", + "inputs/ping/ping.go", + "inputs/ping/ping_notwindows.go", + "inputs/ping/ping_windows.go", + "inputs/postgresql/postgresql.go", + "inputs/prometheus/consul.go", + "inputs/prometheus/prometheus.go", + "inputs/processes/processes_notwindows.go", + "inputs/procstat/win_service_windows.go", + "inputs/procstat/procstat.go", + "inputs/nginx_upstream_check/nginx_upstream_check.go", + "inputs/netstat/netstat.go", + "inputs/netstat_filter/netstat_tcp.go", + "inputs/supervisor/supervisor.go", + "inputs/rocketmq_offset/rocketmq.go", + "inputs/rabbitmq/rabbitmq.go", + "inputs/snmp/netsnmp.go", + "inputs/smart/instances.go", + "inputs/sqlserver/sqlserver.go", + "inputs/switch_legacy/switch_legacy.go", + "inputs/systemd/systemd_linux.go", + "inputs/snmp_trap/snmp_trap.go", + "inputs/traffic_server/traffic_server.go", + "inputs/vsphere/finder.go", + "inputs/vsphere/client.go", + "inputs/vsphere/endpoint.go", + "inputs/vsphere/tscache.go", + "inputs/vsphere/vsphere.go", + "inputs/whois/whois.go", + "inputs/x509_cert/x509_cert.go", + "inputs/xskyapi/xskyapi.go", + "inputs/zookeeper/zookeeper.go", + "inputs/logstash/logstash.go", + "parser/influx/parser.go", + "parser/prometheus/parser.go", + "pkg/aop/logger.go", + "pkg/httpx/client.go", + 
"pkg/httpx/transport.go", + "pkg/kubernetes/pod.go", + "pkg/pprof/profile.go", + "pkg/snmp/translate.go", + "writer/writer.go", + "writer/writers.go", + "heartbeat/heartbeat.go", + } + + for _, rel := range files { + path := filepath.Join(repoRoot, rel) + content, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + if forbiddenStdLog.Match(content) { + t.Fatalf("%s still uses forbidden standard log calls", path) + } + if forbiddenDebugBranch.Match(content) { + t.Fatalf("%s still contains forbidden debug branch", path) + } + } +} + +func TestLoggingTestsDoNotUseDirectStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + path := filepath.Join(repoRoot, "pkg/logging/logging_test.go") + + content, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + + if forbiddenStdLog.Match(content) { + t.Fatalf("%s still uses forbidden standard log calls", path) + } +} + +func TestNTPTestsDoNotUseDirectStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + path := filepath.Join(repoRoot, "inputs/ntp/ntp_test.go") + + content, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + + if forbiddenStdLog.Match(content) { + t.Fatalf("%s still uses forbidden standard log calls", path) + } +} + +func TestElasticsearchTreeDoesNotUseStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + checkGoTreeForForbiddenStdLog(t, filepath.Join(repoRoot, "inputs/elasticsearch")) +} + +func TestSNMPZabbixTreeDoesNotUseStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + checkGoTreeForForbiddenStdLog(t, filepath.Join(repoRoot, "inputs/snmp_zabbix")) +} + +func TestMtailTreeDoesNotUseStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + checkGoTreeForForbiddenStdLog(t, filepath.Join(repoRoot, "inputs/mtail")) +} + +func 
TestHeartbeatTreeDoesNotUseStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + checkGoTreeForForbiddenStdLog(t, filepath.Join(repoRoot, "heartbeat")) +} + +func TestLogsTreeDoesNotUseStandardLogCalls(t *testing.T) { + repoRoot := filepath.Clean(filepath.Join("..", "..")) + checkGoTreeForForbiddenStdLog(t, filepath.Join(repoRoot, "logs")) +}
+
+// checkGoTreeForForbiddenStdLog walks the directory tree under root and fails
+// the test at the first file with a ".go" extension whose content matches the
+// package-level forbiddenStdLog pattern. Any error from walking the tree or
+// reading a file also fails the test.
+func checkGoTreeForForbiddenStdLog(t *testing.T, root string) {
+	t.Helper()
+
+	err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		// Directories and non-.go entries are skipped. The extension check
+		// already excludes README.md, so no separate base-name check is
+		// needed (the previous one could never be reached with a match).
+		if d.IsDir() || filepath.Ext(path) != ".go" {
+			return nil
+		}
+
+		content, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		if forbiddenStdLog.Match(content) {
+			t.Fatalf("%s still uses forbidden standard log calls", path)
+		}
+		return nil
+	})
+	if err != nil {
+		t.Fatalf("walk %s: %v", root, err)
+	}
+}
diff --git a/pkg/pprof/profile.go b/pkg/pprof/profile.go index 242e5988b..fb4dddd6b 100644 --- a/pkg/pprof/profile.go +++ b/pkg/pprof/profile.go @@ -2,10 +2,11 @@ package pprof import ( "fmt" - "log" "net" "net/http" "sync/atomic" + + "k8s.io/klog/v2" ) var ( @@ -16,20 +17,20 @@ var ( func Go() { if !atomic.CompareAndSwapUint32(&pprof, 0, 1) { - log.Println("pprofile already started,", addr) + klog.InfoS("pprof already started", "address", addr) return } listener, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { - log.Println(err) + klog.ErrorS(err, "failed to start pprof listener") return } addr = fmt.Sprintf("http://127.0.0.1:%d/debug/pprof", listener.Addr().(*net.TCPAddr).Port) - log.Printf("pprof started at %s", addr) + klog.InfoS("pprof started", "address", addr) err = http.Serve(listener, nil) if err != nil { - log.Println(err) + klog.ErrorS(err, "pprof server exited") return } } diff --git a/pkg/snmp/translate.go b/pkg/snmp/translate.go index 27a80f78d..1e0e78778 100644
--- a/pkg/snmp/translate.go +++ b/pkg/snmp/translate.go @@ -2,7 +2,6 @@ package snmp import ( "fmt" - "log" "os" "path/filepath" "strings" @@ -10,6 +9,7 @@ import ( "github.com/sleepinggenius2/gosmi" "github.com/sleepinggenius2/gosmi/types" + "k8s.io/klog/v2" ) // must init, append path for each directory, load module for every file @@ -54,34 +54,34 @@ func LoadMibsFromPath(paths []string, loader MibLoader) error { loader.appendPath(path) modules, err := os.ReadDir(path) if err != nil { - log.Printf("W! Can't read directory %v", modules) + klog.Warningf("can't read directory %s: %v", path, err) continue } for _, entry := range modules { info, err := entry.Info() if err != nil { - log.Printf("W! Couldn't get info for %v: %v", entry.Name(), err) + klog.Warningf("couldn't get info for %v: %v", entry.Name(), err) continue } if info.Mode()&os.ModeSymlink != 0 { symlink := filepath.Join(path, info.Name()) target, err := filepath.EvalSymlinks(symlink) if err != nil { - log.Printf("W! Couldn't evaluate symbolic links for %v: %v", symlink, err) + klog.Warningf("couldn't evaluate symbolic links for %v: %v", symlink, err) continue } // replace symlink's info with the target's info info, err = os.Lstat(target) if err != nil { - log.Printf("W! Couldn't stat target %v: %v", target, err) + klog.Warningf("couldn't stat target %v: %v", target, err) continue } } if info.Mode().IsRegular() { err := loader.loadModule(info.Name()) if err != nil { - log.Printf("W! Couldn't load module %v: %v", info.Name(), err) + klog.Warningf("couldn't load module %v: %v", info.Name(), err) continue } } @@ -107,9 +107,9 @@ func walkPaths(paths []string) ([]string, error) { err := filepath.Walk(mibPath, func(path string, info os.FileInfo, err error) error { if info == nil { - log.Println("W! No mibs found") + klog.Warning("no mibs found") if os.IsNotExist(err) { - log.Printf("W! 
MIB path doesn't exist: %q", mibPath) + klog.Warningf("MIB path doesn't exist: %q", mibPath) } else if err != nil { return err } @@ -119,11 +119,11 @@ func walkPaths(paths []string) ([]string, error) { if info.Mode()&os.ModeSymlink != 0 { target, err := filepath.EvalSymlinks(path) if err != nil { - log.Printf("W! Couldn't evaluate symbolic links for %v: %v", path, err) + klog.Warningf("couldn't evaluate symbolic links for %v: %v", path, err) } info, err = os.Lstat(target) if err != nil { - log.Printf("W! Couldn't stat target %v: %v", target, err) + klog.Warningf("couldn't stat target %v: %v", target, err) } path = target } diff --git a/writer/writer.go b/writer/writer.go index d1f943342..50df6239f 100644 --- a/writer/writer.go +++ b/writer/writer.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "fmt" - "log" "net" "net/http" "strings" @@ -14,6 +13,7 @@ import ( "github.com/golang/snappy" "github.com/prometheus/client_golang/api" "github.com/prometheus/prometheus/prompb" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" ) @@ -67,20 +67,20 @@ func (w Writer) Write(items []prompb.TimeSeries) { data, err := proto.Marshal(req) if err != nil { - log.Println("W! marshal prom data to proto got error:", err, "data:", items) + klog.ErrorS(err, "marshal prom data to proto got error", "data", items) return } if err := w.post(snappy.Encode(nil, data)); err != nil { - log.Println("W! post to", w.Opts.Url, "got error:", err) - log.Println("W! example timeseries:", items[0].String()) + klog.ErrorS(err, "post remote write request got error", "url", w.Opts.Url) + klog.Warningf("example timeseries: %s", items[0].String()) } } func (w Writer) post(req []byte) error { httpReq, err := http.NewRequest("POST", w.Opts.Url, bytes.NewReader(req)) if err != nil { - log.Println("W! 
create remote write request got error:", err) + klog.ErrorS(err, "create remote write request got error", "url", w.Opts.Url) return err } @@ -102,7 +102,7 @@ func (w Writer) post(req []byte) error { resp, body, err := w.Client.Do(context.Background(), httpReq) if err != nil { - log.Println("W! push data with remote write request got error:", err, "response body:", string(body)) + klog.ErrorS(err, "push data with remote write request got error", "url", w.Opts.Url, "response_body", string(body)) return err } diff --git a/writer/writers.go b/writer/writers.go index 6ab3f0ec6..1a99dd7b9 100644 --- a/writer/writers.go +++ b/writer/writers.go @@ -2,13 +2,13 @@ package writer import ( "fmt" - "log" "sort" "strings" "sync" "time" "github.com/prometheus/prometheus/prompb" + "k8s.io/klog/v2" "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/types" @@ -81,7 +81,7 @@ func WriteSamples(samples []*types.Sample) { printTestMetrics(samples) return } - if config.Config.DebugMode { + if klog.V(1).Enabled() { printTestMetrics(samples) } @@ -96,7 +96,7 @@ func WriteSamples(samples []*types.Sample) { success := writers.queue.PushFrontN(items) l := writers.queue.Len() if !success { - log.Printf("E! write %d samples failed, please increase queue size(%d)", len(items), l) + klog.Errorf("write %d samples failed, please increase queue size(%d)", len(items), l) } go snapshot(uint64(len(items)), uint64(l), success) } @@ -135,10 +135,7 @@ func WriteTimeSeries(timeSeries []prompb.TimeSeries) { }(key) } wg.Wait() - if config.Config.DebugMode { - log.Println("D!, write", len(timeSeries), "time series to all writers, cost:", - time.Since(now).Milliseconds(), "ms") - } + klog.V(1).InfoS("write time series to all writers", "count", len(timeSeries), "cost_ms", time.Since(now).Milliseconds()) } func printTestMetrics(samples []*types.Sample) {