|
3 | 3 | // small files, each with the output of "ifconfig" or "lshw" or another command |
4 | 4 | // like that. The hope is that by doing this, we will be able to track over |
5 | 5 | // time what hardware was installed, what software versions were running, and |
6 | | -// how the network was configured on every node in the M-Lab fleet. Every time |
7 | | -// we turn out to need a new small diagnostic command, that command should be |
8 | | -// added to the list and a new image pushed. |
| 6 | +// how the network was configured on every node in the M-Lab fleet. |
| 7 | +// |
| 8 | +// nodeinfo reads the list of commands and datatypes from a config file. It |
| 9 | +// rereads the config file before every gathering run, so that the file can be |
| 10 | +// deployed as a ConfigMap in Kubernetes. |
9 | 11 | package main |
10 | 12 |
|
11 | 13 | import ( |
12 | 14 | "context" |
13 | 15 | "flag" |
14 | 16 | "log" |
15 | | - "strings" |
16 | 17 | "time" |
17 | 18 |
|
18 | | - "github.com/m-lab/go/prometheusx" |
19 | | - |
20 | 19 | "github.com/m-lab/go/flagx" |
21 | 20 | "github.com/m-lab/go/memoryless" |
| 21 | + "github.com/m-lab/go/prometheusx" |
22 | 22 | "github.com/m-lab/go/rtx" |
23 | | - |
24 | | - "github.com/m-lab/nodeinfo/data" |
| 23 | + "github.com/m-lab/nodeinfo/config" |
| 24 | + "github.com/m-lab/nodeinfo/metrics" |
25 | 25 | ) |
26 | 26 |
|
| 27 | +// Command-line flags |
27 | 28 | var ( |
28 | | - datadir = flag.String("datadir", "/var/spool/nodeinfo", "The root directory in which to put all produced data") |
29 | | - once = flag.Bool("once", false, "Only gather data once") |
30 | | - smoketest = flag.Bool("smoketest", false, "Gather every type of data once. Used to test that all data types can be gathered.") |
31 | | - waittime = flag.Duration("wait", 1*time.Hour, "How long (in expectation) to wait between runs") |
32 | | - datatypes = flagx.StringArray{} |
33 | | - ctx, cancel = context.WithCancel(context.Background()) |
| 29 | + datadir = flag.String("datadir", "/var/spool/nodeinfo", "The root directory in which to put all produced data") |
| 30 | + once = flag.Bool("once", false, "Only gather data once") |
| 31 | + smoketest = flag.Bool("smoketest", false, "Gather every type of data once. Used to test that all configured data types can be gathered.") |
| 32 | + waittime = flag.Duration("wait", 1*time.Hour, "How long (in expectation) to wait between runs") |
| 33 | + configFile = flag.String("config", "/etc/nodeinfo/config.json", "The name of the config file to load from disk.") |
34 | 34 |
|
35 | | - gatherers = map[string]data.Gatherer{ |
36 | | - "lshw": { |
37 | | - Datatype: "lshw", |
38 | | - Filename: "lshw.json", |
39 | | - Cmd: []string{"lshw", "-json"}, |
40 | | - }, |
41 | | - "lspci": { |
42 | | - Datatype: "lspci", |
43 | | - Filename: "lspci.txt", |
44 | | - Cmd: []string{"lspci", "-mm", "-vv", "-k", "-nn"}, |
45 | | - }, |
46 | | - "lsusb": { |
47 | | - Datatype: "lsusb", |
48 | | - Filename: "lsusb.txt", |
49 | | - Cmd: []string{"lsusb", "-v"}, |
50 | | - }, |
51 | | - "ifconfig": { |
52 | | - Datatype: "ifconfig", |
53 | | - Filename: "ifconfig.txt", |
54 | | - Cmd: []string{"ifconfig", "-a"}, |
55 | | - }, |
56 | | - "route-v4": { |
57 | | - Datatype: "route", |
58 | | - Filename: "route-ipv4.txt", |
59 | | - Cmd: []string{"route", "-n", "-A", "inet"}, |
60 | | - }, |
61 | | - "route-v6": { |
62 | | - Datatype: "route", |
63 | | - Filename: "route-ipv6.txt", |
64 | | - Cmd: []string{"route", "-n", "-A", "inet6"}, |
65 | | - }, |
66 | | - "uname": { |
67 | | - Datatype: "uname", |
68 | | - Filename: "uname.txt", |
69 | | - Cmd: []string{"uname", "-a"}, |
70 | | - }, |
71 | | - } |
72 | | -) |
| 35 | + // A context and its associated cancellation function; calling the cancellation function should cause main to exit. |
| 36 | + mainCtx, mainCancel = context.WithCancel(context.Background()) |
73 | 37 |
|
74 | | -func possibleTypes() []string { |
75 | | - datatypes := []string{} |
76 | | - for datatype := range gatherers { |
77 | | - datatypes = append(datatypes, datatype) |
78 | | - } |
79 | | - return datatypes |
80 | | -} |
| 38 | + // Populated in main, after flag parsing, from the config file named by the -config flag. |
| 39 | + gatherers config.Config |
| 40 | +) |
81 | 41 |
|
82 | 42 | func init() { |
83 | 43 | log.SetFlags(log.Lshortfile | log.LUTC | log.LstdFlags) |
84 | | - |
85 | | - flag.Var(&datatypes, "datatype", "What datatype should be collected. This flag can be used multiple times. The set of possible datatypes is: {"+strings.Join(possibleTypes(), ", ")+"}") |
86 | 44 | } |
87 | 45 |
|
88 | 46 | // Runs every data gatherer. |
89 | 47 | func gather() { |
| 48 | + err := gatherers.Reload() |
| 49 | + if err != nil { |
| 50 | + metrics.ConfigLoadFailures.Inc() |
| 51 | + log.Println("Could not reload the config. Using old config.") |
| 52 | + } |
90 | 53 | t := time.Now() |
91 | | - for _, datatype := range datatypes { |
92 | | - g, ok := gatherers[datatype] |
93 | | - if ok { |
94 | | - g.Gather(t, *datadir, *smoketest) |
95 | | - } else { |
96 | | - log.Println("Unknown datatype:", datatype) |
97 | | - } |
| 54 | + for _, g := range gatherers.Gatherers() { |
| 55 | + g.Gather(t, *datadir, *smoketest) |
98 | 56 | } |
99 | 57 | } |
100 | 58 |
|
101 | 59 | func main() { |
102 | 60 | flag.Parse() |
103 | | - flagx.ArgsFromEnv(flag.CommandLine) |
104 | | - if *smoketest { |
105 | | - *once = true |
106 | | - datatypes = possibleTypes() |
107 | | - } |
| 61 | + rtx.Must(flagx.ArgsFromEnv(flag.CommandLine), "Could not parse args from environment") |
108 | 62 |
|
109 | | - srv := prometheusx.MustServeMetrics() |
110 | | - defer srv.Close() |
| 63 | + metricSrv := prometheusx.MustServeMetrics() |
| 64 | + defer metricSrv.Shutdown(mainCtx) |
111 | 65 |
|
| 66 | + var err error |
| 67 | + gatherers, err = config.Create(*configFile) |
| 68 | + rtx.Must(err, "Could not read config on the first try. Shutting down.") |
112 | 69 | rtx.Must( |
113 | | - memoryless.Run(ctx, gather, memoryless.Config{Expected: *waittime, Max: 4 * (*waittime), Once: *once}), |
| 70 | + memoryless.Run(mainCtx, gather, memoryless.Config{Expected: *waittime, Max: 4 * (*waittime), Once: *once || *smoketest}), |
114 | 71 | "Bad time arguments.") |
115 | 72 | } |
0 commit comments