Skip to content

Commit ec7733c

Browse files
authored
Merge pull request #9 from m-lab/convert-to-config
Move the config into the k8s deployment, where it belongs.
2 parents 207db96 + 247c731 commit ec7733c

7 files changed

Lines changed: 341 additions & 140 deletions

File tree

Dockerfile

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,8 @@ RUN go get \
88
./...
99

1010
FROM alpine:3.7
11+
# Add all binaries that we may want to run that are not in alpine by default.
1112
RUN apk add --no-cache lshw
1213
COPY --from=build /go/bin/nodeinfo /
1314
WORKDIR /
14-
# Run things once to verify that every command invoked can be invoked inside the container.
15-
RUN mkdir smoketest && /nodeinfo -smoketest -datadir smoketest && rm -Rf /smoketest
16-
# Remove the created directory to allow it to be a mountpoint when deployed.
17-
RUN rm -Rf /var/spool/nodeinfo
18-
# If we made it here, then everything works!
1915
ENTRYPOINT ["/nodeinfo"]

README.md

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,65 @@ Available as a container in
1212
[measurementlab/nodeinfo](https://hub.docker.com/r/measurementlab/nodeinfo/) on
1313
Docker Hub.
1414

15-
# design
15+
## design
1616

17-
As simple as possible. This system is called `nodeinfo`. Every command produces its own type of data, and so is it own datatype. These two facts, together with [M-Lab's unified naming scheme for data](http://example.com), and the best practices for [Pusher](http://github.com/m-lab/pusher) mean that the directory structure for output is fully determined.
17+
As simple as possible. This system is called `nodeinfo`. Every command produces its own type of data, and so is its own datatype. These two facts, together with [M-Lab's unified naming scheme for data](http://example.com), and the best practices for [Pusher](http://github.com/m-lab/pusher) mean that the directory structure for output is fully determined.
1818

1919
This program calls a series of other programs, and directs the output of each call to the appropriate output file. The set of programs to call is read from a config file (see the example below). If any of the commands run unsuccessfully, this crashes. Every command is rerun every hour on average, with some randomness. The inter-run times are drawn from the exponential distribution to try to make sure the resulting series of measurements has the [PASTA property](https://en.wikipedia.org/wiki/Arrival_theorem).
20+
21+
## example config file
22+
23+
```json
24+
[
24+
{
25+
"Datatype": "lshw",
26+
"Filename": "lshw.json",
27+
"Cmd": ["lshw", "-json"]
28+
},
29+
{
30+
"Datatype": "lspci",
31+
"Filename": "lspci.txt",
32+
"Cmd": ["lspci", "-mm", "-vv", "-k", "-nn"]
33+
},
34+
{
35+
"Datatype": "lsusb",
36+
"Filename": "lsusb.txt",
37+
"Cmd": ["lsusb", "-v"]
38+
},
39+
{
40+
"Datatype": "ip-address",
41+
"Filename": "ip-address.txt",
42+
"Cmd": ["ip", "address", "show"]
43+
},
44+
{
45+
"Datatype": "ip-route-4",
46+
"Filename": "ip-route-4.txt",
47+
"Cmd": ["ip", "-4", "route", "show"]
48+
},
49+
{
50+
"Datatype": "ip-route-6",
51+
"Filename": "ip-route-6.txt",
52+
"Cmd": ["ip", "-6", "route", "show"]
53+
},
54+
{
55+
"Datatype": "uname",
56+
"Filename": "uname.txt",
57+
"Cmd": ["uname", "-a"]
58+
},
59+
{
60+
"Datatype": "os-release",
61+
"Filename": "os-release.txt",
62+
"Cmd": ["cat", "/etc/os-release"]
63+
},
64+
{
65+
"Datatype": "bios_version",
66+
"Filename": "bios_version.txt",
67+
"Cmd": ["cat", "/sys/class/dmi/id/bios_version"]
68+
},
69+
{
70+
"Datatype": "chassis_serial",
71+
"Filename": "chassis_serial.txt",
72+
"Cmd": ["cat", "/sys/class/dmi/id/chassis_serial"]
73+
}
74+
]
76+
```

config/config.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package config
2+
3+
import (
4+
"encoding/json"
5+
"io/ioutil"
6+
"log"
7+
8+
"github.com/m-lab/nodeinfo/data"
9+
"github.com/m-lab/nodeinfo/metrics"
10+
)
11+
12+
// Config contains the configuration of nodeinfo that is stored in a separate
13+
// config file.
14+
type Config interface {
15+
Reload() error
16+
Gatherers() []data.Gatherer
17+
}
18+
19+
// Create creates a new config by reading and parsing the named file. If
20+
// the file can't be read or parsed, then this will return a non-nil error.
21+
func Create(filename string) (Config, error) {
22+
c := &fileconfig{
23+
filename: filename,
24+
}
25+
err := c.Reload()
26+
return c, err
27+
}
28+
29+
// fileconfig contains the full runtime config of nodeinfo.
30+
type fileconfig struct {
31+
filename string
32+
gatherers []data.Gatherer
33+
}
34+
35+
// Reload the list of gatherers from the original config filename. Returns a
36+
// non-nil error if unsuccessful.
37+
func (c *fileconfig) Reload() error {
38+
metrics.ConfigLoadCount.Inc()
39+
contents, err := ioutil.ReadFile(c.filename)
40+
if err != nil {
41+
log.Println("Could not read file")
42+
return err
43+
}
44+
var g []data.Gatherer
45+
err = json.Unmarshal(contents, &g)
46+
if err != nil {
47+
log.Printf("Could not parse %q", c.filename)
48+
return err
49+
}
50+
c.gatherers = g
51+
metrics.ConfigLoadTime.SetToCurrentTime()
52+
return nil
53+
}
54+
55+
// Gatherers returns a slice of data gatherers. The backing storage for a given
56+
// slice should be immutable.
57+
func (c *fileconfig) Gatherers() []data.Gatherer {
58+
return c.gatherers
59+
}

config/config_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package config_test
2+
3+
import (
4+
"io/ioutil"
5+
"os"
6+
"reflect"
7+
"testing"
8+
9+
"github.com/m-lab/nodeinfo/config"
10+
"github.com/m-lab/nodeinfo/data"
11+
12+
"github.com/m-lab/go/rtx"
13+
)
14+
15+
func TestConfigCreationAndReload(t *testing.T) {
16+
dir, err := ioutil.TempDir("", "TestConfigCreation")
17+
rtx.Must(err, "Could not create tempdir")
18+
defer os.RemoveAll(dir)
19+
20+
filecontents := `[
21+
{
22+
"Datatype": "uname",
23+
"Filename": "uname.txt",
24+
"Cmd": ["uname", "-a"]
25+
},
26+
{
27+
"Datatype": "ifconfig",
28+
"Filename": "ifconfig.txt",
29+
"Cmd": ["ifconfig"]
30+
}
31+
]
32+
`
33+
expected := []data.Gatherer{
34+
{Datatype: "uname", Filename: "uname.txt", Cmd: []string{"uname", "-a"}},
35+
{Datatype: "ifconfig", Filename: "ifconfig.txt", Cmd: []string{"ifconfig"}},
36+
}
37+
rtx.Must(ioutil.WriteFile(dir+"/config.json", []byte(filecontents), 0666), "Could not write config")
38+
c, err := config.Create(dir + "/config.json")
39+
rtx.Must(err, "Could not read config.json")
40+
g := c.Gatherers()
41+
if !reflect.DeepEqual(g, expected) {
42+
t.Errorf("%v != %v", g, expected)
43+
}
44+
45+
filecontents2 := `[
46+
{
47+
"Datatype": "ls",
48+
"Filename": "ls.txt",
49+
"Cmd": ["ls", "-l"]
50+
}
51+
]
52+
`
53+
expected2 := []data.Gatherer{
54+
{Datatype: "ls", Filename: "ls.txt", Cmd: []string{"ls", "-l"}},
55+
}
56+
rtx.Must(ioutil.WriteFile(dir+"/config.json", []byte(filecontents2), 0666), "Could not write replacement config")
57+
rtx.Must(c.Reload(), "Could not reload config")
58+
g = c.Gatherers()
59+
if !reflect.DeepEqual(g, expected2) {
60+
t.Errorf("%v != %v", g, expected2)
61+
}
62+
rtx.Must(ioutil.WriteFile(dir+"/config.json", []byte("bad content"), 0666), "Could not write replacement config")
63+
if c.Reload() == nil {
64+
t.Error("We should not have been able to reload the config")
65+
}
66+
g = c.Gatherers()
67+
if !reflect.DeepEqual(g, expected2) {
68+
t.Errorf("%v != %v", g, expected2)
69+
}
70+
}
71+
72+
func TestConfigOnBadFile(t *testing.T) {
73+
_, err := config.Create("/this/file/does/not/exist")
74+
if err == nil {
75+
t.Error("This should not have succeeded")
76+
}
77+
}

main.go

Lines changed: 33 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -3,113 +3,70 @@
33
// small files, each with the output of "ifconfig" or "lshw" or another command
44
// like that. The hope is that by doing this, we will be able to track over
55
// time what hardware was installed, what software versions were running, and
6-
// how the network was configured on every node in the M-Lab fleet. Every time
7-
// we turn out to need a new small diagnostic command, that command should be
8-
// added to the list and a new image pushed.
6+
// how the network was configured on every node in the M-Lab fleet.
7+
//
8+
// nodeinfo reads the list of commands and datatypes in from a config file. It
9+
// rereads the config file every time it runs, to allow that file to be deployed
10+
// as a ConfigMap in kubernetes.
911
package main
1012

1113
import (
1214
"context"
1315
"flag"
1416
"log"
15-
"strings"
1617
"time"
1718

18-
"github.com/m-lab/go/prometheusx"
19-
2019
"github.com/m-lab/go/flagx"
2120
"github.com/m-lab/go/memoryless"
21+
"github.com/m-lab/go/prometheusx"
2222
"github.com/m-lab/go/rtx"
23-
24-
"github.com/m-lab/nodeinfo/data"
23+
"github.com/m-lab/nodeinfo/config"
24+
"github.com/m-lab/nodeinfo/metrics"
2525
)
2626

27+
// Command-line flags
2728
var (
28-
datadir = flag.String("datadir", "/var/spool/nodeinfo", "The root directory in which to put all produced data")
29-
once = flag.Bool("once", false, "Only gather data once")
30-
smoketest = flag.Bool("smoketest", false, "Gather every type of data once. Used to test that all data types can be gathered.")
31-
waittime = flag.Duration("wait", 1*time.Hour, "How long (in expectation) to wait between runs")
32-
datatypes = flagx.StringArray{}
33-
ctx, cancel = context.WithCancel(context.Background())
29+
datadir = flag.String("datadir", "/var/spool/nodeinfo", "The root directory in which to put all produced data")
30+
once = flag.Bool("once", false, "Only gather data once")
31+
smoketest = flag.Bool("smoketest", false, "Gather every type of data once. Used to test that all configured data types can be gathered.")
32+
waittime = flag.Duration("wait", 1*time.Hour, "How long (in expectation) to wait between runs")
33+
configFile = flag.String("config", "/etc/nodeinfo/config.json", "The name of the config file to load from disk.")
3434

35-
gatherers = map[string]data.Gatherer{
36-
"lshw": {
37-
Datatype: "lshw",
38-
Filename: "lshw.json",
39-
Cmd: []string{"lshw", "-json"},
40-
},
41-
"lspci": {
42-
Datatype: "lspci",
43-
Filename: "lspci.txt",
44-
Cmd: []string{"lspci", "-mm", "-vv", "-k", "-nn"},
45-
},
46-
"lsusb": {
47-
Datatype: "lsusb",
48-
Filename: "lsusb.txt",
49-
Cmd: []string{"lsusb", "-v"},
50-
},
51-
"ifconfig": {
52-
Datatype: "ifconfig",
53-
Filename: "ifconfig.txt",
54-
Cmd: []string{"ifconfig", "-a"},
55-
},
56-
"route-v4": {
57-
Datatype: "route",
58-
Filename: "route-ipv4.txt",
59-
Cmd: []string{"route", "-n", "-A", "inet"},
60-
},
61-
"route-v6": {
62-
Datatype: "route",
63-
Filename: "route-ipv6.txt",
64-
Cmd: []string{"route", "-n", "-A", "inet6"},
65-
},
66-
"uname": {
67-
Datatype: "uname",
68-
Filename: "uname.txt",
69-
Cmd: []string{"uname", "-a"},
70-
},
71-
}
72-
)
35+
// A context and associated cancellation function which, when called, should cause main to exit.
36+
mainCtx, mainCancel = context.WithCancel(context.Background())
7337

74-
func possibleTypes() []string {
75-
datatypes := []string{}
76-
for datatype := range gatherers {
77-
datatypes = append(datatypes, datatype)
78-
}
79-
return datatypes
80-
}
38+
// Contents of this should be filled in as part of parsing commandline flags.
39+
gatherers config.Config
40+
)
8141

8242
func init() {
8343
log.SetFlags(log.Lshortfile | log.LUTC | log.LstdFlags)
84-
85-
flag.Var(&datatypes, "datatype", "What datatype should be collected. This flag can be used multiple times. The set of possible datatypes is: {"+strings.Join(possibleTypes(), ", ")+"}")
8644
}
8745

8846
// Runs every data gatherer.
8947
func gather() {
48+
err := gatherers.Reload()
49+
if err != nil {
50+
metrics.ConfigLoadFailures.Inc()
51+
log.Println("Could not reload the config. Using old config.")
52+
}
9053
t := time.Now()
91-
for _, datatype := range datatypes {
92-
g, ok := gatherers[datatype]
93-
if ok {
94-
g.Gather(t, *datadir, *smoketest)
95-
} else {
96-
log.Println("Unknown datatype:", datatype)
97-
}
54+
for _, g := range gatherers.Gatherers() {
55+
g.Gather(t, *datadir, *smoketest)
9856
}
9957
}
10058

10159
func main() {
10260
flag.Parse()
103-
flagx.ArgsFromEnv(flag.CommandLine)
104-
if *smoketest {
105-
*once = true
106-
datatypes = possibleTypes()
107-
}
61+
rtx.Must(flagx.ArgsFromEnv(flag.CommandLine), "Could not parse args from environment")
10862

109-
srv := prometheusx.MustServeMetrics()
110-
defer srv.Close()
63+
metricSrv := prometheusx.MustServeMetrics()
64+
defer metricSrv.Shutdown(mainCtx)
11165

66+
var err error
67+
gatherers, err = config.Create(*configFile)
68+
rtx.Must(err, "Could not read config on the first try. Shutting down.")
11269
rtx.Must(
113-
memoryless.Run(ctx, gather, memoryless.Config{Expected: *waittime, Max: 4 * (*waittime), Once: *once}),
70+
memoryless.Run(mainCtx, gather, memoryless.Config{Expected: *waittime, Max: 4 * (*waittime), Once: *once || *smoketest}),
11471
"Bad time arguments.")
11572
}

0 commit comments

Comments
 (0)