diff --git a/README.md b/README.md index 0886b30..ce1f82d 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ This serves the embedded spec and Scalar reference at `http://localhost:8080`. H | `INFLUXDB_ORG` | InfluxDB organization name or email | | `INFLUXDB_BUCKET` | InfluxDB bucket name (`frog_fleet`) | | `API_KEY_DB_PATH` | Path to the SQLite file holding hashed API keys | +| `SENSORS_CACHE_TTL` | How long the `/sensors` list is cached (Go duration, e.g. `10m`; default `5m`) | ## API keys @@ -102,6 +103,7 @@ In production (on Fly.io), run the same subcommands against the deployed binary ```sh fly ssh console -C "/api keygen issue --owner researcher@uni.edu" +fly ssh console -C "/api keygen issue --owner you@example.com" fly ssh console -C "/api keygen list" fly ssh console -C "/api keygen revoke --id 7" ``` diff --git a/internal/sensors/cache.go b/internal/sensors/cache.go new file mode 100644 index 0000000..6e10349 --- /dev/null +++ b/internal/sensors/cache.go @@ -0,0 +1,88 @@ +package sensors + +import ( + "os" + "sync" + "time" +) + +// defaultCacheTTL is how long a fetched sensor list is served before it is +// refetched. The set of sensors changes slowly while the underlying InfluxDB +// schema query is expensive, so a multi-minute cache cuts query load sharply. +const defaultCacheTTL = 5 * time.Minute + +// cache memoizes the result of an expensive fetch for a TTL. It holds its lock +// across the fetch so that a burst of concurrent requests triggers at most one +// underlying query; the rest wait and share the result. +type cache struct { + ttl time.Duration + now func() time.Time + fetch func() ([]string, error) + + mu sync.Mutex + ids []string + expires time.Time + valid bool +} + +func newCache(ttl time.Duration, fetch func() ([]string, error)) *cache { + return &cache{ttl: ttl, now: time.Now, fetch: fetch} +} + +// get returns the cached IDs if they are still fresh, otherwise it refetches. +// Errors are not cached, so a failed fetch is retried on the next call. +func (c *cache) get() ([]string, error) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.valid && c.now().Before(c.expires) { + return append([]string(nil), c.ids...), nil + } + + ids, err := c.fetch() + if err != nil { + return nil, err + } + + c.ids = ids + c.expires = c.now().Add(c.ttl) + c.valid = true + return append([]string(nil), ids...), nil +} + +// reset clears the cached value, forcing the next get to refetch. Used in tests. +func (c *cache) reset() { + c.mu.Lock() + defer c.mu.Unlock() + c.ids = nil + c.expires = time.Time{} + c.valid = false +} + +// sensorCache is the process-wide cache backing the /sensors handler. It is +// built lazily on first use so that cacheTTL reads SENSORS_CACHE_TTL after the +// .env file has been loaded, and so it picks up test overrides of fetchSensors. +var ( + sensorCacheOnce sync.Once + sensorCacheInst *cache +) + +func sensorCache() *cache { + sensorCacheOnce.Do(func() { + sensorCacheInst = newCache(cacheTTL(), func() ([]string, error) { + return fetchSensors() + }) + }) + return sensorCacheInst +} + +// cacheTTL resolves the cache lifetime from SENSORS_CACHE_TTL (a Go duration +// string such as "10m"), falling back to defaultCacheTTL when unset or invalid. +func cacheTTL() time.Duration { + if v := os.Getenv("SENSORS_CACHE_TTL"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return defaultCacheTTL +} diff --git a/internal/sensors/cache_test.go b/internal/sensors/cache_test.go new file mode 100644 index 0000000..9e0f9b5 --- /dev/null +++ b/internal/sensors/cache_test.go @@ -0,0 +1,129 @@ +package sensors + +import ( + "errors" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestCache_ServesFromCacheWithinTTL(t *testing.T) { + var calls int + c := newCache(time.Minute, func() ([]string, error) { + calls++ + return []string{"frog-01"}, nil + }) + now := time.Unix(0, 0) + c.now = func() time.Time { return now } + + ids, err := c.get() + require.NoError(t, err) + require.Equal(t, []string{"frog-01"}, ids) + + now = now.Add(30 * time.Second) // still within the 1m TTL + ids, err = c.get() + require.NoError(t, err) + require.Equal(t, []string{"frog-01"}, ids) + require.Equal(t, 1, calls, "second call within TTL must not refetch") +} + +func TestCache_RefetchesAfterTTL(t *testing.T) { + results := [][]string{{"a"}, {"b"}} + var calls int + c := newCache(time.Minute, func() ([]string, error) { + r := results[calls] + calls++ + return r, nil + }) + now := time.Unix(0, 0) + c.now = func() time.Time { return now } + + ids, err := c.get() + require.NoError(t, err) + require.Equal(t, []string{"a"}, ids) + + now = now.Add(90 * time.Second) // past the 1m TTL + ids, err = c.get() + require.NoError(t, err) + require.Equal(t, []string{"b"}, ids) + require.Equal(t, 2, calls) +} + +func TestCache_DoesNotCacheErrors(t *testing.T) { + boom := errors.New("boom") + var calls int + c := newCache(time.Minute, func() ([]string, error) { + calls++ + return nil, boom + }) + now := time.Unix(0, 0) + c.now = func() time.Time { return now } + + _, err := c.get() + require.ErrorIs(t, err, boom) + + _, err = c.get() + require.ErrorIs(t, err, boom) + require.Equal(t, 2, calls, "errors must not be cached; each call retries") +} + +func TestCache_SerializesConcurrentFetches(t *testing.T) { + var mu sync.Mutex + var calls int + release := make(chan struct{}) + c := newCache(time.Minute, func() ([]string, error) { + mu.Lock() + calls++ + mu.Unlock() + <-release // hold the in-flight fetch open until all goroutines are racing + return []string{"frog"}, nil + }) + now := time.Unix(0, 0) + c.now = func() time.Time { return now } + + const n = 16 + errCh := make(chan error, n) + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func() { + defer wg.Done() + ids, err := c.get() + if err != nil { + errCh <- err + return + } + if len(ids) != 1 || ids[0] != "frog" { + errCh <- errors.New("unexpected ids") + } + }() + } + close(release) + wg.Wait() + close(errCh) + + for err := range errCh { + require.NoError(t, err) + } + require.Equal(t, 1, calls, "a burst of concurrent requests must trigger only one fetch") +} + +func TestCacheTTL_DefaultWhenUnset(t *testing.T) { + t.Setenv("SENSORS_CACHE_TTL", "") + require.Equal(t, defaultCacheTTL, cacheTTL()) +} + +func TestCacheTTL_ParsesEnv(t *testing.T) { + t.Setenv("SENSORS_CACHE_TTL", "10m") + require.Equal(t, 10*time.Minute, cacheTTL()) +} + +func TestCacheTTL_FallsBackOnInvalidOrNonPositive(t *testing.T) { + t.Setenv("SENSORS_CACHE_TTL", "nonsense") + require.Equal(t, defaultCacheTTL, cacheTTL()) + + t.Setenv("SENSORS_CACHE_TTL", "0s") + require.Equal(t, defaultCacheTTL, cacheTTL()) +} diff --git a/internal/sensors/sensors.go b/internal/sensors/sensors.go index 317a7bb..3861796 100644 --- a/internal/sensors/sensors.go +++ b/internal/sensors/sensors.go @@ -46,7 +46,7 @@ func Handle(w http.ResponseWriter, r *http.Request) { return } - ids, err := fetchSensors() + ids, err := sensorCache().get() if err != nil { msg := "query failed" if errors.Is(err, errQueryResult) { diff --git a/internal/sensors/sensors_test.go b/internal/sensors/sensors_test.go index dcb74dd..4fecd08 100644 --- a/internal/sensors/sensors_test.go +++ b/internal/sensors/sensors_test.go @@ -17,7 +17,8 @@ func withFetchSensors(t *testing.T, fn func() ([]string, error)) { t.Helper() orig := fetchSensors fetchSensors = fn - t.Cleanup(func() { fetchSensors = orig }) + sensorCache().reset() + t.Cleanup(func() { fetchSensors = orig; sensorCache().reset() }) } func TestHandle_MethodNotAllowed(t *testing.T) {