Skip to content

Commit fce1213

Browse files
committed
Add capability for gradual rollout of transition mode
1 parent e72e24f commit fce1213

7 files changed

Lines changed: 288 additions & 38 deletions

File tree

certmagic.go

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -499,27 +499,3 @@ var (
499499

500500
// Maximum size for the stack trace when recovering from panics.
501501
const stackTraceBufferSize = 1024 * 128
502-
503-
const (
504-
// Storage mode controls the format in which certificates are stored in `Storage`.
505-
//
506-
// Formats:
507-
// - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json).
508-
// - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle).
509-
//
510-
// Modes:
511-
// - legacy: Store and load certificates in legacy format.
512-
// - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format.
513-
// - bundle: Store and load certificates in bundle format.
514-
//
515-
// In the transition mode, failures around reads and writes of the bundle are soft.
516-
// They should only log errors and try to work with the legacy format as fallback.
517-
// Operations on the legacy format are hard-failures, implying that errors should be propagated up.
518-
//
519-
// The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable
520-
StorageModeEnv = "CERTMAGIC_STORAGE_MODE"
521-
522-
StorageModeLegacy = "legacy"
523-
StorageModeTransition = "transition"
524-
StorageModeBundle = "bundle"
525-
)

config.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
"net"
3333
"net/http"
3434
"net/url"
35-
"os"
3635
"strings"
3736
"time"
3837

@@ -1272,7 +1271,7 @@ func (cfg *Config) checkStorage(ctx context.Context) error {
12721271
// resources related to the certificate for domain.
12731272
// It switches between legacy and bundle storage based on the per-domain storage mode returned by StorageModeForDomain.
12741273
func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool {
1275-
switch os.Getenv(StorageModeEnv) {
1274+
switch StorageModeForDomain(domain) {
12761275
case StorageModeTransition:
12771276
if cfg.storageHasCertResourcesBundle(ctx, issuer, domain) {
12781277
return true
@@ -1313,7 +1312,7 @@ func (cfg *Config) storageHasCertResourcesBundle(ctx context.Context, issuer Iss
13131312
// issuer with the given issuer key.
13141313
// It switches between legacy and bundle storage based on the per-domain storage mode returned by StorageModeForDomain.
13151314
func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error {
1316-
switch os.Getenv(StorageModeEnv) {
1315+
switch StorageModeForDomain(domain) {
13171316
case StorageModeTransition:
13181317
if err := cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain); err != nil {
13191318
cfg.Logger.Warn("unable to delete certificate resource bundle",

config_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func mustJSON(val any) []byte {
158158
// testStorageModeSetup creates a test config with the specified storage mode
159159
func testStorageModeSetup(t *testing.T, mode, storagePath string) (*Config, *ACMEIssuer) {
160160
t.Helper()
161-
t.Setenv(StorageModeEnv, mode)
161+
ConfigureStorageMode(mode, 100)
162162

163163
am := &ACMEIssuer{CA: "https://example.com/acme/directory"}
164164
cfg := &Config{
@@ -291,7 +291,7 @@ func TestStorageModeTransitionFallback(t *testing.T) {
291291
cert := makeCertResource(am, domain, true)
292292

293293
// Save in legacy mode to simulate existing data
294-
os.Setenv(StorageModeEnv, StorageModeLegacy)
294+
ConfigureStorageMode(StorageModeLegacy, 0)
295295
if err := cfg.saveCertResource(ctx, am, cert); err != nil {
296296
t.Fatalf("Failed to save cert in legacy mode: %v", err)
297297
}
@@ -301,7 +301,7 @@ func TestStorageModeTransitionFallback(t *testing.T) {
301301
assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain))
302302

303303
// Switch to transition mode and verify fallback to legacy works
304-
os.Setenv(StorageModeEnv, StorageModeTransition)
304+
ConfigureStorageMode(StorageModeTransition, 100)
305305
loaded, err := cfg.loadCertResource(ctx, am, domain)
306306
if err != nil {
307307
t.Fatalf("Failed to load cert in transition mode with fallback: %v", err)

crypto.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"fmt"
3131
"hash/fnv"
3232
"io/fs"
33-
"os"
3433
"sort"
3534
"strings"
3635

@@ -144,7 +143,7 @@ func fastHash(input []byte) string {
144143
// saveCertResource saves the certificate resource to disk.
145144
// It switches between legacy and bundle storage based on the storage mode StorageModeForDomain returns for the certificate's names key.
146145
func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error {
147-
switch os.Getenv(StorageModeEnv) {
146+
switch StorageModeForDomain(cert.NamesKey()) {
148147
case StorageModeTransition:
149148
if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil {
150149
cfg.Logger.Warn("unable to store certificate resource bundle",
@@ -274,7 +273,7 @@ func (cfg *Config) loadCertResourceAnyIssuer(ctx context.Context, certNamesKey s
274273
// loadCertResource loads a certificate resource from the given issuer's storage location.
275274
// It switches between legacy and bundle storage based on the storage mode StorageModeForDomain returns for certNamesKey.
276275
func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) {
277-
switch os.Getenv(StorageModeEnv) {
276+
switch StorageModeForDomain(certNamesKey) {
278277
case StorageModeTransition:
279278
certRes, err := cfg.loadCertResourceBundle(ctx, issuer, certNamesKey)
280279
if err == nil {

maintain.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"errors"
2323
"fmt"
2424
"io/fs"
25-
"os"
2625
"path"
2726
"runtime"
2827
"strings"
@@ -431,7 +430,7 @@ func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bo
431430
// loadStoredACMECertificateMetadata loads the stored ACME certificate data.
432431
// It switches between legacy and bundle storage based on the storage mode StorageModeForDomain returns for the certificate's first name.
433432
func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) {
434-
switch os.Getenv(StorageModeEnv) {
433+
switch StorageModeForDomain(cert.Names[0]) {
435434
case StorageModeTransition:
436435
acmecert, err := cfg.loadStoredACMECertificateMetadataBundle(ctx, cert)
437436
if err == nil {
@@ -496,7 +495,7 @@ func (cfg *Config) loadStoredACMECertificateMetadataBundle(ctx context.Context,
496495
// NeedsRefresh() on the RenewalInfo first, and only call this if that returns true.
497496
// It switches between legacy and bundle storage based on the storage mode StorageModeForDomain returns for the certificate's first name.
498497
func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) {
499-
switch os.Getenv(StorageModeEnv) {
498+
switch StorageModeForDomain(cert.Names[0]) {
500499
case StorageModeTransition:
501500
updatedCert, changed, err = cfg.updateARILegacy(ctx, cert, logger)
502501
if err == nil {
@@ -1046,7 +1045,7 @@ func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logg
10461045
}
10471046

10481047
func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error {
1049-
switch os.Getenv(StorageModeEnv) {
1048+
switch StorageMode {
10501049
case StorageModeTransition:
10511050
if err := deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod); err != nil {
10521051
logger.Warn("unable to delete expired certs from bundle",
@@ -1321,7 +1320,7 @@ func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certifica
13211320
// Delete the storage containing the compromised key based on storage mode.
13221321
// We intentionally ignore delete errors since the file might not exist,
13231322
// and we avoid calling .Exists() before .Delete() to minimize storage roundtrips.
1324-
switch os.Getenv(StorageModeEnv) {
1323+
switch StorageModeForDomain(cert.Names[0]) {
13251324
case StorageModeTransition:
13261325
cfg.Storage.Delete(ctx, bundleKey)
13271326
cfg.Storage.Delete(ctx, privKeyStorageKey)

storagemode.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package certmagic
2+
3+
import (
4+
"hash/fnv"
5+
"os"
6+
"strconv"
7+
)
8+
9+
const (
10+
// Storage mode controls the format in which certificates are stored in `Storage`.
11+
//
12+
// Formats:
13+
// - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json).
14+
// - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle).
15+
//
16+
// Modes:
17+
// - legacy: Store and load certificates in legacy format.
18+
// - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format.
19+
// - bundle: Store and load certificates in bundle format.
20+
//
21+
// In the transition mode, failures around reads and writes of the bundle are soft.
22+
// They should only log errors and try to work with the legacy format as fallback.
23+
// Operations on the legacy format are hard-failures, implying that errors should be propagated up.
24+
//
25+
// The rollout percentage enables a phased migration by controlling which domains
26+
// enter the transition phase. If a domain's deterministic bucket (0-99) is below
27+
// the rollout percentage, it uses 'transition' mode (dual-write, bundle-read).
28+
// Otherwise, it remains in 'legacy' mode.
29+
//
30+
// The logic for selection is:
31+
// if mode == StorageModeTransition:
32+
// useTransition = hash(domain)%100 < rollout
33+
// return useTransition ? StorageModeTransition : StorageModeLegacy
34+
//
35+
// The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable
36+
StorageModeEnv = "CERTMAGIC_STORAGE_MODE"
37+
38+
StorageModeLegacy = "legacy"
39+
StorageModeTransition = "transition"
40+
StorageModeBundle = "bundle"
41+
42+
// StorageModeRolloutPercentEnv controls the percentage of domains that will use
43+
// transition behavior (dual-write, bundle-read) when the storage mode is set to "transition".
44+
// An empty rollout percent is equal to 0%.
45+
StorageModeRolloutPercentEnv = "CERTMAGIC_STORAGE_MODE_ROLLOUT_PERCENT"
46+
)
47+
48+
var (
	// StorageMode is the package-wide storage mode. It is compared against
	// StorageModeLegacy, StorageModeTransition and StorageModeBundle.
	StorageMode string

	// StorageModeRolloutPercent is the rollout percentage that a domain's
	// rollout bucket is compared against while StorageMode is
	// StorageModeTransition.
	StorageModeRolloutPercent int
)

// ConfigureStorageMode replaces the package-wide StorageMode and
// StorageModeRolloutPercent with the given values. Neither value is
// validated or clamped.
func ConfigureStorageMode(mode string, rolloutPercent int) {
	StorageMode, StorageModeRolloutPercent = mode, rolloutPercent
}
57+
58+
func init() {
59+
mode := os.Getenv(StorageModeEnv)
60+
61+
// rolloutPercent becomes zero if env is unset or malformed
62+
rolloutPercent, _ := strconv.Atoi(os.Getenv(StorageModeRolloutPercentEnv))
63+
64+
ConfigureStorageMode(mode, rolloutPercent)
65+
}
66+
67+
func StorageModeForDomain(domain string) string {
68+
if StorageMode == StorageModeBundle {
69+
return StorageModeBundle
70+
}
71+
if StorageMode != StorageModeTransition {
72+
return StorageModeLegacy
73+
}
74+
if RolloutBucketForDomain(domain) < StorageModeRolloutPercent {
75+
return StorageModeTransition
76+
} else {
77+
return StorageModeLegacy
78+
}
79+
}
80+
81+
// RolloutBucketForDomain maps domain to a deterministic bucket in the range
// [0, 99] by taking the 32-bit FNV-1a hash of the domain's bytes modulo 100.
// The same input string always yields the same bucket; the input is hashed
// as given, without any normalization.
func RolloutBucketForDomain(domain string) int {
	// Number of buckets; matches the 0-100 scale of the rollout percentage.
	const rolloutBuckets = 100

	hasher := fnv.New32a()
	// hash.Hash documents that Write never returns an error, so the result
	// is discarded deliberately.
	_, _ = hasher.Write([]byte(domain))

	return int(hasher.Sum32() % rolloutBuckets)
}

0 commit comments

Comments
 (0)