From d893c8980e9927743f046ffd0204dc530f39378e Mon Sep 17 00:00:00 2001 From: Nav Saini Date: Tue, 19 May 2026 14:41:30 +0000 Subject: [PATCH] statsig-go: serialize purego FFI dispatches to dodge upstream race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a process-wide sync.Mutex on *StatsigFFI and take it across every purego-registered call this binding dispatches. JSON marshal/unmarshal on the Go side stays outside the critical section. Workaround for a concurrent-FFI-return-value race in github.com/ebitengine/purego v0.9.x. Two goroutines making purego calls in flight simultaneously can have their return pointers swapped — the *c_char one caller expected lands in the other caller's read, and vice versa. Downstream this surfaces as the labmate crashes (SIGSEGV in runtime.memmove on a non-canonical pointer, nil-deref at the *gateJson read in GetFeatureGateWithOptions, or glibc "double free or corruption (out)" when two callers free the same buffer). Repros at every upstream Rust SDK version we tested (0.15.1, 0.16.0, 0.17.0, 0.18.0, 0.19.0-0.19.3, this fork's 0.19.4-figma1) when paired with the current Go binding. Does NOT reproduce when the same .so is called from plain C with pthreads under identical workload — purego is the only path that exhibits it. Minimal purego-only repro: 10 lines of C, 80 lines of Go, two concurrent goroutines calling a function with signature `func(uint64) *byte` see their return pointers swapped. Co-Authored-By: Claude Opus 4.7 (1M context) --- statsig-go/data_store.go | 16 ++++++-- statsig-go/observability_client.go | 15 +++++-- statsig-go/persistent_storage.go | 16 ++++++-- statsig-go/statsig.go | 64 +++++++++++++++++++++++------- statsig-go/statsig_ffi.go | 49 +++++++++++++++++++++-- statsig-go/statsig_options.go | 12 ++++-- statsig-go/statsig_types.go | 29 ++++++++++---- statsig-go/statsig_user.go | 12 ++++-- 8 files changed, 171 insertions(+), 42 deletions(-) diff --git a/statsig-go/data_store.go b/statsig-go/data_store.go index 804becac7..08da1ac87 100644 --- a/statsig-go/data_store.go +++ b/statsig-go/data_store.go @@ -27,7 +27,9 @@ func NewDataStore(functions DataStoreFunctions) *DataStore { ref: 0, } - store.ref = GetFFI().data_store_create( + ffi := GetFFI() + ffi.mu.Lock() + store.ref = ffi.data_store_create( store.functions.Initialize, store.functions.Shutdown, // Get @@ -63,16 +65,24 @@ func NewDataStore(functions DataStoreFunctions) *DataStore { return store.functions.ShouldBeUsedForQueryingUpdates(*keyStr) }, ) + ffi.mu.Unlock() runtime.SetFinalizer(store, func(obj *DataStore) { - GetFFI().data_store_release(obj.ref) + ffi := GetFFI() + ffi.mu.Lock() + ffi.data_store_release(obj.ref) + ffi.mu.Unlock() }) return store } func (d *DataStore) INTERNAL_testDataStore(path string, value string) string { - return GetFFI().__internal__test_data_store(d.ref, path, value) + ffi := GetFFI() + ffi.mu.Lock() + r := ffi.__internal__test_data_store(d.ref, path, value) + ffi.mu.Unlock() + return r } type dataStoreSetArgs struct { diff --git a/statsig-go/observability_client.go b/statsig-go/observability_client.go index 97a33897f..daef00b95 100644 --- a/statsig-go/observability_client.go +++ b/statsig-go/observability_client.go @@ -39,7 +39,9 @@ func NewObservabilityClient(functions ObservabilityClientFunctions) *Observabili ref: 0, } - client.ref = GetFFI().observability_client_create( + ffi := GetFFI() + ffi.mu.Lock() + client.ref = ffi.observability_client_create( client.functions.Init, // Increment func(argsPtr *byte, argsLength uint64) { @@ -84,16 +86,23 @@ func NewObservabilityClient(functions ObservabilityClientFunctions) *Observabili return client.functions.ShouldEnableHighCardinalityForThisTag(*tag) }, ) + ffi.mu.Unlock() runtime.SetFinalizer(client, func(obj *ObservabilityClient) { - GetFFI().observability_client_release(obj.ref) + ffi := GetFFI() + ffi.mu.Lock() + ffi.observability_client_release(obj.ref) + ffi.mu.Unlock() }) return client } func (c *ObservabilityClient) INTERNAL_testObservabilityClient(action string, metricName string, value float64, tags string) { - GetFFI().__internal__test_observability_client(c.ref, action, metricName, value, tags) + ffi := GetFFI() + ffi.mu.Lock() + ffi.__internal__test_observability_client(c.ref, action, metricName, value, tags) + ffi.mu.Unlock() } func tryMarshalStandardArgs(inputPtr *byte, inputLength uint64) (*obsClientArgs, error) { diff --git a/statsig-go/persistent_storage.go b/statsig-go/persistent_storage.go index 1214a8bc2..33d0b5840 100644 --- a/statsig-go/persistent_storage.go +++ b/statsig-go/persistent_storage.go @@ -52,7 +52,9 @@ func NewPersistentStorage(functions PersistentStorageFunctions) *PersistentStora ref: 0, } - storage.ref = GetFFI().persistent_storage_create( + ffi := GetFFI() + ffi.mu.Lock() + storage.ref = ffi.persistent_storage_create( "go", // Load func(argsPtr *byte, argsLength uint64) *byte { @@ -100,16 +102,24 @@ func NewPersistentStorage(functions PersistentStorageFunctions) *PersistentStora storage.functions.Delete(data.Key, data.ConfigName) }, ) + ffi.mu.Unlock() runtime.SetFinalizer(storage, func(obj *PersistentStorage) { - GetFFI().persistent_storage_release(obj.ref) + ffi := GetFFI() + ffi.mu.Lock() + ffi.persistent_storage_release(obj.ref) + ffi.mu.Unlock() }) return storage } func (c *PersistentStorage) INTERNAL_testPersistentStorage(action string, key string, configName string, data string) string { - return GetFFI().__internal__test_persistent_storage(c.ref, action, key, configName, data) + ffi := GetFFI() + ffi.mu.Lock() + r := ffi.__internal__test_persistent_storage(c.ref, action, key, configName, data) + ffi.mu.Unlock() + return r } func tryMarshalPersistentStorageArgs(inputPtr *byte, inputLength uint64) (*persistentStorageArgs, error) { diff --git a/statsig-go/statsig.go b/statsig-go/statsig.go index 6aac29850..e40933a4b 100644 --- a/statsig-go/statsig.go +++ b/statsig-go/statsig.go @@ -22,7 +22,10 @@ type Statsig struct { } func NewStatsig(sdkKey string) (*Statsig, error) { - ref := GetFFI().statsig_create(sdkKey, 0) + ffi := GetFFI() + ffi.mu.Lock() + ref := ffi.statsig_create(sdkKey, 0) + ffi.mu.Unlock() if ref == 0 { return nil, fmt.Errorf("error creating Statsig instance") } @@ -38,7 +41,10 @@ func NewStatsig(sdkKey string) (*Statsig, error) { } func NewStatsigWithOptions(sdkKey string, opts *StatsigOptions) (*Statsig, error) { - ref := GetFFI().statsig_create(sdkKey, opts.ref) + ffi := GetFFI() + ffi.mu.Lock() + ref := ffi.statsig_create(sdkKey, opts.ref) + ffi.mu.Unlock() if ref == 0 { return nil, fmt.Errorf("error creating Statsig instance") } @@ -66,15 +72,24 @@ func (s *Statsig) NewUserBuilderWithCustomIDs(customIDs map[string]any) *Statsig } func (s *Statsig) Initialize() { - GetFFI().statsig_initialize_blocking(s.ref.Load()) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_initialize_blocking(s.ref.Load()) + ffi.mu.Unlock() } func (s *Statsig) Shutdown() { - GetFFI().statsig_shutdown_blocking(s.ref.Load()) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_shutdown_blocking(s.ref.Load()) + ffi.mu.Unlock() } func (s *Statsig) FlushEvents() { - GetFFI().statsig_flush_events_blocking(s.ref.Load()) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_flush_events_blocking(s.ref.Load()) + ffi.mu.Unlock() } func (s *Statsig) LogEvent(user *StatsigUser, event EventPayload) { @@ -83,8 +98,10 @@ func (s *Statsig) LogEvent(user *StatsigUser, event EventPayload) { log.Printf("Failed to marshal Statsig event: %v", err) return } - - GetFFI().statsig_log_event(s.ref.Load(), user.ref, string(eventJson)) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_log_event(s.ref.Load(), user.ref, string(eventJson)) + ffi.mu.Unlock() } func (s *Statsig) CheckGate(user *StatsigUser, gateName string) bool { @@ -97,8 +114,11 @@ func (s *Statsig) CheckGateWithOptions(user *StatsigUser, gateName string, optio fmt.Printf("Failed to marshal FeatureGateEvaluationOptions: %v", err) return false } - - return GetFFI().statsig_check_gate(s.ref.Load(), user.ref, gateName, optionsJson) + ffi := GetFFI() + ffi.mu.Lock() + result := ffi.statsig_check_gate(s.ref.Load(), user.ref, gateName, optionsJson) + ffi.mu.Unlock() + return result } func (s *Statsig) GetFeatureGate(user *StatsigUser, gateName string) FeatureGate { @@ -300,19 +320,31 @@ func (s *Statsig) GetClientInitResponseWithOptions(user *StatsigUser, options *C } func (s *Statsig) ManuallyLogFeatureGateExposure(user *StatsigUser, gateName string) { - GetFFI().statsig_manually_log_gate_exposure(s.ref.Load(), user.ref, gateName) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_manually_log_gate_exposure(s.ref.Load(), user.ref, gateName) + ffi.mu.Unlock() } func (s *Statsig) ManuallyLogDynamicConfigExposure(user *StatsigUser, configName string) { - GetFFI().statsig_manually_log_dynamic_config_exposure(s.ref.Load(), user.ref, configName) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_manually_log_dynamic_config_exposure(s.ref.Load(), user.ref, configName) + ffi.mu.Unlock() } func (s *Statsig) ManuallyLogExperimentExposure(user *StatsigUser, experimentName string) { - GetFFI().statsig_manually_log_experiment_exposure(s.ref.Load(), user.ref, experimentName) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_manually_log_experiment_exposure(s.ref.Load(), user.ref, experimentName) + ffi.mu.Unlock() } func (s *Statsig) ManuallyLogLayerParamExposure(user *StatsigUser, layerName string, paramName string) { - GetFFI().statsig_manually_log_layer_parameter_exposure(s.ref.Load(), user.ref, layerName, paramName) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_manually_log_layer_parameter_exposure(s.ref.Load(), user.ref, layerName, paramName) + ffi.mu.Unlock() } func (s *Statsig) release() { @@ -320,8 +352,10 @@ func (s *Statsig) release() { if was == 0 { return } - - GetFFI().statsig_release(was) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_release(was) + ffi.mu.Unlock() } func tryMarshalOrEmpty[T any](data *T) (string, error) { diff --git a/statsig-go/statsig_ffi.go b/statsig-go/statsig_ffi.go index 143fe94e1..caebdd2bf 100644 --- a/statsig-go/statsig_ffi.go +++ b/statsig-go/statsig_ffi.go @@ -13,6 +13,43 @@ import ( type StatsigFFI struct { lib uintptr + // mu serializes every purego-dispatched call from this binding. + // + // Why: purego's reflect-based call wrapper (`func.go`) shares a + // process-wide `sync.Pool` of `*syscall15Args` across concurrent + // dispatches. When two goroutines are inside that wrapper at the + // same time, the return value of one call can land in the other + // goroutine's read of `syscall.a1` — i.e. two FFI calls' return + // pointers get swapped between callers. + // + // Symptoms downstream: + // - SIGSEGV in runtime.memmove on a non-canonical pointer (a + // `*c_char` from a different call than the one Go thinks it's + // reading) + // - SIGSEGV nil-deref at the `*gateJson` deref in + // GetFeatureGateWithOptions + // - glibc `double free or corruption (out)` when two callers + // both try to free the same buffer + // - Silently-wrong gate / config / experiment results — the FFI + // for one call returns data shaped like a sibling call's + // + // Repro and discrimination matrix: + // github.com/figma/statsig-server-core/... (TODO: tracking issue) + // Upstream: ebitengine/purego — TODO file issue, link here. + // + // Lock scope: held only across the C-side dispatch (and any + // surrounding bookkeeping that hits FFI, e.g. `free_string`). + // JSON marshal/unmarshal on the Go side is intentionally outside + // the critical section. Per-call hold time is microseconds on the + // hot path. + // + // Lock-ordering: callbacks registered via `RegisterFunc` (data + // store, observability client, persistent storage) MUST NOT take + // `mu` if there is any chance the Rust side invokes them + // synchronously from inside an FFI call this binding has dispatched + // (would self-deadlock). + mu sync.Mutex + // StatsigOptions statsig_options_create_from_data func(string) uint64 statsig_options_release func(uint64) @@ -223,13 +260,19 @@ func GetFFI() *StatsigFFI { } func UseRustString(handler func() (*byte, uint64)) *string { + // Hold the FFI mutex across both the C-string-returning dispatch + // (inside handler) AND the matching free_string. See the comment + // on StatsigFFI.mu for why this is required. + instance.mu.Lock() + defer instance.mu.Unlock() + ptr, length := handler() if ptr == nil { return nil } - - defer instance.free_string(ptr) - return internal.GoStringFromPointer(ptr, length) + s := internal.GoStringFromPointer(ptr, length) + instance.free_string(ptr) + return s } func (ffi *StatsigFFI) updateStatsigMetadata() { diff --git a/statsig-go/statsig_options.go b/statsig-go/statsig_options.go index 8b6b14be2..f408d15a6 100644 --- a/statsig-go/statsig_options.go +++ b/statsig-go/statsig_options.go @@ -155,9 +155,10 @@ func (o *StatsigOptionsBuilder) Build() (*StatsigOptions, error) { return nil, err } - ref := GetFFI().statsig_options_create_from_data( - string(data), - ) + ffi := GetFFI() + ffi.mu.Lock() + ref := ffi.statsig_options_create_from_data(string(data)) + ffi.mu.Unlock() if ref == 0 { return nil, fmt.Errorf("failed to create StatsigOptions") @@ -169,7 +170,10 @@ func (o *StatsigOptionsBuilder) Build() (*StatsigOptions, error) { } runtime.SetFinalizer(options, func(obj *StatsigOptions) { - GetFFI().statsig_options_release(obj.ref) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_options_release(obj.ref) + ffi.mu.Unlock() }) return options, nil diff --git a/statsig-go/statsig_types.go b/statsig-go/statsig_types.go index 4ae02b6a0..e2e5a15f4 100644 --- a/statsig-go/statsig_types.go +++ b/statsig-go/statsig_types.go @@ -146,7 +146,10 @@ func (l *Layer) UnmarshalJSON(b []byte) error { } func (l *Layer) logExposure(key string) { - GetFFI().log_layer_param_exposure_from_raw(l.statsigRef, l.rawJson, key) + ffi := GetFFI() + ffi.mu.Lock() + ffi.log_layer_param_exposure_from_raw(l.statsigRef, l.rawJson, key) + ffi.mu.Unlock() } // ------------------------------------------------------------------------------------- [ Parameter Store ] @@ -184,40 +187,52 @@ func (p *ParameterStore) GetString(key string, fallback string) string { func (p *ParameterStore) GetBool(key string, fallback bool) bool { return parameterStoreValue(p, fallback, func(optionsJson string) (bool, bool) { - return GetFFI().statsig_get_bool_parameter_from_parameter_store( + ffi := GetFFI() + ffi.mu.Lock() + v := ffi.statsig_get_bool_parameter_from_parameter_store( p.statsigRef, p.userRef, p.Name, key, safeOptBool(fallback), optionsJson, - ), true + ) + ffi.mu.Unlock() + return v, true }) } func (p *ParameterStore) GetNumber(key string, fallback float64) float64 { return parameterStoreValue(p, fallback, func(optionsJson string) (float64, bool) { - return GetFFI().statsig_get_float64_parameter_from_parameter_store( + ffi := GetFFI() + ffi.mu.Lock() + v := ffi.statsig_get_float64_parameter_from_parameter_store( p.statsigRef, p.userRef, p.Name, key, fallback, optionsJson, - ), true + ) + ffi.mu.Unlock() + return v, true }) } func (p *ParameterStore) GetInt(key string, fallback int64) int64 { return parameterStoreValue(p, fallback, func(optionsJson string) (int64, bool) { - return GetFFI().statsig_get_int_parameter_from_parameter_store( + ffi := GetFFI() + ffi.mu.Lock() + v := ffi.statsig_get_int_parameter_from_parameter_store( p.statsigRef, p.userRef, p.Name, key, fallback, optionsJson, - ), true + ) + ffi.mu.Unlock() + return v, true }) } diff --git a/statsig-go/statsig_user.go b/statsig-go/statsig_user.go index c19928ec4..7dd3cfd2f 100644 --- a/statsig-go/statsig_user.go +++ b/statsig-go/statsig_user.go @@ -102,9 +102,10 @@ func (b *StatsigUserBuilder) Build() (*StatsigUser, error) { return nil, fmt.Errorf("error marshalling user: %v", err) } - userRef := GetFFI().statsig_user_create_from_data( - string(jsonData), - ) + ffi := GetFFI() + ffi.mu.Lock() + userRef := ffi.statsig_user_create_from_data(string(jsonData)) + ffi.mu.Unlock() if userRef == 0 { return nil, fmt.Errorf("error creating StatsigUser") @@ -115,7 +116,10 @@ func (b *StatsigUserBuilder) Build() (*StatsigUser, error) { } runtime.SetFinalizer(user, func(obj *StatsigUser) { - GetFFI().statsig_user_release(obj.ref) + ffi := GetFFI() + ffi.mu.Lock() + ffi.statsig_user_release(obj.ref) + ffi.mu.Unlock() }) return user, nil