From 4e298ace75e36b11bd675aa871383316deefc0da Mon Sep 17 00:00:00 2001 From: Sonic Build Admin Date: Thu, 30 Apr 2026 01:02:15 +0000 Subject: [PATCH] [restapi] Fix nil pointer dereference in monitor_certs when cert file is missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #### Why I did it `monitor_certs()` in `go-server-server/main.go` calls `os.Lstat()` on each cert file path but silently discards the error return value with `_`. When a cert file is missing or its path is invalid, `os.Lstat()` returns `(nil, error)`. Calling `.ModTime()` on the nil `FileInfo` immediately causes a **nil pointer dereference (SIGSEGV)** that crashes the entire restapi process. Observed crash in SONiC nightly CI (Arista 4600C, image 20251110.22): ``` goroutine 1 [running]: main.monitor_certs(0xc000056060, 0xc000012100) go-server-server/main.go:122 +0x... panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation addr=0x20 pc=...] ``` After the crash, `supervisord` restarted restapi and the proc-exit-listener emitted ERR alerts every 60 seconds, causing `system_health/test_system_status` to fail. ##### Work item tracking - Microsoft ADO: [37693847](https://msazure.visualstudio.com/One/_workitems/edit/37693847) #### How I did it Added explicit nil/error checks after every `os.Lstat()` call in `monitor_certs` (6 total): - **Startup section (3 calls):** use `log.Fatalf` so the process fails immediately with a descriptive error message instead of panicking with an opaque SIGSEGV. - **Monitor loop (3 calls):** use `log.Printf` + `time.Sleep` + `continue` so a transiently missing cert file is retried on the next cycle (every 3600 s) without crashing the server. #### How to verify it 1. Start restapi with `--https` flag and a cert path that does not exist on disk. 2. **Before fix:** process crashes with `panic: runtime error: invalid memory address or nil pointer dereference`. 3. 
**After fix:** process logs `"error: couldn't stat client cert file: ..."` via `log.Fatalf` and exits with a non-zero status instead of panicking. #### Which release branch to backport (provide reason below if selected) - [ ] 202305 - [ ] 202311 - [ ] 202405 - [ ] 202411 - [ ] 202505 - [x] 202511 — same crash observed on image 20251110.22 (202511 branch) #### Tested branch (Please provide the tested image version) - [x] 20251110.22 (202511) — crash confirmed in nightly CI job [69e79fb88e43924279229609](https://elastictest.org/scheduler/testplan/69e79fb88e43924279229609) #### Description for the changelog Fix nil pointer dereference crash in `monitor_certs` when a TLS cert file path is invalid or missing; restapi now exits with a descriptive error at startup instead of crashing with an opaque nil pointer panic. Signed-off-by: Sonic Build Admin --- go-server-server/main.go | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/go-server-server/main.go b/go-server-server/main.go index b32865a..3e9d75c 100644 --- a/go-server-server/main.go +++ b/go-server-server/main.go @@ -102,15 +102,24 @@ func signal_handler(messenger chan<- int, wgroup *sync.WaitGroup) { func monitor_certs(messenger chan<- int, wgroup *sync.WaitGroup) { defer wgroup.Done() - client_cert_finfo, _ := os.Lstat(*sw.ClientCertFlag) + client_cert_finfo, err := os.Lstat(*sw.ClientCertFlag) + if err != nil { + log.Fatalf("error: couldn't stat client cert file: %s", err) + } prev_client_cert_mtime := client_cert_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", client_cert_finfo.Name(), prev_client_cert_mtime.Unix()) - server_cert_finfo, _ := os.Lstat(*sw.ServerCertFlag) + server_cert_finfo, err := os.Lstat(*sw.ServerCertFlag) + if err != nil { + log.Fatalf("error: couldn't stat server cert file: %s", err) + } prev_server_cert_mtime := server_cert_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", server_cert_finfo.Name(), prev_server_cert_mtime.Unix()) - sever_key_finfo, _ := 
os.Lstat(*sw.ServerKeyFlag) + sever_key_finfo, err := os.Lstat(*sw.ServerKeyFlag) + if err != nil { + log.Fatalf("error: couldn't stat server key file: %s", err) + } prev_sever_key_mtime := sever_key_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", sever_key_finfo.Name(), prev_sever_key_mtime.Unix()) @@ -118,7 +127,12 @@ func monitor_certs(messenger chan<- int, wgroup *sync.WaitGroup) { for { reload := false - client_cert_finfo, _ := os.Lstat(*sw.ClientCertFlag) + client_cert_finfo, err := os.Lstat(*sw.ClientCertFlag) + if err != nil { + log.Printf("error: couldn't stat client cert file: %s", err) + time.Sleep(CERT_MONITOR_FREQUENCY) + continue + } client_cert_mtime := client_cert_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", client_cert_finfo.Name(), client_cert_mtime.Unix()) if client_cert_mtime != prev_client_cert_mtime { @@ -127,7 +141,12 @@ func monitor_certs(messenger chan<- int, wgroup *sync.WaitGroup) { } prev_client_cert_mtime = client_cert_mtime - server_cert_finfo, _ := os.Lstat(*sw.ServerCertFlag) + server_cert_finfo, err := os.Lstat(*sw.ServerCertFlag) + if err != nil { + log.Printf("error: couldn't stat server cert file: %s", err) + time.Sleep(CERT_MONITOR_FREQUENCY) + continue + } server_cert_mtime := server_cert_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", server_cert_finfo.Name(), server_cert_mtime.Unix()) if server_cert_mtime != prev_server_cert_mtime { @@ -136,7 +155,12 @@ func monitor_certs(messenger chan<- int, wgroup *sync.WaitGroup) { } prev_server_cert_mtime = server_cert_mtime - sever_key_finfo, _ := os.Lstat(*sw.ServerKeyFlag) + sever_key_finfo, err := os.Lstat(*sw.ServerKeyFlag) + if err != nil { + log.Printf("error: couldn't stat server key file: %s", err) + time.Sleep(CERT_MONITOR_FREQUENCY) + continue + } sever_key_mtime := sever_key_finfo.ModTime() log.Printf("trace: Last modified time of %s is %d", sever_key_finfo.Name(), sever_key_mtime.Unix()) if sever_key_mtime != 
prev_sever_key_mtime {