From ada639cf042f19fa7a1a4e08221e9c6b5a242ce1 Mon Sep 17 00:00:00 2001 From: cpeterson42 Date: Fri, 23 Jan 2026 18:41:23 -0800 Subject: [PATCH 1/2] switching to a6000 --- api/client_test.go | 4 ++-- cmd/connect_test.go | 2 +- cmd/create.go | 8 ++++---- cmd/create_test.go | 30 +++++++++++++++--------------- cmd/modify.go | 18 +++++++++--------- cmd/status_test.go | 4 ++-- tui/connect.go | 5 +++-- tui/create.go | 13 +++++-------- tui/delete.go | 4 ++-- tui/help-menus/create.go | 6 +++--- tui/modify.go | 22 ++++++---------------- tui/snapshot_create.go | 3 ++- tui/status.go | 2 +- utils/common.go | 14 ++++++++++++++ 14 files changed, 69 insertions(+), 66 deletions(-) diff --git a/api/client_test.go b/api/client_test.go index c066844..8fcc7db 100644 --- a/api/client_test.go +++ b/api/client_test.go @@ -21,7 +21,7 @@ func TestNewClient(t *testing.T) { func TestCreateInstanceRequest(t *testing.T) { req := CreateInstanceRequest{ CPUCores: 8, - GPUType: "t4", + GPUType: "a6000", Template: "ubuntu-22.04", NumGPUs: 1, DiskSizeGB: 100, @@ -143,7 +143,7 @@ func TestTemplateStruct(t *testing.T) { Version: 1, DefaultSpecs: ThunderTemplateDefaultSpecs{ Cores: 8, - GpuType: "t4", + GpuType: "a6000", NumGpus: 1, Storage: 100, }, diff --git a/cmd/connect_test.go b/cmd/connect_test.go index 48cce2a..b4fd262 100644 --- a/cmd/connect_test.go +++ b/cmd/connect_test.go @@ -114,7 +114,7 @@ func createTestInstance(id, uuid, name, ip, status, template, mode string, port Mode: mode, Port: port, NumGPUs: "1", - GPUType: "t4", + GPUType: "a6000", } } diff --git a/cmd/create.go b/cmd/create.go index 501c41a..34f0693 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -37,8 +37,8 @@ var createCmd = &cobra.Command{ var ( prototypingGPUMap = map[string]string{ - "t4": "t4", - "a100": "a100xl", + "a6000": "a6000", + "a100": "a100xl", } productionGPUMap = map[string]string{ @@ -55,7 +55,7 @@ func init() { rootCmd.AddCommand(createCmd) createCmd.Flags().StringVar(&mode, "mode", "", "Instance mode: prototyping or production") - createCmd.Flags().StringVar(&gpuType, "gpu", "", "GPU type (prototyping: t4 or a100, production: a100 or h100)") + createCmd.Flags().StringVar(&gpuType, "gpu", "", "GPU type (prototyping: a6000 or a100, production: a100 or h100)") createCmd.Flags().IntVar(&numGPUs, "num-gpus", 0, "Number of GPUs (production only): 1, 2, 4, or 8") createCmd.Flags().IntVar(&vcpus, "vcpus", 0, "CPU cores (prototyping only): 4, 8, 16, or 32") createCmd.Flags().StringVar(&template, "template", "", "OS template key or name") @@ -313,7 +313,7 @@ func validateCreateConfig(config *tui.CreateConfig, templates []api.Template, sn if config.Mode == "prototyping" { canonical, ok := prototypingGPUMap[config.GPUType] if !ok { - return fmt.Errorf("prototyping mode supports GPU types: t4 or a100") + return fmt.Errorf("prototyping mode supports GPU types: a6000 or a100") } config.GPUType = canonical config.NumGPUs = 1 diff --git a/cmd/create_test.go b/cmd/create_test.go index 8592b3f..897e624 100644 --- a/cmd/create_test.go +++ b/cmd/create_test.go @@ -24,7 +24,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "valid prototyping config", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", NumGPUs: 1, VCPUs: 8, Template: "ubuntu-22.04", @@ -65,13 +65,13 @@ func TestValidateCreateConfig(t *testing.T) { GPUType: "invalid", }, expectError: true, - errorContains: "prototyping mode supports GPU types: t4 or a100", + errorContains: "prototyping mode supports GPU types: a6000 or a100", }, { name: "prototyping without vcpus", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 0, }, expectError: true, @@ -81,7 +81,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "invalid vcpus for prototyping", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 6, }, expectError: true, @@ -91,7 +91,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "production with invalid GPU type", config: &tui.CreateConfig{ Mode: "production", - GPUType: "t4", + GPUType: "a6000", }, expectError: true, errorContains: "production mode supports GPU types: a100 or h100", @@ -135,7 +135,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "invalid disk size", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, Template: "ubuntu-22.04", DiskSizeGB: 50, @@ -150,7 +150,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "missing template", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, DiskSizeGB: 100, }, @@ -161,7 +161,7 @@ func TestValidateCreateConfig(t *testing.T) { name: "template not found", config: &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, Template: "nonexistent", DiskSizeGB: 100, @@ -193,7 +193,7 @@ func TestValidateCreateConfig(t *testing.T) { func TestCreateInstanceRequest(t *testing.T) { config := &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", NumGPUs: 1, VCPUs: 8, Template: "ubuntu-22.04", @@ -216,7 +216,7 @@ func TestCreateInstanceRequest(t *testing.T) { } assert.Equal(t, "prototyping", req.Mode) - assert.Equal(t, "t4", req.GPUType) + assert.Equal(t, "a6000", req.GPUType) assert.Equal(t, 1, req.NumGPUs) assert.Equal(t, 8, req.CPUCores) assert.Equal(t, "ubuntu-22.04", req.Template) @@ -279,7 +279,7 @@ func TestCreateConfigVCPUsAutoSet(t *testing.T) { func TestCreateConfigGPUTypeCaseInsensitive(t *testing.T) { config := &tui.CreateConfig{ Mode: "prototyping", - GPUType: "T4", + GPUType: "A6000", VCPUs: 8, Template: "ubuntu-22.04", DiskSizeGB: 100, @@ -292,7 +292,7 @@ func TestCreateConfigGPUTypeCaseInsensitive(t *testing.T) { err := validateCreateConfig(config, templates, []api.Snapshot{}, false) require.NoError(t, err) - assert.Equal(t, "t4", config.GPUType) + assert.Equal(t, "a6000", config.GPUType) } func TestCreateConfigA100Alias(t *testing.T) { @@ -319,7 +319,7 @@ func TestCreateConfigA100Alias(t *testing.T) { func TestCreateConfigTemplateCaseInsensitive(t *testing.T) { config := &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, Template: "UBUNTU 22.04", DiskSizeGB: 100, @@ -340,7 +340,7 @@ func TestCreateConfigTemplateCaseInsensitive(t *testing.T) { func TestCreateConfigTemplateByDisplayName(t *testing.T) { config := &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, Template: "Ubuntu 22.04", DiskSizeGB: 100, @@ -390,7 +390,7 @@ func TestCreateConfigDiskSizeBoundaries(t *testing.T) { t.Run(tt.name, func(t *testing.T) { config := &tui.CreateConfig{ Mode: "prototyping", - GPUType: "t4", + GPUType: "a6000", VCPUs: 8, Template: "ubuntu-22.04", DiskSizeGB: tt.diskSizeGB, diff --git a/cmd/modify.go b/cmd/modify.go index d40bcd6..0841847 100644 --- a/cmd/modify.go +++ b/cmd/modify.go @@ -31,7 +31,7 @@ var modifyCmd = &cobra.Command{ func init() { modifyCmd.Flags().String("mode", "", "Instance mode (prototyping or production)") - modifyCmd.Flags().String("gpu", "", "GPU type (t4, a100, h100)") + modifyCmd.Flags().String("gpu", "", "GPU type (a6000, a100, h100)") modifyCmd.Flags().Int("num-gpus", 0, "Number of GPUs (production mode: 1, 2, or 4)") modifyCmd.Flags().Int("vcpus", 0, "CPU cores (prototyping mode: 4, 8, 16, or 32)") modifyCmd.Flags().Int("disk-size-gb", 0, "Disk size in GB (100-1000, cannot shrink)") @@ -264,22 +264,22 @@ func buildModifyRequestFromFlags(cmd *cobra.Command, currentInstance *api.Instan // Normalize GPU names gpuMap := map[string]string{ - "t4": "t4", - "a100": "a100xl", - "h100": "h100", + "a6000": "a6000", + "a100": "a100xl", + "h100": "h100", } normalizedGPU, ok := gpuMap[gpuType] if !ok { - return req, fmt.Errorf("invalid GPU type '%s'. Valid options: t4, a100, h100", gpuType) + return req, fmt.Errorf("invalid GPU type '%s'. Valid options: a6000, a100, h100", gpuType) } // Validate GPU compatibility with mode - if effectiveMode == "prototyping" && normalizedGPU != "t4" && normalizedGPU != "a100xl" && normalizedGPU != "h100" { - return req, fmt.Errorf("GPU type '%s' is not available in prototyping mode (use t4, a100, or h100)", gpuType) + if effectiveMode == "prototyping" && normalizedGPU != "a6000" && normalizedGPU != "a100xl" && normalizedGPU != "h100" { + return req, fmt.Errorf("GPU type '%s' is not available in prototyping mode (use a6000, a100, or h100)", gpuType) } - if effectiveMode == "production" && normalizedGPU == "t4" { - return req, fmt.Errorf("GPU type 't4' is not available in production mode (use a100 or h100)") + if effectiveMode == "production" && normalizedGPU == "a6000" { + return req, fmt.Errorf("GPU type 'a6000' is not available in production mode (use a100 or h100)") } req.GpuType = &normalizedGPU diff --git a/cmd/status_test.go b/cmd/status_test.go index a4d0936..65680c9 100644 --- a/cmd/status_test.go +++ b/cmd/status_test.go @@ -34,7 +34,7 @@ func TestInstanceFields(t *testing.T) { CPUCores: "8", Memory: "32GB", Storage: 100, - GPUType: "T4", + GPUType: "a6000", NumGPUs: "1", Mode: "prototyping", Template: "ubuntu-22.04", @@ -52,7 +52,7 @@ func TestInstanceFields(t *testing.T) { assert.Equal(t, "8", instance.CPUCores) assert.Equal(t, "32GB", instance.Memory) assert.Equal(t, 100, instance.Storage) - assert.Equal(t, "T4", instance.GPUType) + assert.Equal(t, "a6000", instance.GPUType) assert.Equal(t, "1", instance.NumGPUs) assert.Equal(t, "prototyping", instance.Mode) assert.Equal(t, "ubuntu-22.04", instance.Template) diff --git a/tui/connect.go b/tui/connect.go index ce43159..40019a9 100644 --- a/tui/connect.go +++ b/tui/connect.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/Thunder-Compute/thunder-cli/api" + "github.com/Thunder-Compute/thunder-cli/utils" "github.com/charmbracelet/bubbles/spinner" tea "github.com/charmbracelet/bubbletea" "github.com/charmbracelet/lipgloss" @@ -98,7 +99,7 @@ func (m ConnectModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { var items []string for _, inst := range msg.instances { if inst.Status == "RUNNING" { - displayName := fmt.Sprintf("%s (%s) - %s GPU: %s", inst.Name, inst.ID, inst.NumGPUs, inst.GPUType) + displayName := fmt.Sprintf("%s (%s) - %s GPU: %s", inst.Name, inst.ID, inst.NumGPUs, utils.FormatGPUType(inst.GPUType)) items = append(items, displayName) if m.displayToID == nil { m.displayToID = make(map[string]string) @@ -292,7 +293,7 @@ func RunConnectSelectWithInstances(instances []api.Instance) (string, error) { displayToID := make(map[string]string) for _, inst := range instances { if inst.Status == "RUNNING" { - displayName := fmt.Sprintf("(%s) %s - %s GPU: %s", inst.ID, inst.Name, inst.NumGPUs, inst.GPUType) + displayName := fmt.Sprintf("(%s) %s - %s GPU: %s", inst.ID, inst.Name, inst.NumGPUs, utils.FormatGPUType(inst.GPUType)) items = append(items, displayName) displayToID[displayName] = inst.ID } diff --git a/tui/create.go b/tui/create.go index fe0dc80..a366f0e 100644 --- a/tui/create.go +++ b/tui/create.go @@ -338,7 +338,7 @@ func (m createModel) handleEnter() (tea.Model, tea.Cmd) { func (m createModel) getGPUOptions() []string { switch m.config.Mode { case "prototyping": - return []string{"t4", "a100xl", "h100"} + return []string{"a6000", "a100xl", "h100"} case "production": return []string{"a100xl", "h100"} default: @@ -425,21 +425,18 @@ func (m createModel) View() string { if m.cursor == i { cursor = m.styles.cursor.Render("▶ ") } - displayName := strings.ToUpper(gpu) + displayName := utils.FormatGPUType(gpu) switch gpu { case "a100xl": - switch m.config.Mode { - case "production": - displayName = "A100 80GB" - case "prototyping": + if m.config.Mode == "prototyping" { displayName = "A100 80GB (more powerful)" } case "h100": if m.config.Mode == "prototyping" { displayName += " (most powerful)" } - case "t4": + case "a6000": if m.config.Mode == "prototyping" { displayName += " (more affordable)" } @@ -540,7 +537,7 @@ func (m createModel) View() string { var panel strings.Builder panel.WriteString(m.styles.label.Render("Mode: ") + utils.Capitalize(m.config.Mode) + "\n") - panel.WriteString(m.styles.label.Render("GPU Type: ") + strings.ToUpper(m.config.GPUType) + "\n") + panel.WriteString(m.styles.label.Render("GPU Type: ") + utils.FormatGPUType(m.config.GPUType) + "\n") panel.WriteString(m.styles.label.Render("GPUs: ") + strconv.Itoa(m.config.NumGPUs) + "\n") panel.WriteString(m.styles.label.Render("vCPUs: ") + strconv.Itoa(m.config.VCPUs) + "\n") ramPerVCPU := 8 diff --git a/tui/delete.go b/tui/delete.go index 8c3c449..8dd7e84 100644 --- a/tui/delete.go +++ b/tui/delete.go @@ -216,7 +216,7 @@ func (m deleteModel) View() string { statusText, statusSuffix, instance.NumGPUs, - instance.GPUType, + utils.FormatGPUType(instance.GPUType), utils.Capitalize(instance.Mode), ) @@ -240,7 +240,7 @@ func (m deleteModel) View() string { instanceInfo.WriteString(m.styles.label.Render("Name: ") + m.selected.Name + "\n") instanceInfo.WriteString(m.styles.label.Render("Status: ") + m.selected.Status + "\n") instanceInfo.WriteString(m.styles.label.Render("Mode: ") + utils.Capitalize(m.selected.Mode) + "\n") - instanceInfo.WriteString(m.styles.label.Render("GPU: ") + m.selected.NumGPUs + "x" + m.selected.GPUType + "\n") + instanceInfo.WriteString(m.styles.label.Render("GPU: ") + m.selected.NumGPUs + "x" + utils.FormatGPUType(m.selected.GPUType) + "\n") instanceInfo.WriteString(m.styles.label.Render("Template: ") + utils.Capitalize(m.selected.Template)) s.WriteString(m.styles.instanceBox.Render(instanceInfo.String())) diff --git a/tui/help-menus/create.go b/tui/help-menus/create.go index d317a4c..c58b50c 100644 --- a/tui/help-menus/create.go +++ b/tui/help-menus/create.go @@ -36,7 +36,7 @@ func RenderCreateHelp(cmd *cobra.Command) { output.WriteString(" ") output.WriteString(CommandStyle.Render("Prototyping")) output.WriteString(" ") - output.WriteString(DescStyle.Render("tnr create --mode prototyping --gpu {t4|a100} --vcpus {4|8|16|32} --template {base|comfy-ui|comfy-ui-wan|ollama|webui-forge} --disk-size-gb {100-400}")) + output.WriteString(DescStyle.Render("tnr create --mode prototyping --gpu {a6000|a100} --vcpus {4|8|16|32} --template {base|comfy-ui|comfy-ui-wan|ollama|webui-forge} --disk-size-gb {100-400}")) output.WriteString("\n") output.WriteString(" ") @@ -59,7 +59,7 @@ func RenderCreateHelp(cmd *cobra.Command) { output.WriteString(ExampleStyle.Render("# Prototyping instance (lowest cost)")) output.WriteString("\n") output.WriteString(" ") - output.WriteString(CommandTextStyle.Render("tnr create --mode prototyping --gpu t4 --vcpus 8 --template base --disk-size-gb 100")) + output.WriteString(CommandTextStyle.Render("tnr create --mode prototyping --gpu a6000 --vcpus 8 --template base --disk-size-gb 100")) output.WriteString("\n\n") output.WriteString(" ") @@ -82,7 +82,7 @@ func RenderCreateHelp(cmd *cobra.Command) { output.WriteString(" ") output.WriteString(FlagStyle.Render("--gpu")) output.WriteString(" ") - output.WriteString(DescStyle.Render("GPU type (prototyping: t4 or a100, production: a100 or h100)")) + output.WriteString(DescStyle.Render("GPU type (prototyping: a6000 or a100, production: a100 or h100)")) output.WriteString("\n") output.WriteString(" ") diff --git a/tui/modify.go b/tui/modify.go index 7443ac4..f7b686f 100644 --- a/tui/modify.go +++ b/tui/modify.go @@ -215,7 +215,7 @@ func (m modifyModel) handleEnter() (tea.Model, tea.Cmd) { var gpuValues []string if effectiveMode == "prototyping" { - gpuValues = []string{"t4", "a100xl", "h100"} + gpuValues = []string{"a6000", "a100xl", "h100"} } else { gpuValues = []string{"a100xl", "h100"} } @@ -305,7 +305,7 @@ func (m modifyModel) getCurrentGPUCursorPosition() int { currentGPU := strings.ToLower(m.currentInstance.GPUType) if effectiveMode == "prototyping" { - if currentGPU == "t4" { + if currentGPU == "a6000" { return 0 } if currentGPU == "a100xl" { @@ -320,17 +320,7 @@ func (m modifyModel) getCurrentGPUCursorPosition() int { } func (m modifyModel) formatGPUType(gpuType string) string { - gpuType = strings.ToLower(gpuType) - switch gpuType { - case "t4": - return "T4" - case "a100xl": - return "A100 80GB" - case "h100": - return "H100" - default: - return gpuType - } + return utils.FormatGPUType(gpuType) } func (m modifyModel) getCurrentComputeCursorPosition() int { @@ -370,7 +360,7 @@ func (m modifyModel) getMaxCursor() int { effectiveMode = m.config.Mode } if effectiveMode == "prototyping" { - return 2 // 3 GPU options (t4/a100xl/h100) + return 2 // 3 GPU options (a6000/a100xl/h100) } return 1 // 2 GPU options (a100xl/h100) @@ -478,11 +468,11 @@ func (m modifyModel) renderGPUStep() string { if effectiveMode == "prototyping" { optionLabels = []string{ - "T4 (more affordable)", + "RTX A6000 (more affordable)", "A100 80GB (high performance)", "H100 (most powerful)", } - optionValues = []string{"t4", "a100xl", "h100"} + optionValues = []string{"a6000", "a100xl", "h100"} } else { optionLabels = []string{ "A100 80GB", diff --git a/tui/snapshot_create.go b/tui/snapshot_create.go index 48d4069..dd8bedc 100644 --- a/tui/snapshot_create.go +++ b/tui/snapshot_create.go @@ -8,6 +8,7 @@ import ( "github.com/Thunder-Compute/thunder-cli/api" "github.com/Thunder-Compute/thunder-cli/tui/theme" + "github.com/Thunder-Compute/thunder-cli/utils" "github.com/charmbracelet/bubbles/spinner" "github.com/charmbracelet/bubbles/textinput" tea "github.com/charmbracelet/bubbletea" @@ -317,7 +318,7 @@ func (m snapshotCreateModel) View() string { instance.ID, instance.Name, instance.NumGPUs, - instance.GPUType, + utils.FormatGPUType(instance.GPUType), ) if m.cursor == i { display = m.styles.selected.Render(display) diff --git a/tui/status.go b/tui/status.go index b9027a0..f497909 100644 --- a/tui/status.go +++ b/tui/status.go @@ -247,7 +247,7 @@ func (m StatusModel) renderTable() string { address := truncate(instance.IP, colWidths["Address"]) mode := truncate(utils.Capitalize(instance.Mode), colWidths["Mode"]) disk := truncate(fmt.Sprintf("%dGB", instance.Storage), colWidths["Disk"]) - gpu := truncate(fmt.Sprintf("%sx%s", instance.NumGPUs, instance.GPUType), colWidths["GPU"]) + gpu := truncate(fmt.Sprintf("%sx%s", instance.NumGPUs, utils.FormatGPUType(instance.GPUType)), colWidths["GPU"]) vcpus := truncate(instance.CPUCores, colWidths["vCPUs"]) ram := truncate(fmt.Sprintf("%sGB", instance.Memory), colWidths["RAM"]) template := truncate(utils.Capitalize(instance.Template), colWidths["Template"]) diff --git a/utils/common.go b/utils/common.go index 2f66613..82a47d6 100644 --- a/utils/common.go +++ b/utils/common.go @@ -8,3 +8,17 @@ func Capitalize(s string) string { } return strings.ToUpper(s[:1]) + s[1:] } + +// FormatGPUType converts internal GPU type codes to user-facing display names +func FormatGPUType(gpuType string) string { + switch strings.ToLower(gpuType) { + case "a6000": + return "RTX A6000" + case "a100xl": + return "A100 80GB" + case "h100": + return "H100" + default: + return gpuType + } +} From 9e93261921b62869c3f0888aa7cbc09d95ffbceb Mon Sep 17 00:00:00 2001 From: cpeterson42 Date: Sun, 25 Jan 2026 16:55:08 -0800 Subject: [PATCH 2/2] a6000 and fixed connect --- api/client.go | 34 ------- api/types.go | 2 - cmd/connect.go | 235 ++---------------------------------------- cmd/connect_test.go | 18 ---- cmd/create_test.go | 2 +- cmd/modify.go | 6 +- tui/create.go | 2 +- tui/status.go | 2 +- utils/ssh.go | 20 ---- utils/thunder.go | 242 ++++---------------------------------------- 10 files changed, 33 insertions(+), 530 deletions(-) diff --git a/api/client.go b/api/client.go index b4403bf..88414bd 100644 --- a/api/client.go +++ b/api/client.go @@ -114,40 +114,6 @@ func (c *Client) ListInstancesWithIPUpdateCtx(ctx context.Context) ([]Instance, return instances, nil } -func (c *Client) GetLatestBinaryHashCtx(ctx context.Context) (string, error) { - metadataURL := "https://storage.googleapis.com/storage/v1/b/client-binary/o/client_linux_x86_64?alt=json" - - req, err := http.NewRequest("GET", metadataURL, nil) - if err != nil { - return "", fmt.Errorf("failed to create request: %w", err) - } - - resp, err := c.do(ctx, req) - if err != nil { - return "", fmt.Errorf("failed to make request: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != 200 { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) - } - - body, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("failed to read response: %w", err) - } - - var result struct { - Metadata map[string]string `json:"metadata"` - } - if err := json.Unmarshal(body, &result); err != nil { - return "", fmt.Errorf("failed to parse response: %w", err) - } - - return result.Metadata["hash"], nil -} - func (c *Client) AddSSHKeyCtx(ctx context.Context, instanceID string) (*AddSSHKeyResponse, error) { url := fmt.Sprintf("%s/instances/%s/add_key", c.baseURL, instanceID) diff --git a/api/types.go b/api/types.go index daeac8f..de033eb 100644 --- a/api/types.go +++ b/api/types.go @@ -117,6 +117,4 @@ type ConnectClient interface { ListInstances() ([]Instance, error) ListInstancesWithIPUpdateCtx(ctx context.Context) ([]Instance, error) AddSSHKeyCtx(ctx context.Context, instanceID string) (*AddSSHKeyResponse, error) - GetLatestBinaryHashCtx(ctx context.Context) (string, error) - GetNextDeviceID() (string, error) } diff --git a/cmd/connect.go b/cmd/connect.go index 8d19cd3..79f4581 100644 --- a/cmd/connect.go +++ b/cmd/connect.go @@ -2,7 +2,6 @@ package cmd import ( "context" - "encoding/base64" "errors" "fmt" "os" @@ -10,7 +9,6 @@ import ( "os/signal" "runtime" "strconv" - "strings" "time" tea "github.com/charmbracelet/bubbletea" @@ -251,9 +249,6 @@ func runConnectWithOptions(instanceID string, tunnelPortsStr []string, debug boo phase1Start := time.Now() tui.SendPhaseUpdate(p, 0, tui.PhaseInProgress, "Fetching instances...", 0) - hashChan := make(chan string, 1) - hashErrChan := make(chan error, 1) - if runtime.GOOS == "windows" { if err := checkWindowsOpenSSH(); err != nil { return err @@ -274,16 +269,6 @@ func runConnectWithOptions(instanceID string, tunnelPortsStr []string, debug boo return nil } - // Fetch binary hash in background - go func() { - hash, err := client.GetLatestBinaryHashCtx(ctx) - if err != nil { - hashErrChan <- err - return - } - hashChan <- hash - }() - if checkCancelled() { return nil } @@ -327,13 +312,6 @@ func runConnectWithOptions(instanceID string, tunnelPortsStr []string, debug boo port = 22 } - gpuCount := 1 - if instance.NumGPUs != "" { - if count, err := strconv.Atoi(instance.NumGPUs); err == nil { - gpuCount = count - } - } - phaseTimings["instance_validation"] = time.Since(phase2Start) tui.SendPhaseUpdate(p, 1, tui.PhaseCompleted, fmt.Sprintf("Found: %s (%s)", instance.Name, instance.IP), phaseTimings["instance_validation"]) @@ -491,211 +469,18 @@ func runConnectWithOptions(instanceID string, tunnelPortsStr []string, debug boo return nil } - // Get binary hash (already fetched in background) - var binaryHash string - select { - case hash := <-hashChan: - binaryHash = hash - case <-hashErrChan: - binaryHash = "" - case <-ctx.Done(): - if checkCancelled() { - return nil - } - case <-time.After(2 * time.Second): - binaryHash = "" - } - - // For production mode, check active sessions first (like VSCode extension) to skip operations if needed - var activeSessions int - var existingConfig *utils.ThunderConfig - var existingHash string - var canEarlyReturn bool - + // Set up token on the instance (binary is now managed by the instance itself) if instance.Mode == "production" { - var checkErr error - activeSessions, checkErr = utils.CheckActiveSessions(sshClient) - if checkErr != nil { - activeSessions = 0 - } - - if activeSessions > 1 { - tokenB64 := base64.StdEncoding.EncodeToString([]byte(config.Token)) - combinedTokenCmd := fmt.Sprintf("sudo install -d -m 755 /home/ubuntu/.thunder && echo '%s' | base64 -d | sudo tee /home/ubuntu/.thunder/token > /dev/null && sudo chown ubuntu:ubuntu /home/ubuntu/.thunder/token && sudo chmod 600 /home/ubuntu/.thunder/token && sudo sed -i '/export TNR_API_TOKEN/d' /home/ubuntu/.bashrc || true && echo 'export TNR_API_TOKEN=\"$(cat /home/ubuntu/.thunder/token)\"' | sudo tee -a /home/ubuntu/.bashrc > /dev/null", tokenB64) - _, _ = utils.ExecuteSSHCommand(sshClient, combinedTokenCmd) - phaseTimings["instance_setup"] = time.Since(phase5Start) - tui.SendPhaseComplete(p, 4, phaseTimings["instance_setup"]) - canEarlyReturn = true - } else { - // No active sessions - match VSCode extension: skip config/hash check, run cleanup (idempotent) - if err := utils.RemoveThunderVirtualization(sshClient, config.Token); err != nil { - shutdownTUI() - return fmt.Errorf("failed to remove Thunder virtualization: %w", err) - } - phaseTimings["instance_setup"] = time.Since(phase5Start) - tui.SendPhaseComplete(p, 4, phaseTimings["instance_setup"]) - canEarlyReturn = true - - } - } - - // For prototyping mode, do full config/hash read in parallel - if instance.Mode != "production" || !canEarlyReturn { - // Clean up ld.so.preload early if binary is missing to prevent stderr pollution - _ = utils.CleanupLdSoPreloadIfBinaryMissing(sshClient) - - type configResult struct { - config *utils.ThunderConfig - err error - } - type instanceHashResult struct { - hash string - err error - } - - configChan := make(chan configResult, 1) - instanceHashChan := make(chan instanceHashResult, 1) - - go func() { - config, err := utils.GetThunderConfig(sshClient) - configChan <- configResult{config: config, err: err} - }() - - expectedHash := utils.NormalizeHash(binaryHash) - isValidHash := expectedHash != "" && len(expectedHash) == 32 && utils.IsHexString(expectedHash) - hashAlgorithm := utils.DetectHashAlgorithm(expectedHash) - - if isValidHash { - go func() { - hash, err := utils.GetInstanceBinaryHash(sshClient, hashAlgorithm) - instanceHashChan <- instanceHashResult{hash: hash, err: err} - }() - } else { - instanceHashChan <- instanceHashResult{hash: "", err: nil} - } - - configRes := <-configChan - hashRes := <-instanceHashChan - - if configRes.err == nil { - existingConfig = configRes.config - } - - if hashRes.err == nil { - existingHash = hashRes.hash - } - - } - - ranConfigurator := false - - // Early return if GPU config and hash match - if !canEarlyReturn { - if instance.Mode == "prototyping" && existingConfig != nil && existingConfig.DeviceID != "" { - expectedHash := utils.NormalizeHash(binaryHash) - isValidHash := expectedHash != "" && len(expectedHash) == 32 && utils.IsHexString(expectedHash) - gpuTypeMatches := strings.EqualFold(existingConfig.GPUType, instance.GPUType) - gpuCountMatches := existingConfig.GPUCount == gpuCount - hashMatches := isValidHash && existingHash != "" && existingHash == expectedHash - - if gpuTypeMatches && gpuCountMatches && hashMatches { - phaseTimings["instance_setup"] = time.Since(phase5Start) - tui.SendPhaseComplete(p, 4, phaseTimings["instance_setup"]) - canEarlyReturn = true - ranConfigurator = true - } - } - } - - // Skip token/bootstrap operations if GPU config matches (ConfigureThunderVirtualization handles token update) - skipTokenBootstrap := canEarlyReturn - skipActiveSessionsCheck := canEarlyReturn - if !canEarlyReturn && instance.Mode == "prototyping" && existingConfig != nil && existingConfig.DeviceID != "" { - gpuTypeMatches := strings.EqualFold(existingConfig.GPUType, instance.GPUType) - gpuCountMatches := existingConfig.GPUCount == gpuCount - if gpuTypeMatches && gpuCountMatches { - skipTokenBootstrap = true - skipActiveSessionsCheck = true - } - } - - // For prototyping mode, handle token/bootstrap and active sessions check - if instance.Mode == "prototyping" && !canEarlyReturn { - if !skipTokenBootstrap { - // Combine token bootstrap and bashrc update into a single SSH command - tokenB64 := base64.StdEncoding.EncodeToString([]byte(config.Token)) - combinedTokenCmd := fmt.Sprintf("sudo install -d -m 755 /home/ubuntu/.thunder && echo '%s' | base64 -d | sudo tee /home/ubuntu/.thunder/token > /dev/null && sudo chown ubuntu:ubuntu /home/ubuntu/.thunder/token && sudo chmod 600 /home/ubuntu/.thunder/token && sudo sed -i '/export TNR_API_TOKEN/d' /home/ubuntu/.bashrc || true && echo 'export TNR_API_TOKEN=\"$(cat /home/ubuntu/.thunder/token)\"' | sudo tee -a /home/ubuntu/.bashrc > /dev/null", tokenB64) - _, _ = utils.ExecuteSSHCommand(sshClient, combinedTokenCmd) - } - - if !skipActiveSessionsCheck { - var checkErr error - activeSessions, checkErr = utils.CheckActiveSessions(sshClient) - if checkErr != nil { - activeSessions = 0 - } - } else { - activeSessions = 0 - } - } else if instance.Mode == "prototyping" { - activeSessions = 0 - } - - if !canEarlyReturn { - switch instance.Mode { - case "production": - tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Production mode detected, disabling Thunder virtualization...", 0) - if err := utils.RemoveThunderVirtualization(sshClient, config.Token); err != nil { - shutdownTUI() - return fmt.Errorf("failed to remove Thunder virtualization: %w", err) - } - default: - var deviceID string - if existingConfig != nil && existingConfig.DeviceID != "" { - deviceID = existingConfig.DeviceID - } else { - if newID, err := client.GetNextDeviceID(); err == nil { - deviceID = newID - } - } - - switch { - case activeSessions > 1: - tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, fmt.Sprintf("Detected %d active SSH sessions, skipping binary update", activeSessions), 0) - case deviceID == "": - tui.SendPhaseUpdate(p, 4, tui.PhaseWarning, "Unable to determine device ID, skipping environment setup", 0) - default: - tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Updating Thunder binary and config if needed...", 0) - if err := utils.ConfigureThunderVirtualization(sshClient, instanceID, deviceID, instance.GPUType, gpuCount, config.Token, binaryHash, existingConfig); err != nil { - shutdownTUI() - return fmt.Errorf("failed to configure Thunder virtualization: %w", err) - } - ranConfigurator = true - } + tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Production mode detected, setting up token...", 0) + if err := utils.RemoveThunderVirtualization(sshClient, config.Token); err != nil { + shutdownTUI() + return fmt.Errorf("failed to set up token: %w", err) } - } - - if checkCancelled() { - return nil - } - - if instance.Mode == "prototyping" && !ranConfigurator && binaryHash != "" { - tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Checking Thunder binary version...", 0) - expectedHash := utils.NormalizeHash(binaryHash) - hashAlgo := utils.DetectHashAlgorithm(expectedHash) - - existingHash, hashErr := utils.GetInstanceBinaryHash(sshClient, hashAlgo) - existingHashNormalized := utils.NormalizeHash(existingHash) - - if hashErr == nil && existingHashNormalized != "" && existingHashNormalized != expectedHash { - tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Binary outdated, updating in background...", 0) - deviceID := "" - if existingConfig != nil && existingConfig.DeviceID != "" { - deviceID = existingConfig.DeviceID - } - if deviceID != "" { - _ = utils.TriggerBackgroundSetup(sshClient, instanceID, deviceID, instance.GPUType, gpuCount, config.Token) - } + } else { + tui.SendPhaseUpdate(p, 4, tui.PhaseInProgress, "Setting up token...", 0) + if err := utils.SetupToken(sshClient, config.Token); err != nil { + shutdownTUI() + return fmt.Errorf("failed to set up token: %w", err) } } diff --git a/cmd/connect_test.go b/cmd/connect_test.go index b4fd262..fab366c 100644 --- a/cmd/connect_test.go +++ b/cmd/connect_test.go @@ -38,12 +38,6 @@ type mockAPIClient struct { addSSHKeyCalled int addSSHKeyInstanceIDs []string - binaryHash string - binaryHashErr error - - nextDeviceID string - nextDeviceIDErr error - mu sync.Mutex } @@ -72,18 +66,6 @@ func (m *mockAPIClient) AddSSHKeyCtx(ctx context.Context, instanceID string) (*a return m.addSSHKeyResponse, m.addSSHKeyErr } -func (m *mockAPIClient) GetLatestBinaryHashCtx(ctx context.Context) (string, error) { - m.mu.Lock() - defer m.mu.Unlock() - return m.binaryHash, m.binaryHashErr -} - -func (m *mockAPIClient) GetNextDeviceID() (string, error) { - m.mu.Lock() - defer m.mu.Unlock() - return m.nextDeviceID, m.nextDeviceIDErr -} - // ============================================================================= // Mock SSH Client // ============================================================================= diff --git a/cmd/create_test.go b/cmd/create_test.go index 897e624..2a2e8c5 100644 --- a/cmd/create_test.go +++ b/cmd/create_test.go @@ -65,7 +65,7 @@ func TestValidateCreateConfig(t *testing.T) { GPUType: "invalid", }, expectError: true, - errorContains: "prototyping mode supports GPU types: a6000 or a100", + errorContains: "prototyping mode supports GPU types: a6000 or a100xl or h100", }, { name: "prototyping without vcpus", diff --git a/cmd/modify.go b/cmd/modify.go index 0841847..69646eb 100644 --- a/cmd/modify.go +++ b/cmd/modify.go @@ -271,15 +271,15 @@ func buildModifyRequestFromFlags(cmd *cobra.Command, currentInstance *api.Instan normalizedGPU, ok := gpuMap[gpuType] if !ok { - return req, fmt.Errorf("invalid GPU type '%s'. Valid options: a6000, a100, h100", gpuType) + return req, fmt.Errorf("invalid GPU type '%s'. Valid options: a6000, a100xl, h100", gpuType) } // Validate GPU compatibility with mode if effectiveMode == "prototyping" && normalizedGPU != "a6000" && normalizedGPU != "a100xl" && normalizedGPU != "h100" { - return req, fmt.Errorf("GPU type '%s' is not available in prototyping mode (use a6000, a100, or h100)", gpuType) + return req, fmt.Errorf("GPU type '%s' is not available in prototyping mode (use a6000, a100xl, or h100)", gpuType) } if effectiveMode == "production" && normalizedGPU == "a6000" { - return req, fmt.Errorf("GPU type 'a6000' is not available in production mode (use a100 or h100)") + return req, fmt.Errorf("GPU type 'a6000' is not available in production mode (use a100xl or h100)") } req.GpuType = &normalizedGPU diff --git a/tui/create.go b/tui/create.go index a366f0e..3e44e85 100644 --- a/tui/create.go +++ b/tui/create.go @@ -552,7 +552,7 @@ func (m createModel) View() string { s.WriteString("\n") if m.config.Mode == "prototyping" { - warning := "⚠ Prototyping mode: for dev/testing; not for production inference or long-running tasks.\n" + warning := "⚠ Prototyping mode is optimized for dev/testing; switch to production mode for inference servers or large training runs.\n" s.WriteString(warningStyleTUI.Render(warning)) s.WriteString("\n") } diff --git a/tui/status.go b/tui/status.go index f497909..940806f 100644 --- a/tui/status.go +++ b/tui/status.go @@ -207,7 +207,7 @@ func (m StatusModel) renderTable() string { "Address": 18, "Mode": 15, "Disk": 8, - "GPU": 10, + "GPU": 14, "vCPUs": 8, "RAM": 8, "Template": 18, diff --git a/utils/ssh.go b/utils/ssh.go index 4347292..3c7dfd4 100644 --- a/utils/ssh.go +++ b/utils/ssh.go @@ -500,26 +500,6 @@ func ExecuteSSHCommandStdoutOnly(client *SSHClient, command string) (string, err return string(stdoutData), nil } -// CheckActiveSessions counts active SSH sessions (pts/ terminals) -func CheckActiveSessions(client *SSHClient) (int, error) { - // Use stdout-only and redirect stderr to avoid ld.so.preload error pollution - output, err := ExecuteSSHCommandStdoutOnly(client, "who | grep 'pts/' | wc -l 2>/dev/null") - if err != nil { - return 0, err - } - - // Filter out any remaining ld.so.preload errors - output = filterLdSoErrors(output) - - var count int - _, err = fmt.Sscanf(strings.TrimSpace(output), "%d", &count) - if err != nil { - return 0, fmt.Errorf("failed to parse session count: %w", err) - } - - return count, nil -} - // UploadFile uploads a single file via SSH stdin pipe func UploadFile(client *SSHClient, localPath, remotePath string) error { if client == nil || client.client == nil { diff --git a/utils/thunder.go b/utils/thunder.go index 008a0d3..658b7d4 100644 --- a/utils/thunder.go +++ b/utils/thunder.go @@ -2,192 +2,40 @@ package utils import ( "encoding/base64" - "encoding/json" "fmt" "strings" ) -type ThunderConfig struct { - InstanceID string `json:"instanceId"` - DeviceID string `json:"deviceId"` - GPUType string `json:"gpuType"` - GPUCount int `json:"gpuCount"` -} - -type HashAlgorithm string - -const ( - HashAlgoUnknown HashAlgorithm = "" - HashAlgoSHA256 HashAlgorithm = "sha256" - HashAlgoMD5 HashAlgorithm = "md5" -) - const ( - thunderBinaryURL = "https://storage.googleapis.com/client-binary/client_linux_x86_64" - thunderConfigDir = "/home/ubuntu/.thunder" - thunderConfigPath = "/home/ubuntu/.thunder/config.json" - thunderLibPath = "/home/ubuntu/.thunder/libthunder.so" - thunderSymlink = "/etc/thunder/libthunder.so" - ldPreloadPath = "/etc/ld.so.preload" - tokenPath = "/home/ubuntu/.thunder/token" - tokenSymlink = "/etc/thunder/token" + thunderConfigDir = "/home/ubuntu/.thunder" + thunderLibPath = "/home/ubuntu/.thunder/libthunder.so" + thunderSymlink = "/etc/thunder/libthunder.so" + ldPreloadPath = "/etc/ld.so.preload" + tokenPath = "/home/ubuntu/.thunder/token" ) -func GetThunderConfig(client *SSHClient) (*ThunderConfig, error) { - // Use stdout-only to avoid stderr pollution from ld.so.preload errors - output, err := ExecuteSSHCommandStdoutOnly(client, fmt.Sprintf("cat %s 2>/dev/null || echo '{}'", thunderConfigPath)) - if err != nil { - return nil, err - } - - output = filterLdSoErrors(output) - - var config ThunderConfig - if err := json.Unmarshal([]byte(output), &config); err != nil { - return nil, fmt.Errorf("failed to parse Thunder config: %w", err) - } - - return &config, nil -} - -// filterLdSoErrors prevents stderr pollution from breaking output parsing when /etc/ld.so.preload references a missing binary -func filterLdSoErrors(output string) string { - lines := strings.Split(output, "\n") - var filtered []string - for _, line := range lines { - line = strings.TrimSpace(line) - if line == "" { - continue - } - isLdSoError := strings.Contains(line, "ld.so: object") || - strings.Contains(line, "cannot be preloaded") || - strings.Contains(line, "ignored") || - strings.HasPrefix(line, "error: ld.so:") - if !isLdSoError { - filtered = append(filtered, line) - } - } - return strings.Join(filtered, "\n") -} - -// CleanupLdSoPreloadIfBinaryMissing prevents stderr pollution from breaking command output parsing -func CleanupLdSoPreloadIfBinaryMissing(client *SSHClient) error { - checkCmd := fmt.Sprintf("test -f %s && echo 'exists' || echo 'missing'", thunderLibPath) - output, err := ExecuteSSHCommandStdoutOnly(client, checkCmd) - if err != nil { - return err - } - output = filterLdSoErrors(output) - - if strings.TrimSpace(output) == "missing" { - cleanupCmd := fmt.Sprintf("sudo sed -i '/%s/d' %s 2>/dev/null || sudo rm -f %s 2>/dev/null || true", thunderSymlink, ldPreloadPath, ldPreloadPath) - _, err := ExecuteSSHCommand(client, cleanupCmd) - return err - } - return nil -} - -func ConfigureThunderVirtualization(client *SSHClient, instanceID, deviceID, gpuType string, gpuCount int, token, binaryHash string, existingConfig *ThunderConfig) error { - expectedHash := NormalizeHash(binaryHash) - isValidHash := expectedHash != "" && len(expectedHash) == 32 && IsHexString(expectedHash) - hashAlgorithm := DetectHashAlgorithm(expectedHash) - existingHash := "" - if isValidHash { - if h, err := GetInstanceBinaryHash(client, hashAlgorithm); err == nil { - existingHash = h - } - } - - // If binary hash matches, no update needed - if isValidHash && existingHash != "" && existingHash == expectedHash { - return nil - } - - binaryNeedsUpdate := !isValidHash || existingHash == "" || existingHash != expectedHash - +// SetupToken sets up the authentication token on the instance +func SetupToken(client *SSHClient, token string) error { tokenB64 := base64.StdEncoding.EncodeToString([]byte(token)) - var scriptParts []string - scriptParts = append(scriptParts, fmt.Sprintf("mkdir -p %s", thunderConfigDir)) - scriptParts = append(scriptParts, "sudo mkdir -p /etc/thunder") - - if binaryNeedsUpdate { - scriptParts = append(scriptParts, fmt.Sprintf("curl -sL %s -o /tmp/libthunder.tmp && mv /tmp/libthunder.tmp %s", thunderBinaryURL, thunderLibPath)) - scriptParts = append(scriptParts, fmt.Sprintf("sudo ln -sf %s %s", thunderLibPath, thunderSymlink)) - scriptParts = append(scriptParts, fmt.Sprintf("echo '%s' | sudo tee %s > /dev/null", thunderSymlink, ldPreloadPath)) + tokenCommands := []string{ + fmt.Sprintf("mkdir -p %s", thunderConfigDir), + fmt.Sprintf("echo '%s' | base64 -d > %s", tokenB64, tokenPath), + fmt.Sprintf("chmod 600 %s", tokenPath), + "sudo sed -i '/export TNR_API_TOKEN/d' /home/ubuntu/.bashrc || true", + "echo 'export TNR_API_TOKEN=\"$(cat /home/ubuntu/.thunder/token)\"' | sudo tee -a /home/ubuntu/.bashrc > /dev/null || true", } - // Always ensure token is set (in case it changed) - scriptParts = append(scriptParts, fmt.Sprintf("echo '%s' | base64 -d > %s", tokenB64, tokenPath)) - scriptParts = append(scriptParts, fmt.Sprintf("sudo ln -sf %s %s", tokenPath, tokenSymlink)) + setupScript := strings.Join(tokenCommands, " && ") - if len(scriptParts) > 0 { - setupScript := strings.Join(scriptParts, " && ") - if _, err := ExecuteSSHCommand(client, setupScript); err != nil { - return fmt.Errorf("failed to configure Thunder virtualization: %w", err) - } + if _, err := ExecuteSSHCommand(client, setupScript); err != nil { + return fmt.Errorf("failed to set up token: %w", err) } return nil } -func NormalizeHash(hash string) string { - trimmed := strings.TrimSpace(hash) - if trimmed == "" { - return "" - } - return strings.ToLower(trimmed) -} - -func IsHexString(value string) bool { - if value == "" || len(value)%2 != 0 { - return false - } - for _, c := range value { - if (c < '0' || c > '9') && (c < 'a' || c > 'f') { - return false - } - } - return true -} - -func DetectHashAlgorithm(hash string) HashAlgorithm { - if hash == "" { - return HashAlgoUnknown - } - switch len(hash) { - case 64: - if IsHexString(hash) { - return HashAlgoSHA256 - } - case 32: - if IsHexString(hash) { - return HashAlgoMD5 - } - } - return HashAlgoMD5 -} - -func GetInstanceBinaryHash(client *SSHClient, algorithm HashAlgorithm) (string, error) { - var cmd string - switch algorithm { - case HashAlgoMD5: - cmd = fmt.Sprintf("md5sum %s 2>/dev/null | awk '{print $1}' || echo ''", thunderLibPath) - default: - cmd = fmt.Sprintf("sha256sum %s 2>/dev/null | awk '{print $1}' || echo ''", thunderLibPath) - } - - output, err := ExecuteSSHCommandStdoutOnly(client, cmd) - if err != nil { - return "", err - } - - output = filterLdSoErrors(output) - normalized := NormalizeHash(output) - return normalized, nil -} - -// RemoveThunderVirtualization production: removes binary/config, keeps token +// RemoveThunderVirtualization removes Thunder binary/config for production mode and sets up token func RemoveThunderVirtualization(client *SSHClient, token string) error { productionCommands := []string{ fmt.Sprintf("sudo rm -f %s || true", ldPreloadPath), @@ -195,7 +43,6 @@ func RemoveThunderVirtualization(client *SSHClient, token string) error { "sudo chown root:root /etc/ld.so.preload || true", "sudo chmod 644 /etc/ld.so.preload || true", fmt.Sprintf("sudo rm -f %s || true", thunderLibPath), - fmt.Sprintf("sudo rm -f %s || true", thunderConfigPath), "sudo rm -rf /etc/thunder || true", fmt.Sprintf("echo '%s' | base64 -d > /tmp/token.tmp", base64.StdEncoding.EncodeToString([]byte(token))), "sudo install -d -m 755 /home/ubuntu/.thunder || true", @@ -213,58 +60,3 @@ func RemoveThunderVirtualization(client *SSHClient, token string) error { return nil } - -func TriggerBackgroundSetup(client *SSHClient, instanceID, deviceID, gpuType string, gpuCount int, token string) error { - config := ThunderConfig{ - InstanceID: instanceID, - DeviceID: deviceID, - GPUType: gpuType, - GPUCount: gpuCount, - } - configJSON, err := json.Marshal(config) - if err != nil { - return fmt.Errorf("failed to marshal config: %w", err) - } - configB64 := base64.StdEncoding.EncodeToString(configJSON) - tokenB64 := base64.StdEncoding.EncodeToString([]byte(token)) - - bgScript := fmt.Sprintf(`nohup bash -c ' -mkdir -p %s -sudo mkdir -p /etc/thunder -curl -sL %s -o /tmp/libthunder.tmp && mv /tmp/libthunder.tmp %s -sudo ln -sf %s %s -echo "%s" | sudo tee %s > /dev/null -echo "%s" | base64 -d > %s -sudo ln -sf %s /etc/thunder/config.json -echo "%s" | base64 -d > %s -sudo ln -sf %s %s -' > /dev/null 2>&1 &`, - thunderConfigDir, - thunderBinaryURL, thunderLibPath, - thunderLibPath, thunderSymlink, - thunderSymlink, ldPreloadPath, - configB64, thunderConfigPath, - thunderConfigPath, - tokenB64, tokenPath, - tokenPath, tokenSymlink) - - _, err = ExecuteSSHCommand(client, bgScript) - return err -} - -func TriggerBackgroundTokenSetup(client *SSHClient, token string) error { - tokenB64 := base64.StdEncoding.EncodeToString([]byte(token)) - - bgScript := fmt.Sprintf(`nohup bash -c ' -mkdir -p %s -sudo mkdir -p /etc/thunder -echo "%s" | base64 -d > %s -sudo ln -sf %s %s -' > /dev/null 2>&1 &`, - thunderConfigDir, - tokenB64, tokenPath, - tokenPath, tokenSymlink) - - _, err := ExecuteSSHCommand(client, bgScript) - return err -}