Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 47 additions & 12 deletions v1/providers/sfcompute/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceA
return nil, errors.WrapAndTrace(err)
}

// Create a name for the node
name := brevDataToSFCName(attrs.RefID, attrs.Name)
// Pack cloud cred ref ID, brev stage, instance ref ID, and name into the SFC node name.
// SFC has no tags API, so the node name is the only place to persist this metadata.
stage := getStageFromTags(attrs.Tags)
name := brevDataToSFCName(c.refID, stage, attrs.RefID, attrs.Name)

// Create the node
resp, err := c.client.Nodes.New(ctx, sfcnodes.NodeNewParams{
Expand Down Expand Up @@ -231,11 +233,15 @@ type sfcNodeInfo struct {
}

func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error) {
// Get the refID and name from the node name
refID, name, err := sfcNameToBrevData(node.name)
// Parse cloud cred ref ID, brev stage, instance ref ID, and name from the node name.
// Old-format names (refID_name) return empty cloudCredRefID — fall back to c.refID.
cloudCredRefID, _, refID, name, err := sfcNameToBrevData(node.name)
if err != nil {
return nil, errors.WrapAndTrace(err)
}
if cloudCredRefID == "" {
cloudCredRefID = c.refID
}

// Get the instance type for the zone
instanceType, err := getInstanceTypeForZone(*node.zone)
Expand Down Expand Up @@ -270,7 +276,7 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error
Spot: false,
Stoppable: false,
Rebootable: false,
CloudCredRefID: c.refID, // TODO: this should be pulled from the node itself
CloudCredRefID: cloudCredRefID,
}
return inst, nil
}
Expand Down Expand Up @@ -448,16 +454,45 @@ func (c *SFCClient) getSSHHostnameFromVM(ctx context.Context, vmID string, vmSta
return sshResponse.SSHHostname, nil
}

func brevDataToSFCName(refID string, name string) string {
return fmt.Sprintf("%s_%s", refID, name)
// brevDataToSFCName builds the SFC node name that carries Brev metadata.
//
// SFC offers no tags/labels API, so the node name is the only field in which the
// cloud credential ref ID, brev stage, instance ref ID, and instance name can be
// persisted. The four values are joined with underscores, in this order:
//
//	{cloudCredRefID}_{brevStage}_{refID}_{name}
//
// The values are written as-is; nothing is escaped or validated here.
func brevDataToSFCName(cloudCredRefID string, brevStage string, refID string, name string) string {
	const sep = "_"
	// All operands are strings, so fmt.Sprint inserts no spaces between them.
	return fmt.Sprint(cloudCredRefID, sep, brevStage, sep, refID, sep, name)
}

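// sfcNameToBrevData parses an SFC node name back into its components.
//
// The name is split on "_" into at most 4 parts. Two formats are supported for
// backward compatibility:
// - New (4 parts): {cloudCredRefID}_{brevStage}_{refID}_{name} — any further
//   underscores stay inside {name}
// - Old (2 parts): {refID}_{name} — cloudCredRefID and brevStage are returned empty
//
// Any other part count is an error. Caveat: an old-format name whose {name} itself
// contains underscores splits into 3 or 4 parts, so it is either rejected or read
// as new-format.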
func sfcNameToBrevData(name string) (cloudCredRefID string, brevStage string, refID string, instanceName string, err error) {
parts := strings.SplitN(name, "_", 4)
switch len(parts) {
case 4:
// New format: cloudCredRefID_brevStage_refID_name
return parts[0], parts[1], parts[2], parts[3], nil
case 2:
// Old format: refID_name (backward compat — cloudCredRefID and stage unknown)
// TODO: remove this case once all old-format nodes have been cleaned up
return "", "", parts[0], parts[1], nil
Comment on lines 477 to 480
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call, maybe need a TODO here to remove once we clean up the old nodes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

default:
return "", "", "", "", errors.WrapAndTrace(fmt.Errorf("invalid node name %s: expected 2 or 4 underscore-separated parts", name))
}
}

func sfcNameToBrevData(name string) (string, string, error) {
parts := strings.SplitN(name, "_", 2)
if len(parts) != 2 {
return "", "", errors.WrapAndTrace(fmt.Errorf("invalid node name %s", name))
// getStageFromTags extracts the control plane stage value from instance tags.
// The control plane prefixes the tag key, so we match any key ending with "-stage"
// rather than coupling to a specific prefix. The first matching key encountered
// wins; if no key matches, "unknown" is returned.
func getStageFromTags(tags v1.Tags) string {
for k, v := range tags {
if strings.HasSuffix(k, "-stage") {
return v
}
}
return parts[0], parts[1], nil
return "unknown"
}

// Optional if supported:
Expand Down
13 changes: 11 additions & 2 deletions v1/providers/sfcompute/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ const (
interconnectInfiniband = "infiniband"
formFactorSXM5 = "sxm5"
diskTypeSSD = "ssd"

// Currently only 8xH100/H200 instance types are available
// so it's safe to hardcode vCPU and GPU count.
sfcVCPU = 112
sfcGPUCount = 8
)

func makeDefaultInstanceTypePrice(amount string, currencyCode string) currency.Amount {
Expand Down Expand Up @@ -112,6 +117,7 @@ func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) (*v1.InstanceTyp
Type: makeInstanceTypeName(zone),
Memory: ram,
MemoryBytes: gpuMetadata.memoryBytes,
VCPU: gpuMetadata.vcpu,
Location: zoneToLocation(zone).Name,
Stoppable: false,
Rebootable: false,
Expand Down Expand Up @@ -216,6 +222,7 @@ type sfcInstanceTypeMetadata struct {
architecture v1.Architecture
memoryBytes v1.Bytes
diskBytes v1.Bytes
vcpu int32
gpuCount int32
gpuManufacturer v1.Manufacturer
gpuVRAM v1.Bytes
Expand All @@ -240,7 +247,8 @@ var h100InstanceTypeMetadata = sfcInstanceTypeMetadata{
architecture: v1.ArchitectureX86_64,
memoryBytes: v1.NewBytes(960, v1.Gigabyte),
diskBytes: v1.NewBytes(1500, v1.Gigabyte),
gpuCount: 8,
vcpu: sfcVCPU,
gpuCount: sfcGPUCount,
gpuManufacturer: v1.ManufacturerNVIDIA,
gpuVRAM: v1.NewBytes(80, v1.Gigabyte),
estimatedDeployTime: 14 * time.Minute,
Expand All @@ -253,7 +261,8 @@ var h200InstanceTypeMetadata = sfcInstanceTypeMetadata{
architecture: v1.ArchitectureX86_64,
memoryBytes: v1.NewBytes(960, v1.Gigabyte),
diskBytes: v1.NewBytes(1500, v1.Gigabyte),
gpuCount: 8,
vcpu: sfcVCPU,
gpuCount: sfcGPUCount,
gpuManufacturer: v1.ManufacturerNVIDIA,
gpuVRAM: v1.NewBytes(141, v1.Gigabyte),
estimatedDeployTime: 14 * time.Minute,
Expand Down
Loading