diff --git a/go.mod b/go.mod index d4043c53..7c7cd80a 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( golang.org/x/text v0.29.0 google.golang.org/grpc v1.75.0 gopkg.in/validator.v2 v2.0.1 + github.com/sfcompute/nodes-go v0.1.0-alpha.3 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.34.1 k8s.io/apimachinery v0.34.1 diff --git a/go.sum b/go.sum index 1f70d3ae..cf59e5b5 100644 --- a/go.sum +++ b/go.sum @@ -160,6 +160,7 @@ github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7D github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/sfcompute/nodes-go v0.1.0-alpha.3/go.mod h1:dF3O8MCxLz3FTVYhjCa876Z9O3EAM8E8fONivDpfmkM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= diff --git a/v1/providers/sfcompute/capabilities.go b/v1/providers/sfcompute/capabilities.go new file mode 100644 index 00000000..ac0604af --- /dev/null +++ b/v1/providers/sfcompute/capabilities.go @@ -0,0 +1,24 @@ +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/v1" +) + +func getSFCCapabilities() v1.Capabilities { + return v1.Capabilities{ + v1.CapabilityCreateInstance, + v1.CapabilityTerminateInstance, + v1.CapabilityCreateTerminateInstance, + // add others supported by your provider: reboot, stop/start, machine-image, tags, resize-volume, modify-firewall, etc. + } +} + +func (c *SFCClient) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return getSFCCapabilities(), nil +} + +func (c *SFCCredential) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return getSFCCapabilities(), nil +} diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go new file mode 100644 index 00000000..fce08ca0 --- /dev/null +++ b/v1/providers/sfcompute/client.go @@ -0,0 +1,73 @@ +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/v1" + "github.com/sfcompute/nodes-go/option" + + sfcnodes "github.com/sfcompute/nodes-go" +) + +type SFCCredential struct { + RefID string + apiKey string `json:"api_key"` +} + +var _ v1.CloudCredential = &SFCCredential{} + +func NewSFCCredential(refID string, apiKey string /* auth fields */) *SFCCredential { + return &SFCCredential{ + RefID: refID, + apiKey: apiKey, + // ... + } +} + +func (c *SFCCredential) GetReferenceID() string { return c.RefID } +func (c *SFCCredential) GetAPIType() v1.APIType { return v1.APITypeLocational /* or v1.APITypeGlobal */ } +func (c *SFCCredential) GetCloudProviderID() v1.CloudProviderID { + return "sfcompute" // e.g., "lambdalabs" +} +func (c *SFCCredential) GetTenantID() (string, error) { + // sfc does not have a tenant system, return empty string + return "", nil +} + +func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { + // Create a client configured for a given location if locational API + return NewSFCClient(c.RefID, c.apiKey /* auth fields */).MakeClient(ctx, location) +} + +// ---------------- Client ---------------- + +type SFCClient struct { + v1.NotImplCloudClient + refID string + location string + apiKey string + client sfcnodes.Client // Add this field + // add http/sdk client fields, base URLs, etc. +} + +var _ v1.CloudClient = &SFCClient{} + +func NewSFCClient(refID string, apiKey string /* auth fields */) *SFCClient { + return &SFCClient{ + refID: refID, + apiKey: apiKey, + client: sfcnodes.NewClient( + option.WithBearerToken(apiKey)), + // init http/sdk clients here + } +} + +func (c *SFCClient) GetAPIType() v1.APIType { return v1.APITypeGlobal /* or Global */ } +func (c *SFCClient) GetCloudProviderID() v1.CloudProviderID { return "sfcompute" } +func (c *SFCClient) GetReferenceID() string { return c.refID } +func (c *SFCClient) GetTenantID() (string, error) { return "", nil } + +func (c *SFCClient) MakeClient(_ context.Context, location string) (v1.CloudClient, error) { + c.location = location + return c, nil +} diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go new file mode 100644 index 00000000..4cc8f7b8 --- /dev/null +++ b/v1/providers/sfcompute/instance.go @@ -0,0 +1,159 @@ +package v1 + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + "time" + + v1 "github.com/brevdev/cloud/v1" + sfcnodes "github.com/sfcompute/nodes-go" + "github.com/sfcompute/nodes-go/packages/param" +) + +// define function to convert string to b64 +func toBase64(s string) string { + return base64.StdEncoding.EncodeToString([]byte(s)) +} + +// define function to add ssh key to cloud init +func sshKeyCloudInit(sshKey string) string { + return toBase64(fmt.Sprintf("#cloud-config\nssh_authorized_keys:\n - %s", sshKey)) +} + +func mapSFCStatus(s string) v1.LifecycleStatus { + switch strings.ToLower(s) { + case "pending", "nodefailure", "unspecified", "awaitingcapacity", "unknown", "failed": + return v1.LifecycleStatusPending + case "running": + return v1.LifecycleStatusRunning + // case "stopping": + //return v1.LifecycleStatusStopping + case "stopped": + return v1.LifecycleStatusStopped + case "terminating", "released": + return v1.LifecycleStatusTerminating + case "destroyed", "deleted": + return v1.LifecycleStatusTerminated + default: + return v1.LifecycleStatusPending + } +} + +func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { + resp, err := c.client.Nodes.New(ctx, sfcnodes.NodeNewParams{ + CreateNodesRequest: sfcnodes.CreateNodesRequestParam{ + DesiredCount: 1, + MaxPricePerNodeHour: 1000, + Zone: attrs.Location, + ImageID: param.Opt[string]{Value: attrs.ImageID}, //this needs to point to a valid image + CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, // encode ssh key to b64-wrapped cloud-init script + }, + }) + if err != nil { + return nil, err + } + + if len(resp.Data) == 0 { + return nil, fmt.Errorf("no nodes returned") + } + node := resp.Data[0] + + inst := &v1.Instance{ + Name: attrs.Name, + RefID: attrs.RefID, + CloudCredRefID: c.refID, + CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID + ImageID: attrs.ImageID, + InstanceType: attrs.InstanceType, + Location: attrs.Location, + CreatedAt: time.Now(), + Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, // map SDK status to our lifecycle + InstanceTypeID: v1.InstanceTypeID(node.GPUType), + SSHPort: 2222, // we use 2222/tcp for all of our SSH ports + } + + return inst, nil +} + +func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { + node, err := c.client.Nodes.Get(ctx, string(id)) + if err != nil { + panic(err.Error()) + } + var vmID string + if len(node.VMs.Data) > 0 { + vmID = node.VMs.Data[0].ID + fmt.Println(vmID) + } + + ssh, err := c.client.VMs.SSH(ctx, sfcnodes.VMSSHParams{VMID: vmID}) + if err != nil { + panic(err.Error()) + } + + inst := &v1.Instance{ + Name: node.Name, + RefID: c.refID, + CloudCredRefID: c.refID, + CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID + PublicIP: ssh.SSHHostname, + CreatedAt: time.Unix(node.CreatedAt, 0), + Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, // map SDK status to our lifecycle + InstanceTypeID: v1.InstanceTypeID(node.GPUType), + } + return inst, nil +} + +func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, error) { + resp, err := c.client.Nodes.List(ctx, sfcnodes.NodeListParams{}) + if err != nil { + return nil, err + } + + var instances []v1.Instance + for _, node := range resp.Data { + inst, err := c.GetInstance(ctx, v1.CloudProviderInstanceID(node.ID)) + if err != nil { + return nil, err + } + if inst != nil { + instances = append(instances, *inst) + } + } + return instances, nil +} + +func (c *SFCClient) TerminateInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + // release the node first + _, errRelease := c.client.Nodes.Release(ctx, string(id)) + if errRelease != nil { + panic(errRelease.Error()) + } + // then delete the node + errDelete := c.client.Nodes.Delete(ctx, string(id)) + if errDelete != nil { + panic(errDelete.Error()) + } + return nil +} + +// Optional if supported: +func (c *SFCClient) RebootInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} +func (c *SFCClient) StopInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} +func (c *SFCClient) StartInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} + +// Merge strategies (pass-through is acceptable baseline). +func (c *SFCClient) MergeInstanceForUpdate(_ v1.Instance, newInst v1.Instance) v1.Instance { + return newInst +} +func (c *SFCClient) MergeInstanceTypeForUpdate(_ v1.InstanceType, newIt v1.InstanceType) v1.InstanceType { + return newIt +}