diff --git a/.gitignore b/.gitignore index 29d31af..0f518d2 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,9 @@ notes/ docs/public docs/resources/_gen/ docs/.hugo_build.lock -test/integration/clab-* \ No newline at end of file +test/integration/**/clab-* + +# Only for development and testing purposes +# To be removed after development of targetsource +# ignored in order to not add unnecessary logging messages +lab/dev/resources/targetsources diff --git a/Makefile b/Makefile index fdcc2b2..98c42e7 100644 --- a/Makefile +++ b/Makefile @@ -308,9 +308,10 @@ delete-targetsources-dev-lab: ## Delete the target sources for the development l ##@ Testing Lab .PHONY: run-integration-tests -run-integration-tests: docker-build undeploy-test-cluster deploy-test-cluster install-test-cluster-dependencies load-test-image deploy install-kubectl install-gnmic install-containerlab deploy-test-topology apply-test-resources +run-integration-tests: docker-build undeploy-test-cluster deploy-test-cluster install-test-cluster-dependencies load-test-image deploy deploy-test-http-server install-kubectl install-gnmic install-containerlab deploy-test-topology apply-test-resources kubectl wait --for=condition=Ready cluster --all --timeout=180s kubectl wait --for=condition=Ready pipeline --all --timeout=180s + kubectl wait --for=jsonpath='{.status.targetsCount}'=3 targetsource --all --timeout=180s kubectl wait --for=jsonpath='{.status.connectionState}'=READY target --all --timeout=180s kubectl get subscriptions -o yaml kubectl get outputs -o yaml diff --git a/api/v1alpha1/targetsource_types.go b/api/v1alpha1/targetsource_types.go index feea000..45414ce 100644 --- a/api/v1alpha1/targetsource_types.go +++ b/api/v1alpha1/targetsource_types.go @@ -24,7 +24,8 @@ import ( // +kubebuilder:validation:Required type TargetSourceSpec struct { Provider *ProviderSpec `json:"provider"` - // + + // +kubebuilder:validation:Optional TargetLabels map[string]string `json:"targetLabels,omitempty"` // 
+kubebuilder:validation:MinLength=1 @@ -39,7 +40,10 @@ type ProviderSpec struct { type HTTPConfig struct { // +kubebuilder:validation:MinLength=1 - URL string `json:"url"` + URL string `json:"url"` + Token string `json:"token,omitempty"` + // +kubebuilder:validation:Optional + AcceptPush bool `json:"acceptPush,omitempty"` } type ConsulConfig struct { @@ -49,9 +53,10 @@ type ConsulConfig struct { // TargetSourceStatus defines the observed state of TargetSource type TargetSourceStatus struct { - Status string `json:"status"` - TargetsCount int32 `json:"targetsCount"` - LastSync metav1.Time `json:"lastSync"` + Status string `json:"status,omitempty"` + ObservedGeneration int64 `json:"observedGeneration"` + TargetsCount int32 `json:"targetsCount,omitempty"` + LastSync metav1.Time `json:"lastSync,omitempty"` } //+kubebuilder:object:root=true diff --git a/cmd/main.go b/cmd/main.go index 4c37a0d..3bb04f7 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -28,6 +28,7 @@ import ( certmanagerv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" @@ -40,6 +41,8 @@ import ( operatorv1alpha1 "github.com/gnmic/operator/api/v1alpha1" "github.com/gnmic/operator/internal/apiserver" "github.com/gnmic/operator/internal/controller" + "github.com/gnmic/operator/internal/controller/discovery" + "github.com/gnmic/operator/internal/controller/discovery/core" webhookv1alpha1 "github.com/gnmic/operator/internal/webhook/v1alpha1" //+kubebuilder:scaffold:imports ) @@ -64,6 +67,8 @@ func main() { var probeAddr string var devMode bool var apiAddr string + var discoveryChunkSize int + var discoveryBufferSize int flag.StringVar(&apiAddr, "api-bind-address", "", "The address the operator API endpoint binds to. 
Disabled if empty.") flag.BoolVar(&devMode, "dev-mode", false, "Enable development mode.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") @@ -71,6 +76,8 @@ func main() { flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") + flag.IntVar(&discoveryChunkSize, "discovery-chunk-size", 100, "Maximum number of targets/events sent in a single discovery message.") + flag.IntVar(&discoveryBufferSize, "discovery-buffer-size", 10, "Amount of discovery messages that can be queued in the channel buffer.") opts := zap.Options{ Development: devMode, } @@ -79,6 +86,8 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + discoveryRegistry := discovery.NewRegistry[types.NamespacedName, core.DiscoveryRegistryValue]() + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, Metrics: metricsserver.Options{BindAddress: metricsAddr}, @@ -117,8 +126,11 @@ func main() { os.Exit(1) } if err := (&controller.TargetSourceReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + BufferSize: discoveryBufferSize, + ChunkSize: discoveryChunkSize, + DiscoveryRegistry: discoveryRegistry, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "TargetSource") os.Exit(1) @@ -220,6 +232,7 @@ func main() { if apiAddr != "" { apiServer := apiserver.New(apiAddr, clusterReconciler) + apiServer.DiscoveryRegistry = discoveryRegistry err = mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { errCh := make(chan error) go func() { diff --git a/config/crd/bases/operator.gnmic.dev_targetsources.yaml b/config/crd/bases/operator.gnmic.dev_targetsources.yaml index f373822..54a4394 100644 --- a/config/crd/bases/operator.gnmic.dev_targetsources.yaml +++ 
b/config/crd/bases/operator.gnmic.dev_targetsources.yaml @@ -49,6 +49,10 @@ spec: type: object http: properties: + acceptPush: + type: boolean + token: + type: string url: minLength: 1 type: string @@ -77,15 +81,16 @@ spec: lastSync: format: date-time type: string + observedGeneration: + format: int64 + type: integer status: type: string targetsCount: format: int32 type: integer required: - - lastSync - - status - - targetsCount + - observedGeneration type: object type: object served: true diff --git a/go.mod b/go.mod index f236ded..827da2a 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25.5 require ( github.com/cert-manager/cert-manager v1.19.3 github.com/go-logr/logr v1.4.3 + github.com/google/uuid v1.6.0 github.com/onsi/ginkgo/v2 v2.27.3 github.com/onsi/gomega v1.38.3 github.com/openconfig/gnmic/pkg/api v0.1.10 @@ -47,7 +48,6 @@ require ( github.com/google/gnostic-models v0.7.1 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20251213031049-b05bdaca462f // indirect - github.com/google/uuid v1.6.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect diff --git a/internal/apiserver/apiserver.go b/internal/apiserver/apiserver.go index f31abaa..5eb88b8 100644 --- a/internal/apiserver/apiserver.go +++ b/internal/apiserver/apiserver.go @@ -5,11 +5,16 @@ import ( "net/http" "github.com/gnmic/operator/internal/controller" + "github.com/gnmic/operator/internal/controller/discovery" + "github.com/gnmic/operator/internal/controller/discovery/core" + "k8s.io/apimachinery/pkg/types" ) type APIServer struct { Server *http.Server clusterReconciler *controller.ClusterReconciler + + DiscoveryRegistry *discovery.Registry[types.NamespacedName, core.DiscoveryRegistryValue] } func New(addr string, clusterReconciler *controller.ClusterReconciler) *APIServer { diff --git 
a/internal/controller/const.go b/internal/controller/const.go index b5196b8..5ef2e8f 100644 --- a/internal/controller/const.go +++ b/internal/controller/const.go @@ -21,6 +21,8 @@ const ( LabelCertType = "operator.gnmic.dev/cert-type" LabelValueCertTypeClient = "client" LabelValueCertTypeTunnel = "tunnel" + + LabelTargetSourceFinalizer = "operator.gnmic.dev/targetsource-finalizer" ) const ( diff --git a/internal/controller/discovery/client.go b/internal/controller/discovery/client.go index 3bc7ef7..e5cc5ea 100644 --- a/internal/controller/discovery/client.go +++ b/internal/controller/discovery/client.go @@ -1,22 +1,28 @@ package discovery -// File may become obsolete, depends on how the logic to compare desired vs. existing state will get implemented - import ( "context" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" ) -func FetchExistingTargets(ctx context.Context, c client.Client, ts gnmicv1alpha1.TargetSource) ([]gnmicv1alpha1.Target, error) { +func fetchExistingTargets(ctx context.Context, c client.Client, ts *gnmicv1alpha1.TargetSource) ([]gnmicv1alpha1.Target, error) { var targetList gnmicv1alpha1.TargetList - err := c.List(ctx, &targetList, + err := c.List( + ctx, + &targetList, client.InNamespace(ts.Namespace), client.MatchingLabels{ - "gnmic.io/source": ts.Name, + LabelTargetSourceName: ts.Name, }, ) if err != nil { @@ -25,3 +31,59 @@ func FetchExistingTargets(ctx context.Context, c client.Client, ts gnmicv1alpha1 return targetList.Items, nil } + +func applyTarget(ctx context.Context, c client.Client, s *runtime.Scheme, desired *gnmicv1alpha1.Target, ts *gnmicv1alpha1.TargetSource) error { + existing := &gnmicv1alpha1.Target{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, c, existing, func() error { + existing.Spec = desired.Spec + existing.Labels = desired.Labels + + return controllerutil.SetControllerReference(ts, existing, s) + }) + + return err +} + +func deleteTarget(ctx context.Context, c client.Client, name string, namespace string) error { + existing := &gnmicv1alpha1.Target{} + + err := c.Get(ctx, types.NamespacedName{ + Name: name, + Namespace: namespace, + }, existing) + if apierrors.IsNotFound(err) { + return nil + } else if err != nil { + return err + } + + err = c.Delete(ctx, existing) + if apierrors.IsNotFound(err) { + return nil + } + + return err +} + +// updateTargetSourceStatus updates the status of the TargetSource Object ts. The only fields updated are targetCount and LastSync, which takes the current timestamp. +func updateTargetSourceStatus(ctx context.Context, c client.Client, ts *gnmicv1alpha1.TargetSource, targetCount int32) error { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + latest := &gnmicv1alpha1.TargetSource{} + if err := c.Get(ctx, client.ObjectKeyFromObject(ts), latest); err != nil { + return err + } + + latest.Status.TargetsCount = targetCount + latest.Status.LastSync = metav1.Now() + + return c.Status().Update(ctx, latest) + }) + + return err +} diff --git a/internal/controller/discovery/const.go b/internal/controller/discovery/const.go new file mode 100644 index 0000000..8d37785 --- /dev/null +++ b/internal/controller/discovery/const.go @@ -0,0 +1,13 @@ +package discovery + +const ( + // Kubernetes Side Labels + LabelTargetSourceName = "operator.gnmic.dev/targetsource" +) + +const ( + // Prefix and Labels for external systems + ExternalLabelPrefix = "gnmic_operator_" + + ExternalLabelTargetProfile = ExternalLabelPrefix + "target_profile" +) diff --git a/internal/controller/discovery/core/loader_interface.go 
b/internal/controller/discovery/core/loader_interface.go index 82036a8..97810cb 100644 --- a/internal/controller/discovery/core/loader_interface.go +++ b/internal/controller/discovery/core/loader_interface.go @@ -12,12 +12,7 @@ type Loader interface { // Name returns the unique loader identifier e.g. "pull" Name() string - // Start begins discovery and pushes target snapshots into the out channel - // The loader must stop cleanly when ctx is cancelled - Start( - ctx context.Context, - targetsourceName string, - spec gnmicv1alpha1.TargetSourceSpec, - out chan<- []DiscoveryMessage, - ) error + // Run begins discovery and pushes target snapshots or events into the out channel + // The loader must stop cleanly when ctx is canceled + Run(ctx context.Context, out chan<- []DiscoveryMessage, spec gnmicv1alpha1.TargetSourceSpec) error } diff --git a/internal/controller/discovery/core/message_interface.go b/internal/controller/discovery/core/message_interface.go new file mode 100644 index 0000000..0836bc6 --- /dev/null +++ b/internal/controller/discovery/core/message_interface.go @@ -0,0 +1,8 @@ +package core + +type DiscoveryMessage interface { + isDiscoveryMessage() +} + +func (DiscoveryEvent) isDiscoveryMessage() {} +func (DiscoverySnapshot) isDiscoveryMessage() {} diff --git a/internal/controller/discovery/core/types.go b/internal/controller/discovery/core/types.go index 406a22b..7697bc3 100644 --- a/internal/controller/discovery/core/types.go +++ b/internal/controller/discovery/core/types.go @@ -1,5 +1,39 @@ package core +import ( + "context" + + "k8s.io/apimachinery/pkg/types" +) + +// DiscoveryRegistryValue represents the controller-owned runtime state +// with its configuration for a single TargetSource +type DiscoveryRegistryValue struct { + // Channel is the outbound communication channel used by discovery + // components (loaders, webhooks, etc.) 
to emit discovery messages + Channel chan<- []DiscoveryMessage + // Stop cancels the discovery context associated with this registry entry + Stop context.CancelFunc + + CommonLoaderConfig *CommonLoaderConfig +} + +type CommonLoaderConfig struct { + TargetsourceNN types.NamespacedName + ChunkSize int + AcceptPush bool +} + +// EventAction represents the type of a discovery event +type EventAction int + +const ( + // EventDelete indicates that a target should be removed + EventDelete EventAction = iota + // EventApply indicates that a target should be applied (created or updated) + EventApply +) + // DiscoveredTarget represents a target discovered from an external source // before it is materialized as a Kubernetes Target CR type DiscoveredTarget struct { @@ -8,15 +42,25 @@ type DiscoveredTarget struct { Labels map[string]string } -const ( - DELETE DiscoveryEvent = 0 - CREATE DiscoveryEvent = 1 - UPDATE DiscoveryEvent = 2 -) +type DiscoveryEvent struct { + Target DiscoveredTarget + Event EventAction +} -type DiscoveryEvent int +func (e EventAction) ToString() string { + switch e { + case EventDelete: + return "DELETE" + case EventApply: + return "APPLY" + default: + return "UNKNOWN" + } +} -type DiscoveryMessage struct { - Target DiscoveredTarget - Event DiscoveryEvent +type DiscoverySnapshot struct { + SnapshotID string + ChunkIndex int + TotalChunks int + Targets []DiscoveredTarget } diff --git a/internal/controller/discovery/discovery.go b/internal/controller/discovery/discovery.go new file mode 100644 index 0000000..491cdfb --- /dev/null +++ b/internal/controller/discovery/discovery.go @@ -0,0 +1,15 @@ +package discovery + +// Package discovery implements the discovery runtime subsystem. +// +// The discovery subsystem is responsible for: +// - Receiving discovery data from external providers (loaders, webhooks). +// - Applying discovered state to Kubernetes Targets. 
+// +// The package is structured into the following subpackages: +// - core: message contracts, snapshot/event types, and transport helpers. +// - message processor: snapshot + event target state application logic. +// - loaders: target discovery providers (HTTP, webhook, etc.). +// - registry: key -> channel registry. +// +// At the moment, the targetsource controller imports specific subpackages explicitly. diff --git a/internal/controller/discovery/loader.go b/internal/controller/discovery/loader.go deleted file mode 100644 index 3d45f42..0000000 --- a/internal/controller/discovery/loader.go +++ /dev/null @@ -1,24 +0,0 @@ -package discovery - -import ( - "fmt" - - gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" - "github.com/gnmic/operator/internal/controller/discovery/core" - pull "github.com/gnmic/operator/internal/controller/discovery/loaders/pull" -) - -// NewLoader creates a loader by name -func NewLoader(name string, namespace string, spec gnmicv1alpha1.TargetSourceSpec) (core.Loader, error) { - loaderName := namespace + "/" + name - - switch { - case spec.Provider.HTTP != nil: - return pull.New(), nil - case spec.Provider.Consul != nil: - return nil, fmt.Errorf("unknown targetsource loader, check TargetSource CRD for %s", loaderName) - default: - return nil, fmt.Errorf("unknown targetsource loader, check TargetSource CRD for %s", loaderName) - } - -} diff --git a/internal/controller/discovery/loaders.go b/internal/controller/discovery/loaders.go new file mode 100644 index 0000000..7f2c656 --- /dev/null +++ b/internal/controller/discovery/loaders.go @@ -0,0 +1,24 @@ +package discovery + +import ( + "fmt" + + gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" + "github.com/gnmic/operator/internal/controller/discovery/core" + http "github.com/gnmic/operator/internal/controller/discovery/loaders/http" +) + +// NewLoader creates a loader by name +func NewLoader(cfg core.CommonLoaderConfig, spec *gnmicv1alpha1.TargetSourceSpec) (core.Loader, 
core.CommonLoaderConfig, error) { + + switch { + case spec.Provider.HTTP != nil: + cfg.AcceptPush = spec.Provider.HTTP.AcceptPush + return http.New(cfg), cfg, nil + case spec.Provider.Consul != nil: + return nil, cfg, fmt.Errorf("unknown targetsource loader, check TargetSource CRD for %s", cfg.TargetsourceNN) + default: + return nil, cfg, fmt.Errorf("unknown targetsource loader, check TargetSource CRD for %s", cfg.TargetsourceNN) + } + +} diff --git a/internal/controller/discovery/loaders/all/all.go b/internal/controller/discovery/loaders/all/all.go deleted file mode 100644 index d05604b..0000000 --- a/internal/controller/discovery/loaders/all/all.go +++ /dev/null @@ -1,6 +0,0 @@ -package all - -import ( - _ "github.com/gnmic/operator/internal/controller/discovery/loaders/pull" - // _ "github.com/gnmic/operator/internal/controller/targetsource/loaders/push" -) diff --git a/internal/controller/discovery/loaders/http/loader.go b/internal/controller/discovery/loaders/http/loader.go new file mode 100644 index 0000000..e4994e5 --- /dev/null +++ b/internal/controller/discovery/loaders/http/loader.go @@ -0,0 +1,151 @@ +package http + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "time" + + "sigs.k8s.io/controller-runtime/pkg/log" + + gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" + "github.com/gnmic/operator/internal/controller/discovery/core" + loaderUtils "github.com/gnmic/operator/internal/controller/discovery/loaders/utils" + "github.com/google/uuid" +) + +const ( + defaultPollInterval = 30 * time.Second +) + +// Loader implements the HTTP pull discovery mechanism +type Loader struct { + commonCfg core.CommonLoaderConfig +} + +// New instantiates the http loader with the provided config +func New(cfg core.CommonLoaderConfig) core.Loader { + return &Loader{commonCfg: cfg} +} + +func (l *Loader) Name() string { + return "http" +} + +func (l *Loader) Run(ctx context.Context, out chan<- []core.DiscoveryMessage, spec 
gnmicv1alpha1.TargetSourceSpec) error { + logger := log.FromContext(ctx).WithValues( + "component", "loader", + "name", l.Name(), + "targetsource", l.commonCfg.TargetsourceNN, + ) + + logger.Info( + "HTTP loader started", + "targetsource", l.commonCfg.TargetsourceNN.Name, + "namespace", l.commonCfg.TargetsourceNN.Namespace, + ) + + // Input Validation of spec + if spec.Provider == nil || spec.Provider.HTTP == nil { + return errors.New("HTTP loader requires spec.provider.http to be set") + } + + client := &http.Client{ + Timeout: 30 * time.Second, + } + + interval := defaultPollInterval + ticker := time.NewTicker(interval) + defer ticker.Stop() + + logger.Info( + "HTTP polling discovery started", + "interval", interval.String(), + "url", spec.Provider.HTTP.URL, + ) + + // helper function to fetch targets and emit discovery messages + fetchAndEmit := func() { + targets, err := l.fetchTargetsFromHTTPEndpoint( + ctx, + client, + spec.Provider.HTTP.URL, + spec.Provider.HTTP.Token, + ) + if err != nil { + logger.Error( + err, + "Failed to fetch targets from HTTP endpoint", + "url", spec.Provider.HTTP.URL, + ) + return + } + + snapshotID := fmt.Sprintf("%s-%s-%s", l.commonCfg.TargetsourceNN.Namespace, l.commonCfg.TargetsourceNN.Name, uuid.NewString()) + if err := loaderUtils.SendSnapshot(ctx, out, targets, snapshotID, l.commonCfg.ChunkSize); err != nil { + logger.Error( + err, + "Failed to send discovery snapshot", + "snapshotID", snapshotID, + "targets", len(targets), + ) + return + } + + logger.Info( + "Discovery snapshot sent", + "snapshotID", snapshotID, + "targets", len(targets), + ) + } + + // Immediate fetch on startup + fetchAndEmit() + + // Periodic fetch + for { + select { + case <-ctx.Done(): + logger.Info("HTTP loader stopped") + return nil + + case <-ticker.C: + fetchAndEmit() + } + } +} + +func (l *Loader) fetchTargetsFromHTTPEndpoint( + ctx context.Context, + client *http.Client, + url string, + token string, +) ([]core.DiscoveredTarget, error) { + + req, 
err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("creating HTTP request failed: %w", err) + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("Authorization", "Token "+token) + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected HTTP status code: %d", resp.StatusCode) + } + + var targets []core.DiscoveredTarget + if err := json.NewDecoder(resp.Body).Decode(&targets); err != nil { + return nil, fmt.Errorf("failed to decode HTTP response: %w", err) + } + + return targets, nil +} diff --git a/internal/controller/discovery/loaders/http/loader_test.go b/internal/controller/discovery/loaders/http/loader_test.go new file mode 100644 index 0000000..d02cfda --- /dev/null +++ b/internal/controller/discovery/loaders/http/loader_test.go @@ -0,0 +1 @@ +package http diff --git a/internal/controller/discovery/loaders/pull/loader.go b/internal/controller/discovery/loaders/pull/loader.go deleted file mode 100644 index 5540ea2..0000000 --- a/internal/controller/discovery/loaders/pull/loader.go +++ /dev/null @@ -1,78 +0,0 @@ -package pull - -import ( - "context" - "time" - - "sigs.k8s.io/controller-runtime/pkg/log" - - gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" - "github.com/gnmic/operator/internal/controller/discovery/core" -) - -type Loader struct{} - -// New instantiates the pull loader -func New() core.Loader { - return &Loader{} -} - -func (l *Loader) Name() string { - return "pull" -} - -func (l *Loader) Start( - ctx context.Context, - targetsourceName string, - spec gnmicv1alpha1.TargetSourceSpec, - out chan<- []core.DiscoveryMessage, -) error { - logger := log.FromContext(ctx).WithValues("loader", l.Name()) - - logger.Info("HTTP pull loader started") - - // Only for debugging: emit a static snapshot every 30 seconds - ticker := 
time.NewTicker(30 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - logger.Info("HTTP pull loader stopped") - return nil - - case <-ticker.C: - // Example snapshot (placeholder) - targets := []core.DiscoveryMessage{ - { - Target: core.DiscoveredTarget{ - Name: "ceos1", - Address: "clab-3-nodes-ceos1:6030", - Labels: map[string]string{"TargetSource": targetsourceName}, - }, - Event: core.CREATE, - }, - { - Target: core.DiscoveredTarget{ - Name: "leaf1", - Address: "clab-3-nodes-leaf1:57400", - Labels: map[string]string{"TargetSource": targetsourceName}, - }, - Event: core.CREATE, - }, - } - - // Non-blocking context-aware send - select { - case out <- targets: - logger.V(1).Info( - "emitted target snapshot", - "count", len(targets), - ) - case <-ctx.Done(): - logger.Info("context cancelled while emitting targets") - return nil - } - } - } -} diff --git a/internal/controller/discovery/loaders/pull/loader_test.go b/internal/controller/discovery/loaders/pull/loader_test.go deleted file mode 100644 index 0493bec..0000000 --- a/internal/controller/discovery/loaders/pull/loader_test.go +++ /dev/null @@ -1 +0,0 @@ -package pull diff --git a/internal/controller/discovery/loaders/push/loader.go b/internal/controller/discovery/loaders/push/loader.go deleted file mode 100644 index 92f0ccc..0000000 --- a/internal/controller/discovery/loaders/push/loader.go +++ /dev/null @@ -1,4 +0,0 @@ -package push - -// this file implements the logic receive target updates via HTTP push -// REST API defined internal/apiserver diff --git a/internal/controller/discovery/loaders/push/loader_test.go b/internal/controller/discovery/loaders/push/loader_test.go deleted file mode 100644 index 63fdf61..0000000 --- a/internal/controller/discovery/loaders/push/loader_test.go +++ /dev/null @@ -1 +0,0 @@ -package push diff --git a/internal/controller/discovery/loaders/utils/send.go b/internal/controller/discovery/loaders/utils/send.go new file mode 100644 index 
0000000..3cfba8d --- /dev/null +++ b/internal/controller/discovery/loaders/utils/send.go @@ -0,0 +1,94 @@ +package utils + +import ( + "context" + "fmt" + + "github.com/gnmic/operator/internal/controller/discovery/core" +) + +// sendMessages sends discovery messages over a channel in a context-aware manner +func sendMessages(ctx context.Context, out chan<- []core.DiscoveryMessage, messages []core.DiscoveryMessage) error { + select { + case <-ctx.Done(): + return ctx.Err() + case out <- messages: + } + return nil +} + +// forEachChunk iterates over ranges [start,end) for a total count using the provided chunkSize +func forEachChunk(total, chunkSize int, fn func(start, end int) error) error { + for i := 0; i < total; i += chunkSize { + end := i + chunkSize + if end > total { + end = total + } + if err := fn(i, end); err != nil { + return err + } + } + return nil +} + +// createDiscoverySnapshots takes a list of discovered targets and returns chunked DiscoverySnapshots +func createDiscoverySnapshots(targets []core.DiscoveredTarget, snapshotID string, chunkSize int) []core.DiscoverySnapshot { + var snapshots []core.DiscoverySnapshot + totalTargets := len(targets) + totalChunks := (totalTargets + chunkSize - 1) / chunkSize + + _ = forEachChunk(totalTargets, chunkSize, func(i, end int) error { + chunk := targets[i:end] + snapshots = append(snapshots, core.DiscoverySnapshot{ + Targets: chunk, + SnapshotID: snapshotID, + ChunkIndex: i / chunkSize, + TotalChunks: totalChunks, + }) + return nil + }) + + return snapshots +} + +// SendSnapshot sends discovered targets as a snapshot over a channel in chunks +func SendSnapshot(ctx context.Context, out chan<- []core.DiscoveryMessage, targets []core.DiscoveredTarget, snapshotID string, chunkSize int) error { + if len(targets) == 0 { + return fmt.Errorf("no targets in Snapshot") + } + + snapshots := createDiscoverySnapshots(targets, snapshotID, chunkSize) + for _, snapshot := range snapshots { + // Convert DiscoverySnapshot to 
DiscoveryMessage + messages := make([]core.DiscoveryMessage, 1) + messages[0] = snapshot + + if err := sendMessages(ctx, out, messages); err != nil { + return err + } + } + + return nil +} + +func eventsToMessages(events []core.DiscoveryEvent) []core.DiscoveryMessage { + message := make([]core.DiscoveryMessage, len(events)) + for i, event := range events { + message[i] = event + } + return message +} + +// SendEvents sends discovery messages over channel in a context-aware manner +func SendEvents(ctx context.Context, out chan<- []core.DiscoveryMessage, events []core.DiscoveryEvent, chunkSize int) error { + if len(events) == 0 { + return fmt.Errorf("no events to process") + } + + messages := eventsToMessages(events) + total := len(messages) + + return forEachChunk(total, chunkSize, func(i, end int) error { + return sendMessages(ctx, out, messages[i:end]) + }) +} diff --git a/internal/controller/discovery/mapper.go b/internal/controller/discovery/mapper.go index 18470b2..36ce541 100644 --- a/internal/controller/discovery/mapper.go +++ b/internal/controller/discovery/mapper.go @@ -1,4 +1,88 @@ package discovery -// This file makes diff between existing and new targets -// file decides which targets to create/update/delete +import ( + "maps" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" + "github.com/gnmic/operator/internal/controller/discovery/core" +) + +// generateTargetResource converts a DiscoveredTarget into a Kubernetes Target Object based on the TargetSource Spec +func generateTargetResource(d core.DiscoveredTarget, ts *gnmicv1alpha1.TargetSource) *gnmicv1alpha1.Target { + // Create object instance + t := &gnmicv1alpha1.Target{ + ObjectMeta: metav1.ObjectMeta{ + Name: d.Name, + Namespace: ts.Namespace, + Labels: make(map[string]string), + }, + } + + // Add Address from DiscoveredTarget + t.Spec.Address = d.Address + // Add default Target Profile from the TargetSource Spec TargetProfile + 
t.Spec.Profile = ts.Spec.TargetProfile + + // Copy TargetLabels from TargetSource Spec + maps.Copy(t.Labels, ts.Spec.TargetLabels) + + // Handle labels from Source of Truth + for k, v := range d.Labels { + if strings.HasPrefix(k, ExternalLabelPrefix) { + switch k { + case ExternalLabelTargetProfile: // Overwrite TargetProfile if specified by SoT + t.Spec.Profile = v + default: + // TODO: handle unknown label + } + } else { // Copy all other labels into the Target + t.Labels[k] = v + } + } + + // Add TargetSource Label to the Target (precedence over all labels) + t.Labels[LabelTargetSourceName] = ts.Name + + return t +} + +// generateEvents returns a list of DiscoveryEvents. Needed for snapshot handling to determine which devices get deleted and which applied. +func generateEvents(existing []gnmicv1alpha1.Target, discovered []core.DiscoveredTarget) []core.DiscoveryEvent { + var events []core.DiscoveryEvent + + discoveredMap := make(map[string]core.DiscoveredTarget) + for _, d := range discovered { + discoveredMap[d.Name] = d + } + + // Create delete events for targets which are present in existing but not in discovered + for _, e := range existing { + if _, found := discoveredMap[e.Name]; !found { + events = append(events, core.DiscoveryEvent{ + Target: core.DiscoveredTarget{ + Name: e.Name, + }, + Event: core.EventDelete, + }) + } + } + + // Create apply events for all targets in discovered + for _, d := range discovered { + events = append(events, core.DiscoveryEvent{ + Target: d, + Event: core.EventApply, + }) + } + + return events +} + +// normalizeTarget adds the prefix to the target name for identification in Kubernetes +func normalizeTarget(t core.DiscoveredTarget, tsName string) core.DiscoveredTarget { + t.Name = tsName + "-" + t.Name + return t +} diff --git a/internal/controller/discovery/mapper_test.go b/internal/controller/discovery/mapper_test.go deleted file mode 100644 index 5844159..0000000 --- a/internal/controller/discovery/mapper_test.go +++ 
/dev/null @@ -1 +0,0 @@ -package discovery diff --git a/internal/controller/discovery/message_processor.go b/internal/controller/discovery/message_processor.go new file mode 100644 index 0000000..a918ce2 --- /dev/null +++ b/internal/controller/discovery/message_processor.go @@ -0,0 +1,377 @@ +package discovery + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" + "github.com/gnmic/operator/internal/controller/discovery/core" + "github.com/go-logr/logr" +) + +type snapshotBuffer struct { + snapshotID string + totalChunks int + received map[int][]core.DiscoveredTarget + complete bool +} + +// MessageProcessor consumes discovery messages and applies them to Kubernetes +type MessageProcessor struct { + ctx context.Context + client client.Client + scheme *runtime.Scheme + targetSource *gnmicv1alpha1.TargetSource + in <-chan []core.DiscoveryMessage + queue []core.DiscoveryMessage + activeSnapshot *snapshotBuffer + // Events are deferred while snapshot is in progress + deferredEvents []core.DiscoveryEvent + targetCount int32 +} + +// NewMessageProcessor wires a MessageProcessor instance +func NewMessageProcessor(c client.Client, s *runtime.Scheme, ts *gnmicv1alpha1.TargetSource, in <-chan []core.DiscoveryMessage) *MessageProcessor { + return &MessageProcessor{ + client: c, + scheme: s, + targetSource: ts, + in: in, + } +} + +// Run is a long‑running loop that receives target snapshots +// and reconciles Target CRs accordingly +func (m *MessageProcessor) Run(ctx context.Context) error { + m.ctx = ctx + + logger := log.FromContext(ctx).WithValues( + "component", "message-processor", + "targetsource", m.targetSource.Name, + "namespace", m.targetSource.Namespace, + ) + + logger.Info("Message processor started") + + // Update internal counter in case of a process restart + if existing, err := 
fetchExistingTargets(m.ctx, m.client, m.targetSource); err != nil { + logger.Error(err, "error fetching existing targets") + } else { + m.targetCount = int32(len(existing)) + } + + for m.ctx.Err() == nil { + select { + case batch, ok := <-m.in: + if !ok { + // Channel closed, pipeline is shutting down + logger.Info("Input channel closed; stopping message processor") + return nil + } + m.queue = append(m.queue, batch...) + + case <-ctx.Done(): + logger.Info("Context was canceled; stopping message processor") + return nil + } + + for len(m.queue) > 0 { + if ctx.Err() != nil { + return ctx.Err() + } + + msg := m.queue[0] + m.queue = m.queue[1:] + + if err := m.processMessage(m.ctx, msg, logger); err != nil { + // Returning error lets the supervisor (controller) + // tear down and restart the pipeline via reconciliation + // Q: when to return an error vs just log and continue? + return err + } + + } + } + + logger.Info("Message processor stopped") + return nil +} + +// processMessage handles all of the incoming messages from the channel +func (m *MessageProcessor) processMessage(ctx context.Context, message core.DiscoveryMessage, logger logr.Logger) error { + if err := ctx.Err(); err != nil { + return err + } + + // Type assert to determine if this is a snapshot or event + switch msg := message.(type) { + case core.DiscoverySnapshot: + // Collect snapshot chunks + logger.Info( + "Received discovery snapshot chunk", + "snapshotID", msg.SnapshotID, + "chunkIndex", msg.ChunkIndex, + "targets", len(msg.Targets), + ) + + for i := range msg.Targets { + msg.Targets[i] = normalizeTarget(msg.Targets[i], m.targetSource.Name) + } + + return m.processSnapshot(ctx, msg, logger) + + case core.DiscoveryEvent: + // Process individual event-driven update + logger.Info( + "Received discovery event", + "event", msg.Event, + "target", msg.Target.Name, + ) + + msg.Target = normalizeTarget(msg.Target, m.targetSource.Name) + return m.processEvent(ctx, msg, logger) + + default: + return 
fmt.Errorf("unknown discovery message type %T", msg) + } +} + +// processSnapshot takes a complete snapshot of discovered targets and reconciles Target CRs accordingly +func (m *MessageProcessor) processSnapshot(ctx context.Context, chunk core.DiscoverySnapshot, logger logr.Logger) error { + if m.activeSnapshot == nil { + m.startNewSnapshot(chunk, logger) + return nil + } + + snapshot := m.activeSnapshot + // Check if a new snapshot arrived + if snapshot.snapshotID != chunk.SnapshotID { + // If current snapshot is complete apply it first + if snapshot.complete { + if err := m.applySnapshot(ctx, snapshot, logger); err != nil { + return err + } + } else { + // If a new snapshot is started before the old one completed + // the old one can be discarded + logger.Info( + "Discarded incomplete discovery snapshot", + "snapshotID", snapshot.snapshotID, + ) + } + + // Start collecting the new snapshot + m.startNewSnapshot(chunk, logger) + return nil + } + + return m.collectSnapshot(chunk, logger) +} + +func (m *MessageProcessor) startNewSnapshot(chunk core.DiscoverySnapshot, logger logr.Logger) { + m.activeSnapshot = &snapshotBuffer{ + snapshotID: chunk.SnapshotID, + totalChunks: chunk.TotalChunks, + received: make(map[int][]core.DiscoveredTarget), + complete: false, + } + // Delete buffered events that will be current with new snapshot + m.deferredEvents = nil + + m.collectSnapshot(chunk, logger) +} + +func (m *MessageProcessor) collectSnapshot(chunk core.DiscoverySnapshot, logger logr.Logger) error { + snapshot := m.activeSnapshot + + if chunk.TotalChunks != snapshot.totalChunks { + logger.Error( + nil, + "Snapshot totalChunks mismatch", + "snapshotID", snapshot.snapshotID, + ) + } + if chunk.ChunkIndex < 0 || chunk.ChunkIndex >= snapshot.totalChunks { + logger.Error( + nil, + "Snapshot chunk index out of range", + "chunkIndex", chunk.ChunkIndex, + ) + m.activeSnapshot = nil + return nil + } + if _, exists := snapshot.received[chunk.ChunkIndex]; exists { + logger.Error( + 
nil, + "Duplicate snapshot chunk received", + "chunkIndex", chunk.ChunkIndex, + ) + m.activeSnapshot = nil + return nil + } + + snapshot.received[chunk.ChunkIndex] = chunk.Targets + + if len(snapshot.received) == snapshot.totalChunks { + snapshot.complete = true + } + + return nil +} + +// processEvent handles a single DiscoveryEvent message. If a snapshot is in the queue, the events get deferred and applied after. +func (m *MessageProcessor) processEvent(ctx context.Context, event core.DiscoveryEvent, logger logr.Logger) error { + // If snapshot collecting is active defer events + if m.activeSnapshot != nil { + m.deferredEvents = append(m.deferredEvents, event) + return nil + } + + // Apply events + err := m.applyEvent(ctx, event, logger) + if err == nil { + switch event.Event { + case core.EventApply: + m.targetCount++ + m.updateStatus(logger) + case core.EventDelete: + m.targetCount-- + m.updateStatus(logger) + } + } + + return err +} + +// applySnapshot is in charge of getting the Events for the discovered targets and applying them through applyEvent +func (m *MessageProcessor) applySnapshot(ctx context.Context, snapshot *snapshotBuffer, logger logr.Logger) error { + select { + case <-ctx.Done(): + m.activeSnapshot = nil + return nil + default: + } + + var allTargets []core.DiscoveredTarget + for i := 0; i < snapshot.totalChunks; i++ { + select { + case <-ctx.Done(): + m.activeSnapshot = nil + return nil + default: + } + + chunk, ok := snapshot.received[i] + if !ok { + logger.Error( + nil, + "Missing snapshot chunk", + "chunkIndex", i, + ) + m.activeSnapshot = nil + return nil + } + allTargets = append(allTargets, chunk...) 
+ } + + logger.Info( + "Applying discovery snapshot", + "snapshotID", snapshot.snapshotID, + "targets", len(allTargets), + ) + + existing, err := fetchExistingTargets(ctx, m.client, m.targetSource) + if err != nil { + logger.Error(err, "error fetching existing targets") + } else { + logger.Info("fetched existing targets", + "numOfTargets", len(existing), + ) + } + + events := generateEvents(existing, allTargets) + + nApply := 0 + nDelete := 0 + + for _, e := range events { + switch e.Event { + case core.EventApply: + nApply++ + case core.EventDelete: + nDelete++ + } + } + + logger.Info("generated events", + "numOfApply", nApply, + "numOfDelete", nDelete, + ) + + for _, e := range events { + m.applyEvent(ctx, e, logger) + } + + // Replay deferred events + for _, event := range m.deferredEvents { + select { + case <-ctx.Done(): + return nil + default: + } + if err := m.applyEvent(ctx, event, logger); err != nil { + return err + } + } + + // Because of idempotency, allTargets = desired state = targets existing in Kubernetes. Overwrites the counter to "reset" it. 
+ m.targetCount = int32(len(allTargets)) + m.updateStatus(logger) + + m.activeSnapshot = nil + m.deferredEvents = nil + return nil +} + +func (m *MessageProcessor) applyEvent(ctx context.Context, event core.DiscoveryEvent, logger logr.Logger) error { + switch event.Event { + case core.EventDelete: + if err := deleteTarget(ctx, m.client, event.Target.Name, m.targetSource.Namespace); err != nil { + logger.Error(err, "error deleting target", + "targetName", event.Target.Name, + ) + } else { + logger.Info("deleted target object", + "name", event.Target.Name, + ) + } + case core.EventApply: + target := generateTargetResource(event.Target, m.targetSource) + + if err := applyTarget(ctx, m.client, m.scheme, target, m.targetSource); err != nil { + logger.Error(err, "error applying target", + "targetName", event.Target.Name, + ) + } else { + logger.Info("applied target object", + "name", event.Target.Name, + ) + } + } + + return nil +} + +func (m *MessageProcessor) updateStatus(logger logr.Logger) { + if err := updateTargetSourceStatus(m.ctx, m.client, m.targetSource, m.targetCount); err != nil { + logger.Error(err, "error updating TargetSource status") + } else { + logger.Info("updated target source status", + "targetCount", m.targetCount, + ) + } +} diff --git a/internal/controller/discovery/registry.go b/internal/controller/discovery/registry.go new file mode 100644 index 0000000..2193665 --- /dev/null +++ b/internal/controller/discovery/registry.go @@ -0,0 +1,49 @@ +package discovery + +import ( + "fmt" + "sync" +) + +// Registry is a thread-safe key -> channel registry +// K must be comparable so it can be used as a map key +// DO NOT USE a pointer type as K +type Registry[K comparable, V any] struct { + mu sync.RWMutex + m map[K]V +} + +func NewRegistry[K comparable, V any]() *Registry[K, V] { + return &Registry[K, V]{m: make(map[K]V)} +} + +func (r *Registry[K, V]) Register(key K, value V) error { + r.mu.Lock() + defer r.mu.Unlock() + if _, exists := r.m[key]; exists 
{ + return fmt.Errorf("already registered: %v", key) + } + r.m[key] = value + return nil +} + +func (r *Registry[K, V]) Unregister(key K) { + r.mu.Lock() + delete(r.m, key) + r.mu.Unlock() +} + +func (r *Registry[K, V]) Get(key K) (V, bool) { + r.mu.RLock() + value, ok := r.m[key] + r.mu.RUnlock() + return value, ok +} + +func (r *Registry[K, V]) Exists(key K) bool { + r.mu.RLock() + defer r.mu.RUnlock() + + _, exists := r.m[key] + return exists +} diff --git a/internal/controller/discovery/target_manager.go b/internal/controller/discovery/target_manager.go deleted file mode 100644 index 245942d..0000000 --- a/internal/controller/discovery/target_manager.go +++ /dev/null @@ -1,70 +0,0 @@ -package discovery - -import ( - "context" - - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" - - gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" - "github.com/gnmic/operator/internal/controller/discovery/core" -) - -// TargetManager consumes discovered targets and applies them to Kubernetes. -type TargetManager struct { - client client.Client - scheme *runtime.Scheme - targetSource *gnmicv1alpha1.TargetSource - in <-chan []core.DiscoveryMessage -} - -// NewTargetManager wires a TargetManager instance. -func NewTargetManager(c client.Client, s *runtime.Scheme, ts *gnmicv1alpha1.TargetSource, in <-chan []core.DiscoveryMessage) *TargetManager { - return &TargetManager{ - client: c, - scheme: s, - targetSource: ts, - in: in, - } -} - -// Run is a long‑running loop that receives target snapshots -// and reconciles Target CRs accordingly -func (m *TargetManager) Run(ctx context.Context) error { - logger := log.FromContext(ctx). 
- WithValues("targetSource", m.targetSource) - - logger.Info("target manager started") - - for { - select { - case <-ctx.Done(): - logger.Info("target manager stopped") - return nil - - case targets := <-m.in: - logger.Info( - "received discovered targets", - "count", len(targets), - ) - - // List existing Target CRs owned by this TargetSource - // var existing gnmicv1alpha1.TargetList - // if err := m.client.List( - // ctx, - // &existing, - // client.MatchingLabels{ - // "gnmic.dev/targetsource": m.targetsource, - // }, - // ); err != nil { - // return err - // } - - // TODO: Target Lifecycle Management - // 1. Compare and determine which Targets to create/update/delete - // 2. Create/update/delete Target CRs accordingly - // 3. Update TargetSource status with sync results - } - } -} diff --git a/internal/controller/targetsource_controller.go b/internal/controller/targetsource_controller.go index 8cd6f68..fc19543 100644 --- a/internal/controller/targetsource_controller.go +++ b/internal/controller/targetsource_controller.go @@ -18,33 +18,41 @@ package controller import ( "context" - "sync" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" gnmicv1alpha1 "github.com/gnmic/operator/api/v1alpha1" "github.com/gnmic/operator/internal/controller/discovery" - "github.com/gnmic/operator/internal/controller/discovery/core" - _ "github.com/gnmic/operator/internal/controller/discovery/loaders/all" + discoveryTypes "github.com/gnmic/operator/internal/controller/discovery/core" + "github.com/go-logr/logr" ) -const targetSourceFinalizer = "operator.gnmic.dev/targetsource-finalizer" - -type runningSource struct { - cancel 
context.CancelFunc -} - // TargetSourceReconciler reconciles a TargetSource object +// +// Responsibilities: +// - Ensure at most one discovery runtime per TargetSource +// - Start runtime on reconcile if not already running +// - Restart runtime on reconcile if spec changed +// - Stop runtime on deletion or NotFound type TargetSourceReconciler struct { client.Client Scheme *runtime.Scheme - mu sync.Mutex - running map[client.ObjectKey]runningSource + BufferSize int + ChunkSize int + + DiscoveryRegistry *discovery.Registry[ + types.NamespacedName, + discoveryTypes.DiscoveryRegistryValue, + ] } // +kubebuilder:rbac:groups=operator.gnmic.dev,resources=targetsources,verbs=get;list;watch;create;update;patch;delete @@ -55,112 +63,194 @@ type TargetSourceReconciler struct { // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. func (r *TargetSourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - logger := log.FromContext(ctx) - - var targetSource gnmicv1alpha1.TargetSource - if err := r.Get(ctx, req.NamespacedName, &targetSource); err != nil { - // If the TargetSource no longer exists, ensure runtime cleanup - if client.IgnoreNotFound(err) == nil { - r.stopDiscovery(req.NamespacedName) + logger := log.FromContext(ctx). + WithName("targetsource-controller"). 
+ WithValues( + "targetsource", req.NamespacedName.Name, + "namespace", req.NamespacedName.Namespace, + ) + + targetSource, err := r.fetchTargetSource(ctx, req.NamespacedName) + // If the TargetSource no longer exists, ensure runtime cleanup + if apierrors.IsNotFound(err) { + if runtime, ok := r.DiscoveryRegistry.Get(req.NamespacedName); ok { + runtime.Stop() + r.DiscoveryRegistry.Unregister(req.NamespacedName) } - return ctrl.Result{}, client.IgnoreNotFound(err) + logger.Info("TargetSource not found; stopped discovery runtime") + return ctrl.Result{}, nil + } else if err != nil { + return ctrl.Result{}, err } - logger.Info("reconciling TargetSource", "name", targetSource.Name) - - // Handle deletion with finalizer if !targetSource.DeletionTimestamp.IsZero() { - logger.Info("TargetSource is being deleted, stopping pipeline", "name", targetSource.Name) + return r.reconcileDeletion(ctx, req.NamespacedName, targetSource) + } - r.stopDiscovery(req.NamespacedName) + if err := r.ensureFinalizer(ctx, targetSource); err != nil { + return ctrl.Result{}, err + } - // Remove finalizer if exists - if controllerutil.ContainsFinalizer(&targetSource, targetSourceFinalizer) { - controllerutil.RemoveFinalizer(&targetSource, targetSourceFinalizer) - if err := r.Update(ctx, &targetSource); err != nil { - return ctrl.Result{}, err - } + if r.DiscoveryRegistry.Exists(req.NamespacedName) { + if targetSource.Generation != targetSource.Status.ObservedGeneration { + r.reconcileDeletion(ctx, req.NamespacedName, targetSource) + } else { + logger.Info("Discovery runtime already running; reconciliation completed") + return ctrl.Result{}, nil } + } - return ctrl.Result{}, nil + if err := r.startDiscovery(req.NamespacedName, targetSource, logger); err != nil { + return ctrl.Result{}, err + } + + targetSource.Status.ObservedGeneration = targetSource.Generation + if err := r.Status().Update(ctx, targetSource); err != nil { + return ctrl.Result{}, err } - // Ensure finalizer is set - if 
!controllerutil.ContainsFinalizer(&targetSource, targetSourceFinalizer) { - controllerutil.AddFinalizer(&targetSource, targetSourceFinalizer) - if err := r.Update(ctx, &targetSource); err != nil { + logger.Info("Started discovery runtime") + return ctrl.Result{}, nil +} + +// fetchTargetSource retrieves a TargetSource by name, handling cleanup if not found +func (r *TargetSourceReconciler) fetchTargetSource(ctx context.Context, key types.NamespacedName) (*gnmicv1alpha1.TargetSource, error) { + var targetSource gnmicv1alpha1.TargetSource + if err := r.Get(ctx, key, &targetSource); err != nil { + return nil, err + } + return &targetSource, nil +} + +// reconcileDeletion stops the discovery runtime and removes the finalizer +func (r *TargetSourceReconciler) reconcileDeletion(ctx context.Context, key types.NamespacedName, targetSource *gnmicv1alpha1.TargetSource) (ctrl.Result, error) { + logger := log.FromContext(ctx).WithValues( + "targetsource", key.Name, + "namespace", key.Namespace, + ) + logger.Info("TargetSource was marked for deletion; stopping discovery runtime") + if runtime, ok := r.DiscoveryRegistry.Get(key); ok { + runtime.Stop() + r.DiscoveryRegistry.Unregister(key) + } + + // Remove finalizer if exists + if controllerutil.ContainsFinalizer(targetSource, LabelTargetSourceFinalizer) { + controllerutil.RemoveFinalizer(targetSource, LabelTargetSourceFinalizer) + if err := r.Update(ctx, targetSource); err != nil { return ctrl.Result{}, err } - // Requeue to continue with a clean state - return ctrl.Result{}, nil + + logger.Info("Removed TargetSource finalizer") } - // TODO: - // 1. Check if a pipeline is already running for this TargetSource - // 2. If not, create and start a new pipeline: - // a. Create a Loader based on TargetSource spec - // b. Start the Loader in a new goroutine, passing a channel for discovered targets - // c. Start a TargetManager in another goroutine to consume discovered targets and manage Target CRs - // 3. 
If yes, check if the spec has changed and restart the pipeline if needed - - r.mu.Lock() - _, exists := r.running[req.NamespacedName] - r.mu.Unlock() - - // If a targetsource loader exists, return immediately without starting - // any new loader or target manager - if exists { - return ctrl.Result{}, nil + return ctrl.Result{}, nil +} + +// ensureFinalizer adds the finalizer if not present and updates the TargetSource +func (r *TargetSourceReconciler) ensureFinalizer(ctx context.Context, targetSource *gnmicv1alpha1.TargetSource) error { + if controllerutil.ContainsFinalizer(targetSource, LabelTargetSourceFinalizer) { + return nil } - loader, err := discovery.NewLoader(targetSource.ObjectMeta.Name, targetSource.ObjectMeta.Namespace, targetSource.Spec) - if err != nil { - return ctrl.Result{}, err + controllerutil.AddFinalizer(targetSource, LabelTargetSourceFinalizer) + if err := r.Update(ctx, targetSource); err != nil { + return err } - runtimeCtx, cancel := context.WithCancel(context.Background()) + log.FromContext(ctx).Info( + "Added TargetSource finalizer", + "targetsource", targetSource.Name, + "namespace", targetSource.Namespace, + ) + + return nil +} - targetChannel := make(chan []core.DiscoveryMessage, 10) +// startDiscovery creates and starts a discovery runtime for a TargetSource +// +// Invariant: +// - MessageProcessor and Loader must run for the lifetime of the TargetSource +// - Any unexpected exit is treated as a bug and triggers full shutdown +func (r *TargetSourceReconciler) startDiscovery( + key types.NamespacedName, + targetSource *gnmicv1alpha1.TargetSource, + logger logr.Logger, +) error { + targetChannel := make(chan []discoveryTypes.DiscoveryMessage, r.BufferSize) + ctx, cancel := context.WithCancel(context.Background()) + loaderConfig := discoveryTypes.CommonLoaderConfig{ + TargetsourceNN: key, + ChunkSize: r.ChunkSize, + } - // start loader - go loader.Start(runtimeCtx, targetSource.Name, targetSource.Spec, targetChannel) + // Cleanup 
function to cleanup discovery runtime of targetsource + cleanup := func() { + cancel() + r.DiscoveryRegistry.Unregister(key) + close(targetChannel) + } - // start target manager - manager := discovery.NewTargetManager( + messageProcessor := discovery.NewMessageProcessor( r.Client, r.Scheme, - &targetSource, + targetSource, targetChannel, ) - go manager.Run(runtimeCtx) + loader, loaderConfig, err := discovery.NewLoader(loaderConfig, + &targetSource.Spec, + ) + if err != nil { + logger.Error(err, "Target loader could not be created") + cleanup() + return err + } - r.mu.Lock() - r.running[req.NamespacedName] = runningSource{cancel: cancel} - r.mu.Unlock() + // Register discovery runtime of targetsource + if err := r.DiscoveryRegistry.Register(key, discoveryTypes.DiscoveryRegistryValue{ + Channel: targetChannel, + Stop: cancel, + CommonLoaderConfig: &loaderConfig, + }); err != nil { + return err + } - logger.Info("TargetSource pipeline started", "name", targetSource.Name) + // Start message processor + go func() { + logger.Info("Message processor started") - return ctrl.Result{}, nil -} + if err := messageProcessor.Run(ctx); err != nil { + logger.Error(err, "Message processor exited unexpectedly") + } else { + logger.Error(nil, "Message processor exited unexpectedly without error") + } -// stopDiscovery stops and removes a running discovery pipeline -// for the given TargetSource key -func (r *TargetSourceReconciler) stopDiscovery(key client.ObjectKey) { - r.mu.Lock() - defer r.mu.Unlock() + // Any exit is considered a bug that should
stop the discovery runtime + cleanup() + }() + + return nil } // SetupWithManager sets up the controller with the Manager. func (r *TargetSourceReconciler) SetupWithManager(mgr ctrl.Manager) error { - r.running = make(map[client.ObjectKey]runningSource) - return ctrl.NewControllerManagedBy(mgr). - For(&gnmicv1alpha1.TargetSource{}). + For( + &gnmicv1alpha1.TargetSource{}, + builder.WithPredicates(predicate.GenerationChangedPredicate{}), + ). Named("targetsource"). Complete(r) } diff --git a/lab/dev/http/targets.json b/lab/dev/http/targets.json new file mode 100644 index 0000000..882faae --- /dev/null +++ b/lab/dev/http/targets.json @@ -0,0 +1,13 @@ +[ + { + "address": "10.0.0.1:57000", + "name": "router1" + }, + { + "address": "10.0.0.2:57000", + "name": "router2", + "labels": { + "test": "asdf" + } + } +] \ No newline at end of file diff --git a/lab/dev/resources/targetsources/ctest1.yaml b/lab/dev/resources/targetsources/ctest1.yaml deleted file mode 100644 index e0aea43..0000000 --- a/lab/dev/resources/targetsources/ctest1.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: operator.gnmic.dev/v1alpha1 -kind: TargetSource -metadata: - name: http-discovery -spec: - provider: - http: - url: http://inventory-service:8080/targets - labels: - source: inventory - type: http - profile: eos \ No newline at end of file diff --git a/netbox.mk b/netbox.mk index 555c2f1..a133554 100644 --- a/netbox.mk +++ b/netbox.mk @@ -23,7 +23,7 @@ ifndef NETBOX_CLUSTER_NAME $(error NETBOX_CLUSTER_NAME is required. 
Usage: make netbox-deploy-cluster NETBOX_CLUSTER_NAME=cluster-name) endif kind get clusters | grep -q "$(NETBOX_CLUSTER_NAME)" || kind create cluster --name $(NETBOX_CLUSTER_NAME) - kubectl config use-context kind-$(CLUSTER_NAME) + kubectl config use-context kind-$(NETBOX_CLUSTER_NAME) .PHONY: netbox-undeploy netbox-undeploy: ## Undeploy the netbox cluster diff --git a/test.mk b/test.mk index 3497c2b..fb30c30 100644 --- a/test.mk +++ b/test.mk @@ -4,6 +4,8 @@ GNMIC_VERSION ?= 0.44.1 KUBECTL_VERSION ?= v1.31.0 TEST_CLUSTER_NAME ?= test-kind CERT_MANAGER_VERSION ?= v1.19.3 +NETBOX_TEST_PORT ?= 8082 + .PHONY: install-kubectl install-kubectl: ## Install kubectl if not present @@ -85,6 +87,42 @@ deploy-test-topology: ## Deploy a test topology for testing undeploy-test-topology: ## Undeploy a test topology for testing sudo containerlab destroy -t test/integration/t1.clab.yaml -c +.PHONY: deploy-test-http-server +deploy-test-http-server: ## Deploy a test http pod with a static file inventory for testing + kubectl apply -f test/integration/http/resources/ + +.PHONY: undeploy-test-http-server +undeploy-test-http-server: ## Undeploy the http pod for testing + kubectl delete -f test/integration/http/resources/ + +.PHONY: deploy-test-netbox-instance +deploy-test-netbox-instance: NETBOX_CLUSTER_NAME=$(TEST_CLUSTER_NAME) ## Deploy the test netbox instance for testing +deploy-test-netbox-instance: NETBOX_PASSWORD=Netbox123 +deploy-test-netbox-instance: netbox-setup + +.PHONY: deploy-test-netbox-topology +deploy-test-netbox-topology: ## Deploy the netbox test topology for testing + sudo containerlab deploy -t test/integration/netbox/netbox.clab.yaml -c + kubectl port-forward svc/netbox $(NETBOX_TEST_PORT):80 -n netbox --context kind-$(TEST_CLUSTER_NAME) --address=0.0.0.0 >/dev/null 2>&1 & + +.PHONY: sync-test-netbox-data +sync-test-netbox-data: NETBOX_CLUSTER_NAME=$(TEST_CLUSTER_NAME) ## Sync the netbox instance with the test topology for testing +sync-test-netbox-data:
NETBOX_URL=http://localhost:$(NETBOX_TEST_PORT) +sync-test-netbox-data: NETBOX_INIT=test/integration/netbox/initializers +sync-test-netbox-data: netbox-sync-data + +.PHONY: undeploy-test-netbox-instance +undeploy-test-netbox-instance: NETBOX_CLUSTER_NAME=$(TEST_CLUSTER_NAME) ## Undeploy the netbox instance from the test cluster +undeploy-test-netbox-instance: netbox-delete + +.PHONY: undeploy-test-netbox-topology +undeploy-test-netbox-topology: ## Undeploy the netbox test topology for testing + sudo containerlab destroy -t test/integration/netbox/netbox.clab.yaml -c + +.PHONY: apply-test-targetsources +apply-test-targetsources: ## Apply the test targetsources for testing + kubectl apply -f test/integration/resources/targetsources + .PHONY: apply-test-targets apply-test-targets: ## Apply the test targets for testing kubectl apply -f test/integration/resources/targets/profile @@ -115,5 +153,5 @@ apply-test-clusters: ## Apply the test clusters for testing kubectl apply -f test/integration/resources/clusters .PHONY: apply-test-resources -apply-test-resources: apply-test-targets apply-test-subscriptions apply-test-outputs apply-test-pipelines apply-test-clusters +apply-test-resources: apply-test-targets apply-test-subscriptions apply-test-outputs apply-test-pipelines apply-test-clusters apply-test-targetsources diff --git a/test/integration/http/resources/configmap.yaml b/test/integration/http/resources/configmap.yaml new file mode 100644 index 0000000..f017566 --- /dev/null +++ b/test/integration/http/resources/configmap.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: http-target-cfg +data: + targets.json: | + [ + { + "address": "clab-t1-spine1:57400", + "name": "spine1", + "labels": { + "vendor": "nokia_srlinux", + "role": "spine" + } + }, + { + "address": "clab-t1-leaf1:57400", + "name": "leaf1", + "labels": { + "vendor": "nokia_srlinux", + "role": "leaf" + } + }, + { + "address": "clab-t1-leaf2:57400", + "name": "leaf2", + "labels": { + 
"vendor": "nokia_srlinux", + "role": "leaf" + } + } + ] \ No newline at end of file diff --git a/test/integration/http/resources/deployment.yaml b/test/integration/http/resources/deployment.yaml new file mode 100644 index 0000000..785c1e3 --- /dev/null +++ b/test/integration/http/resources/deployment.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: http-target-inv +spec: + replicas: 1 + selector: + matchLabels: + app: http-target-inv + template: + metadata: + labels: + app: http-target-inv + spec: + containers: + - name: nginx + image: nginx:alpine + volumeMounts: + - name: data + mountPath: /usr/share/nginx/html + volumes: + - name: data + configMap: + name: http-target-cfg \ No newline at end of file diff --git a/test/integration/http/resources/service.yaml b/test/integration/http/resources/service.yaml new file mode 100644 index 0000000..d4be4e7 --- /dev/null +++ b/test/integration/http/resources/service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: http-svc +spec: + selector: + app: http-target-inv + ports: + - port: 80 + targetPort: 80 \ No newline at end of file diff --git a/test/integration/netbox/initializers/device-roles.yaml b/test/integration/netbox/initializers/device-roles.yaml new file mode 100644 index 0000000..9167dab --- /dev/null +++ b/test/integration/netbox/initializers/device-roles.yaml @@ -0,0 +1,3 @@ +- name: Router + slug: router + color: ff0000 diff --git a/test/integration/netbox/initializers/device-types.yaml b/test/integration/netbox/initializers/device-types.yaml new file mode 100644 index 0000000..a6279ed --- /dev/null +++ b/test/integration/netbox/initializers/device-types.yaml @@ -0,0 +1,16 @@ +- model: ixr-d2l + slug: arista-ixr-d2l + manufacturer: + name: Arista +- model: ixr-d2l + slug: nokia-ixr-d2l + manufacturer: + name: Nokia +- model: ixr-d2l-leaf + slug: nokia-ixr-d2l-leaf + manufacturer: + name: Nokia +- model: ixr-d3l + slug: nokia-ixr-d3l + manufacturer: + name: 
Nokia diff --git a/test/integration/netbox/initializers/devices.yaml b/test/integration/netbox/initializers/devices.yaml new file mode 100644 index 0000000..17ed036 --- /dev/null +++ b/test/integration/netbox/initializers/devices.yaml @@ -0,0 +1,36 @@ +- name: ceos1 + role: + slug: router + manufacturer: + name: Arista + device_type: + slug: arista-ixr-d2l + site: + name: Lab +- name: leaf1 + role: + slug: router + manufacturer: + name: Nokia + device_type: + slug: nokia-ixr-d2l + site: + name: Lab +- name: leaf2 + role: + slug: router + manufacturer: + name: Nokia + device_type: + slug: nokia-ixr-d2l-leaf + site: + name: Lab +- name: spine1 + role: + slug: router + manufacturer: + name: Nokia + device_type: + slug: nokia-ixr-d3l + site: + name: Lab diff --git a/test/integration/netbox/initializers/interfaces.yaml b/test/integration/netbox/initializers/interfaces.yaml new file mode 100644 index 0000000..05e8d24 --- /dev/null +++ b/test/integration/netbox/initializers/interfaces.yaml @@ -0,0 +1,40 @@ +- device: + name: spine1 + name: e1-1 + type: 1000base-t +- device: + name: leaf1 + name: e1-49 + type: 1000base-t +- device: + name: spine1 + name: e1-2 + type: 1000base-t +- device: + name: leaf2 + name: e1-49 + type: 1000base-t +- device: + name: spine1 + name: e1-3 + type: 1000base-t +- device: + name: ceos1 + name: eth1 + type: 1000base-t +- device: + name: spine1 + name: mgmt0 + type: 1000base-t +- device: + name: leaf1 + name: mgmt0 + type: 1000base-t +- device: + name: leaf2 + name: mgmt0 + type: 1000base-t +- device: + name: ceos1 + name: mgmt0 + type: 1000base-t diff --git a/test/integration/netbox/initializers/ip-addresses.yaml b/test/integration/netbox/initializers/ip-addresses.yaml new file mode 100644 index 0000000..73453a8 --- /dev/null +++ b/test/integration/netbox/initializers/ip-addresses.yaml @@ -0,0 +1,24 @@ +- address: clab-t1-spine1 + assigned_object: + device: + name: spine1 + name: mgmt0 + status: active + primary: true + dns_name: 
t2-nodes-spine1 +- address: clab-t1-leaf1 + assigned_object: + device: + name: leaf1 + name: mgmt0 + status: active + primary: true + dns_name: t2-nodes-leaf1 +- address: clab-t1-leaf2 + assigned_object: + device: + name: leaf2 + name: mgmt0 + status: active + primary: true + dns_name: t2-nodes-leaf2 diff --git a/test/integration/netbox/initializers/manufacturers.yaml b/test/integration/netbox/initializers/manufacturers.yaml new file mode 100644 index 0000000..68627af --- /dev/null +++ b/test/integration/netbox/initializers/manufacturers.yaml @@ -0,0 +1,4 @@ +- name: Nokia + slug: nokia +- name: Arista + slug: arista diff --git a/test/integration/netbox/initializers/sites.yaml b/test/integration/netbox/initializers/sites.yaml new file mode 100644 index 0000000..bc8ed18 --- /dev/null +++ b/test/integration/netbox/initializers/sites.yaml @@ -0,0 +1,2 @@ +- name: Lab + slug: lab diff --git a/test/integration/resources/clusters/cluster1.yaml b/test/integration/resources/clusters/cluster1.yaml index 513b948..c01cc56 100644 --- a/test/integration/resources/clusters/cluster1.yaml +++ b/test/integration/resources/clusters/cluster1.yaml @@ -13,4 +13,4 @@ spec: memory: "500Mi" cpu: "1" targetDistribution: - podCapacity: 5 \ No newline at end of file + podCapacity: 10 \ No newline at end of file diff --git a/test/integration/resources/pipelines/pipeline1.yaml b/test/integration/resources/pipelines/pipeline1.yaml index 0dc67a3..82c0289 100644 --- a/test/integration/resources/pipelines/pipeline1.yaml +++ b/test/integration/resources/pipelines/pipeline1.yaml @@ -12,6 +12,8 @@ spec: - matchLabels: vendor: nokia_srlinux role: spine + - matchLabels: + operator.gnmic.dev/targetsource: http-ts subscriptionSelectors: - matchLabels: vendor: nokia_srlinux diff --git a/test/integration/resources/pipelines/pipeline2.yaml b/test/integration/resources/pipelines/pipeline2.yaml index 7420d7d..a361833 100644 --- a/test/integration/resources/pipelines/pipeline2.yaml +++ 
b/test/integration/resources/pipelines/pipeline2.yaml @@ -12,6 +12,8 @@ spec: - matchLabels: vendor: nokia_srlinux role: spine + - matchLabels: + operator.gnmic.dev/targetsource: http-ts subscriptionSelectors: - matchLabels: vendor: nokia_srlinux diff --git a/test/integration/resources/targetsources/http.yaml b/test/integration/resources/targetsources/http.yaml new file mode 100644 index 0000000..422cfdc --- /dev/null +++ b/test/integration/resources/targetsources/http.yaml @@ -0,0 +1,11 @@ +apiVersion: operator.gnmic.dev/v1alpha1 +kind: TargetSource +metadata: + name: http-ts +spec: + provider: + http: + url: http://http-svc.default.svc/targets.json + targetLabels: + integrationtest: http + targetProfile: default \ No newline at end of file