diff --git a/acceptance/apps/init-template/app/out.test.toml b/acceptance/apps/init-template/app/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/app/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/app/output.txt b/acceptance/apps/init-template/app/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/app/output.txt @@ -0,0 +1 @@ +✓ Template instantiation succeeded diff --git a/acceptance/apps/init-template/app/script b/acceptance/apps/init-template/app/script new file mode 100644 index 0000000000..1f38796b6c --- /dev/null +++ b/acceptance/apps/init-template/app/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template app --name test_app --sql-warehouse-id abc123 --output-dir output > /dev/null 2>&1 +echo "✓ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/app/test.toml b/acceptance/apps/init-template/app/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/app/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/acceptance/apps/init-template/empty/out.test.toml b/acceptance/apps/init-template/empty/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/empty/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/empty/output.txt b/acceptance/apps/init-template/empty/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/empty/output.txt @@ -0,0 +1 @@ +✓ Template instantiation succeeded diff --git a/acceptance/apps/init-template/empty/script 
b/acceptance/apps/init-template/empty/script new file mode 100644 index 0000000000..5d5a80bd97 --- /dev/null +++ b/acceptance/apps/init-template/empty/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template empty --name test_empty --catalog main --output-dir output > /dev/null 2>&1 +echo "✓ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/empty/test.toml b/acceptance/apps/init-template/empty/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/empty/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/acceptance/apps/init-template/job/out.test.toml b/acceptance/apps/init-template/job/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/job/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/job/output.txt b/acceptance/apps/init-template/job/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/job/output.txt @@ -0,0 +1 @@ +✓ Template instantiation succeeded diff --git a/acceptance/apps/init-template/job/script b/acceptance/apps/init-template/job/script new file mode 100644 index 0000000000..8464089885 --- /dev/null +++ b/acceptance/apps/init-template/job/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template job --name test_job --catalog main --output-dir output > /dev/null 2>&1 || exit 1 +echo "✓ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/job/test.toml b/acceptance/apps/init-template/job/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/job/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git 
a/acceptance/apps/init-template/pipeline/out.test.toml b/acceptance/apps/init-template/pipeline/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/pipeline/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/pipeline/output.txt b/acceptance/apps/init-template/pipeline/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/pipeline/output.txt @@ -0,0 +1 @@ +✓ Template instantiation succeeded diff --git a/acceptance/apps/init-template/pipeline/script b/acceptance/apps/init-template/pipeline/script new file mode 100644 index 0000000000..0d73aae59f --- /dev/null +++ b/acceptance/apps/init-template/pipeline/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template pipeline --name test_pipeline --language python --catalog main --output-dir output > /dev/null 2>&1 +echo "✓ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/pipeline/test.toml b/acceptance/apps/init-template/pipeline/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/pipeline/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template.go deleted file mode 100644 index a003b14d37..0000000000 --- a/experimental/apps-mcp/cmd/init_template.go +++ /dev/null @@ -1,355 +0,0 @@ -package mcp - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "os" - "path/filepath" - "sort" - "strings" - - "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" - "github.com/databricks/cli/experimental/apps-mcp/lib/state" - "github.com/databricks/cli/libs/cmdio" - 
"github.com/databricks/cli/libs/template" - "github.com/spf13/cobra" -) - -func validateAppNameLength(projectName string) error { - const maxAppNameLength = 30 - const devTargetPrefix = "dev-" - totalLength := len(devTargetPrefix) + len(projectName) - if totalLength > maxAppNameLength { - maxAllowed := maxAppNameLength - len(devTargetPrefix) - return fmt.Errorf( - "app name too long: 'dev-%s' is %d chars (max: %d). App name must be ≤%d characters", - projectName, totalLength, maxAppNameLength, maxAllowed, - ) - } - return nil -} - -func readClaudeMd(ctx context.Context, configFile string) { - showFallback := func() { - cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.") - } - - if configFile == "" { - showFallback() - return - } - - configBytes, err := os.ReadFile(configFile) - if err != nil { - showFallback() - return - } - - var config map[string]any - if err := json.Unmarshal(configBytes, &config); err != nil { - showFallback() - return - } - - projectName, ok := config["project_name"].(string) - if !ok || projectName == "" { - showFallback() - return - } - - claudePath := filepath.Join(".", projectName, "CLAUDE.md") - content, err := os.ReadFile(claudePath) - if err != nil { - showFallback() - return - } - - cmdio.LogString(ctx, "\n=== CLAUDE.md ===") - cmdio.LogString(ctx, string(content)) - cmdio.LogString(ctx, "=================\n") -} - -// generateFileTree creates a tree-style visualization of the file structure. -// Collapses directories with more than 10 files to avoid clutter. 
-func generateFileTree(outputDir string) (string, error) { - const maxFilesToShow = 10 - - // collect all files in the output directory - var allFiles []string - err := filepath.Walk(outputDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - relPath, err := filepath.Rel(outputDir, path) - if err != nil { - return err - } - allFiles = append(allFiles, filepath.ToSlash(relPath)) - } - return nil - }) - if err != nil { - return "", err - } - - // build a tree structure - tree := make(map[string][]string) - - for _, relPath := range allFiles { - parts := strings.Split(relPath, "/") - - if len(parts) == 1 { - // root level file - tree[""] = append(tree[""], parts[0]) - } else { - // file in subdirectory - dir := strings.Join(parts[:len(parts)-1], "/") - fileName := parts[len(parts)-1] - tree[dir] = append(tree[dir], fileName) - } - } - - // format as tree - var output strings.Builder - var sortedDirs []string - for dir := range tree { - sortedDirs = append(sortedDirs, dir) - } - sort.Strings(sortedDirs) - - for _, dir := range sortedDirs { - filesInDir := tree[dir] - if dir == "" { - // root files - always show all - for _, file := range filesInDir { - output.WriteString(file) - output.WriteString("\n") - } - } else { - // directory - output.WriteString(dir) - output.WriteString("/\n") - if len(filesInDir) <= maxFilesToShow { - // show all files - for _, file := range filesInDir { - output.WriteString(" ") - output.WriteString(file) - output.WriteString("\n") - } - } else { - // collapse large directories - output.WriteString(fmt.Sprintf(" (%d files)\n", len(filesInDir))) - } - } - } - - return output.String(), nil -} - -const ( - defaultTemplateRepo = "https://github.com/databricks/cli" - defaultTemplateDir = "experimental/apps-mcp/templates/appkit" - defaultBranch = "main" - templatePathEnvVar = "DATABRICKS_APPKIT_TEMPLATE_PATH" -) - -func newInitTemplateCmd() *cobra.Command { - cmd := &cobra.Command{ - 
Use: "init-template", - Short: "Initialize a Databricks App using the appkit template", - Args: cobra.NoArgs, - Long: `Initialize a Databricks App using the appkit template. - -Examples: - experimental apps-mcp tools init-template --name my-app - experimental apps-mcp tools init-template --name my-app --warehouse abc123 - experimental apps-mcp tools init-template --name my-app --description "My cool app" - experimental apps-mcp tools init-template --name my-app --output-dir ./projects - -Environment variables: - DATABRICKS_APPKIT_TEMPLATE_PATH Override template source with local path (for development) - -After initialization: - databricks bundle deploy --target dev -`, - } - - var name string - var warehouse string - var description string - var outputDir string - var describe bool - - cmd.Flags().StringVar(&name, "name", "", "Project name (required)") - cmd.Flags().StringVar(&warehouse, "warehouse", "", "SQL warehouse ID") - cmd.Flags().StringVar(&warehouse, "warehouse-id", "", "SQL warehouse ID (alias for --warehouse)") - cmd.Flags().StringVar(&warehouse, "sql-warehouse-id", "", "SQL warehouse ID (alias for --warehouse)") - cmd.Flags().StringVar(&warehouse, "sql_warehouse_id", "", "SQL warehouse ID (alias for --warehouse)") - cmd.Flags().StringVar(&description, "description", "", "App description") - cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") - cmd.Flags().BoolVar(&describe, "describe", false, "Display template schema without initializing") - - // Hide the alias flags from help - cmd.Flags().MarkHidden("warehouse-id") - cmd.Flags().MarkHidden("sql-warehouse-id") - cmd.Flags().MarkHidden("sql_warehouse_id") - - cmd.PreRunE = root.MustWorkspaceClient - cmd.RunE = func(cmd *cobra.Command, args []string) error { - ctx := cmd.Context() - - // Resolve template source: env var override or default remote - templatePathOrUrl := os.Getenv(templatePathEnvVar) - templateDir := "" - branch := "" - - if 
templatePathOrUrl == "" { - templatePathOrUrl = defaultTemplateRepo - templateDir = defaultTemplateDir - branch = defaultBranch - } - - // Describe mode - show schema only - if describe { - r := template.Resolver{ - TemplatePathOrUrl: templatePathOrUrl, - ConfigFile: "", - OutputDir: outputDir, - TemplateDir: templateDir, - Branch: branch, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - schema, _, err := tmpl.Reader.LoadSchemaAndTemplateFS(ctx) - if err != nil { - return fmt.Errorf("failed to load template schema: %w", err) - } - - schemaJSON, err := json.MarshalIndent(schema, "", " ") - if err != nil { - return err - } - cmdio.LogString(ctx, string(schemaJSON)) - return nil - } - - // Validate required flag - if name == "" { - return errors.New("--name is required") - } - - if err := validateAppNameLength(name); err != nil { - return err - } - - // Build config map from flags - configMap := map[string]any{ - "project_name": name, - } - if warehouse != "" { - configMap["sql_warehouse_id"] = warehouse - } - if description != "" { - configMap["app_description"] = description - } - - // Write config to temp file - tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") - if err != nil { - return fmt.Errorf("create temp config file: %w", err) - } - defer os.Remove(tmpFile.Name()) - - configBytes, err := json.Marshal(configMap) - if err != nil { - return fmt.Errorf("marshal config: %w", err) - } - if _, err := tmpFile.Write(configBytes); err != nil { - return fmt.Errorf("write config file: %w", err) - } - if err := tmpFile.Close(); err != nil { - return fmt.Errorf("close config file: %w", err) - } - - configFile := tmpFile.Name() - - // Create output directory if specified and doesn't exist - if outputDir != "" { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create output directory: %w", err) - } - } - - r := template.Resolver{ - TemplatePathOrUrl: templatePathOrUrl, - 
ConfigFile: configFile, - OutputDir: outputDir, - TemplateDir: templateDir, - Branch: branch, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - - // Determine actual output directory (template writes to subdirectory with project name) - actualOutputDir := name - if outputDir != "" { - actualOutputDir = filepath.Join(outputDir, name) - } - - // Count files and get absolute path - fileCount := 0 - absOutputDir, err := filepath.Abs(actualOutputDir) - if err != nil { - absOutputDir = actualOutputDir - } - _ = filepath.Walk(absOutputDir, func(path string, info os.FileInfo, err error) error { - if err == nil && !info.IsDir() { - fileCount++ - } - return nil - }) - cmdio.LogString(ctx, common.FormatScaffoldSuccess("appkit", absOutputDir, fileCount)) - - // Generate and print file tree structure - fileTree, err := generateFileTree(absOutputDir) - if err == nil && fileTree != "" { - cmdio.LogString(ctx, "\nFile structure:") - cmdio.LogString(ctx, fileTree) - } - - // Inject L2 (target-specific guidance for apps) - targetApps := prompts.MustExecuteTemplate("target_apps.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetApps) - - // Inject L3 (template-specific guidance from CLAUDE.md) - readClaudeMd(ctx, configFile) - - // Save initial scaffolded state - if err := state.SaveState(absOutputDir, state.NewScaffolded()); err != nil { - return fmt.Errorf("failed to save project state: %w", err) - } - - return nil - } - return cmd -} diff --git a/experimental/apps-mcp/cmd/init_template/app.go b/experimental/apps-mcp/cmd/init_template/app.go new file mode 100644 index 0000000000..227eb55bf6 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/app.go @@ -0,0 +1,181 @@ +package init_template + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + + 
"github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/state" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +const ( + defaultTemplateRepo = "https://github.com/databricks/cli" + defaultTemplateDir = "experimental/apps-mcp/templates/appkit" + defaultBranch = "main" + templatePathEnvVar = "DATABRICKS_APPKIT_TEMPLATE_PATH" +) + +func readClaudeMd(ctx context.Context, projectDir string) { + claudePath := filepath.Join(projectDir, "CLAUDE.md") + content, err := os.ReadFile(claudePath) + if err != nil { + cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.") + return + } + + cmdio.LogString(ctx, "\n=== CLAUDE.md ===") + cmdio.LogString(ctx, string(content)) + cmdio.LogString(ctx, "=================\n") +} + +func validateAppNameLength(projectName string) error { + const maxAppNameLength = 30 + const devTargetPrefix = "dev-" + totalLength := len(devTargetPrefix) + len(projectName) + if totalLength > maxAppNameLength { + maxAllowed := maxAppNameLength - len(devTargetPrefix) + return fmt.Errorf( + "app name too long: 'dev-%s' is %d chars (max: %d). App name must be ≤%d characters", + projectName, totalLength, maxAppNameLength, maxAllowed, + ) + } + return nil +} + +// newAppCmd creates the app subcommand for init-template. +func newAppCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "app", + Short: "Initialize a Databricks App using the appkit template", + Args: cobra.NoArgs, + Long: `Initialize a Databricks App using the appkit template. 
+ +Examples: + experimental apps-mcp tools init-template app --name my-app + experimental apps-mcp tools init-template app --name my-app --warehouse abc123 + experimental apps-mcp tools init-template app --name my-app --description "My cool app" + experimental apps-mcp tools init-template app --name my-app --output-dir ./projects + +Environment variables: + DATABRICKS_APPKIT_TEMPLATE_PATH Override template source with local path (for development) + +After initialization: + databricks bundle deploy --target dev +`, + } + + var name string + var warehouse string + var description string + var outputDir string + var describe bool + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&warehouse, "warehouse", "", "SQL warehouse ID") + cmd.Flags().StringVar(&warehouse, "warehouse-id", "", "SQL warehouse ID (alias for --warehouse)") + cmd.Flags().StringVar(&warehouse, "sql-warehouse-id", "", "SQL warehouse ID (alias for --warehouse)") + cmd.Flags().StringVar(&warehouse, "sql_warehouse_id", "", "SQL warehouse ID (alias for --warehouse)") + cmd.Flags().StringVar(&description, "description", "", "App description") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + cmd.Flags().BoolVar(&describe, "describe", false, "Display template schema without initializing") + + // Hide the alias flags from help + cmd.Flags().MarkHidden("warehouse-id") + cmd.Flags().MarkHidden("sql-warehouse-id") + cmd.Flags().MarkHidden("sql_warehouse_id") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + // Resolve template source: env var override or default remote + templatePathOrUrl := os.Getenv(templatePathEnvVar) + templateDir := "" + branch := "" + + if templatePathOrUrl == "" { + templatePathOrUrl = defaultTemplateRepo + templateDir = defaultTemplateDir + branch = defaultBranch + } + + // Describe mode - show schema only + 
if describe { + r := template.Resolver{ + TemplatePathOrUrl: templatePathOrUrl, + ConfigFile: "", + OutputDir: outputDir, + TemplateDir: templateDir, + Branch: branch, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + schema, _, err := tmpl.Reader.LoadSchemaAndTemplateFS(ctx) + if err != nil { + return fmt.Errorf("failed to load template schema: %w", err) + } + + schemaJSON, err := json.MarshalIndent(schema, "", " ") + if err != nil { + return err + } + cmdio.LogString(ctx, string(schemaJSON)) + return nil + } + + // Validate required flag + if name == "" { + return errors.New("--name is required") + } + + if err := validateAppNameLength(name); err != nil { + return err + } + + // Build config map from flags + configMap := map[string]any{ + "project_name": name, + } + if warehouse != "" { + configMap["sql_warehouse_id"] = warehouse + } + if description != "" { + configMap["app_description"] = description + } + + err := MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: templatePathOrUrl, + TemplateName: "appkit", + TemplateDir: templateDir, + Branch: branch, + }, configMap, name, outputDir) + if err != nil { + return err + } + + projectDir := filepath.Join(outputDir, name) + + // Inject L3 (template-specific guidance from CLAUDE.md) + // (we only do this for the app template; other templates use a generic CLAUDE.md) + readClaudeMd(ctx, projectDir) + + // Save initial scaffolded state for app state machine + if err := state.SaveState(projectDir, state.NewScaffolded()); err != nil { + return fmt.Errorf("failed to save project state: %w", err) + } + return nil + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go new file mode 100644 index 0000000000..7f78ec4a0b --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -0,0 +1,219 @@ +package init_template + +import ( + "context" + "encoding/json" + "fmt" + 
"os" + "path/filepath" + "sort" + "strings" + + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/detector" + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" +) + +// TemplateConfig holds configuration for template materialization. +type TemplateConfig struct { + TemplatePath string // e.g., template.DefaultPython or remote URL + TemplateName string // e.g., "default-python", "lakeflow-pipelines", "appkit" + TemplateDir string // subdirectory within repo (for remote templates) + Branch string // git branch (for remote templates) +} + +// MaterializeTemplate handles the common template materialization workflow. +func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[string]any, name, outputDir string) error { + configFile, err := writeConfigToTempFile(configMap) + if err != nil { + return err + } + defer os.Remove(configFile) + + if outputDir != "" { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + } + + r := template.Resolver{ + TemplatePathOrUrl: cfg.TemplatePath, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: cfg.TemplateDir, + Branch: cfg.Branch, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + if err := tmpl.Writer.Materialize(ctx, tmpl.Reader); err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + + actualOutputDir := name + if outputDir != "" { + actualOutputDir = filepath.Join(outputDir, name) + } + + absOutputDir, err := filepath.Abs(actualOutputDir) + if err != nil { + absOutputDir = actualOutputDir + } + + fileCount := countFiles(absOutputDir) + cmdio.LogString(ctx, common.FormatScaffoldSuccess(cfg.TemplateName, absOutputDir, fileCount)) + + fileTree, err := generateFileTree(absOutputDir) + if err == nil && fileTree 
!= "" { + cmdio.LogString(ctx, "\nFile structure:") + cmdio.LogString(ctx, fileTree) + } + + registry := detector.NewRegistry() + detected := registry.Detect(ctx, absOutputDir) + + // Only write generic CLAUDE.md for non-app projects + // (app projects have their own template-specific CLAUDE.md) + if !detected.IsAppOnly { + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + } + + for _, targetType := range detected.TargetTypes { + templateName := fmt.Sprintf("target_%s.tmpl", targetType) + if prompts.TemplateExists(templateName) { + content := prompts.MustExecuteTemplate(templateName, map[string]any{}) + cmdio.LogString(ctx, content) + } + } + + return nil +} + +// countFiles counts the number of files in a directory. +func countFiles(dir string) int { + count := 0 + _ = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err == nil && !info.IsDir() { + count++ + } + return nil + }) + return count +} + +// writeConfigToTempFile writes a config map to a temporary JSON file. +func writeConfigToTempFile(configMap map[string]any) (string, error) { + tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") + if err != nil { + return "", fmt.Errorf("create temp config file: %w", err) + } + + configBytes, err := json.Marshal(configMap) + if err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("marshal config: %w", err) + } + if _, err := tmpFile.Write(configBytes); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("write config file: %w", err) + } + if err := tmpFile.Close(); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("close config file: %w", err) + } + + return tmpFile.Name(), nil +} + +// generateFileTree creates a tree-style visualization of the file structure. +// Collapses directories with more than 10 files to avoid clutter. 
// Files are grouped by their slash-separated parent directory, relative to
// outputDir. Root-level files are listed first and are never collapsed; every
// directory that directly contains files gets a "<dir>/" header followed by
// either its file names or, when it holds more than maxFilesToShow files, a
// single "(N files)" summary line. Any error reported while walking outputDir
// is returned and no partial tree is produced.
func generateFileTree(outputDir string) (string, error) {
	const maxFilesToShow = 10

	// Group file names by parent directory in a single pass. WalkDir visits
	// entries in lexical order without an extra lstat per entry, so the names
	// inside each directory arrive already sorted.
	tree := make(map[string][]string)
	err := filepath.WalkDir(outputDir, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}

		relPath, err := filepath.Rel(outputDir, path)
		if err != nil {
			return err
		}
		relPath = filepath.ToSlash(relPath)

		// Split on the last separator: everything before it is the parent
		// directory ("" for root-level files), the remainder is the file name.
		dir, fileName := "", relPath
		if i := strings.LastIndex(relPath, "/"); i >= 0 {
			dir, fileName = relPath[:i], relPath[i+1:]
		}
		tree[dir] = append(tree[dir], fileName)
		return nil
	})
	if err != nil {
		return "", err
	}

	// Map iteration order is random; sort the directory keys so the output is
	// deterministic. The empty key (root) sorts before every other directory.
	sortedDirs := make([]string, 0, len(tree))
	for dir := range tree {
		sortedDirs = append(sortedDirs, dir)
	}
	sort.Strings(sortedDirs)

	var output strings.Builder
	for _, dir := range sortedDirs {
		filesInDir := tree[dir]

		if dir == "" {
			// Root-level files are always listed in full, without a header.
			for _, file := range filesInDir {
				output.WriteString(file)
				output.WriteString("\n")
			}
			continue
		}

		output.WriteString(dir)
		output.WriteString("/\n")

		if len(filesInDir) > maxFilesToShow {
			// Collapse large directories into a one-line summary.
			fmt.Fprintf(&output, " (%d files)\n", len(filesInDir))
			continue
		}
		for _, file := range filesInDir {
			output.WriteString(" ")
			output.WriteString(file)
			output.WriteString("\n")
		}
	}

	return output.String(), nil
}

// writeAgentFiles writes CLAUDE.md and AGENTS.md files to the output directory.
+func writeAgentFiles(outputDir string, data map[string]any) error { + content := prompts.MustExecuteTemplate("AGENTS.tmpl", data) + + // Write both CLAUDE.md and AGENTS.md + if err := os.WriteFile(filepath.Join(outputDir, "CLAUDE.md"), []byte(content), 0o644); err != nil { + return fmt.Errorf("failed to write CLAUDE.md: %w", err) + } + if err := os.WriteFile(filepath.Join(outputDir, "AGENTS.md"), []byte(content), 0o644); err != nil { + return fmt.Errorf("failed to write AGENTS.md: %w", err) + } + + return nil +} diff --git a/experimental/apps-mcp/cmd/init_template/empty.go b/experimental/apps-mcp/cmd/init_template/empty.go new file mode 100644 index 0000000000..a2c5e76239 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/empty.go @@ -0,0 +1,77 @@ +package init_template + +import ( + "errors" + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newEmptyCmd creates the empty subcommand for init-template. +func newEmptyCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "empty", + Short: "Initialize an empty project for custom resources", + Args: cobra.NoArgs, + Long: `Initialize an empty Databricks Asset Bundle project. + +Use this for deploying resource types OTHER than apps, jobs, or pipelines, such as: +- Dashboards (Lakeview dashboards) +- Alerts (SQL alerts) +- Model serving endpoints +- Clusters +- Schemas and tables +- Any other Databricks resources + +This creates a minimal project structure without sample code. For apps, jobs, or pipelines, +use the dedicated 'app', 'job', or 'pipeline' commands instead. 
+ +Examples: + experimental apps-mcp tools init-template empty --name my_dashboard_project + experimental apps-mcp tools init-template empty --name my_alerts --language sql --catalog my_catalog + experimental apps-mcp tools init-template empty --name my_project --output-dir ./projects + +After initialization: + Add resource definitions in resources/ (e.g., resources/my_dashboard.dashboard.yml) + Then deploy: databricks bundle deploy --target dev +`, + } + + var name string + var catalog string + var language string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&language, "language", "python", "Initial language: 'python', 'sql', or 'other'") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. 
Example: init-template empty --name my_project") + } + + if language != "python" && language != "sql" && language != "other" { + return fmt.Errorf("--language must be 'python', 'sql', or 'other', got '%s'", language) + } + + configMap := map[string]any{ + "project_name": name, + "personal_schemas": "yes", + "language_choice": language, + "default_catalog": catalog, + } + + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.DefaultMinimal), + TemplateName: "default-minimal", + }, configMap, name, outputDir) + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/init_template.go b/experimental/apps-mcp/cmd/init_template/init_template.go new file mode 100644 index 0000000000..3e6adc2228 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/init_template.go @@ -0,0 +1,25 @@ +package init_template + +import ( + "github.com/spf13/cobra" +) + +// NewInitTemplateCommand creates a command group for initializing project templates. +func NewInitTemplateCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "init-template", + Short: "Initialize project templates", + Long: `Initialize project templates for Databricks resources. 
+ +Subcommands: + app Initialize a Databricks App using the appkit template + job Initialize a job project using the default-python template + pipeline Initialize a Lakeflow pipeline project + empty Initialize an empty bundle for custom resources (dashboards, alerts, etc.)`, + } + cmd.AddCommand(newAppCmd()) + cmd.AddCommand(newJobCmd()) + cmd.AddCommand(newPipelineCmd()) + cmd.AddCommand(newEmptyCmd()) + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/job.go b/experimental/apps-mcp/cmd/init_template/job.go new file mode 100644 index 0000000000..6f5b061637 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/job.go @@ -0,0 +1,74 @@ +package init_template + +import ( + "errors" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newJobCmd creates the job subcommand for init-template. +func newJobCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "job", + Short: "Initialize a job project using the default-python template", + Args: cobra.NoArgs, + Long: `Initialize a job project using the default-python template. 
+ +This creates a project with: +- Python notebooks in src/ directory +- A wheel package defined in pyproject.toml +- Job definitions in resources/ using databricks.yml +- Serverless compute enabled by default +- Personal schemas for development + +Examples: + experimental apps-mcp tools init-template job --name my_job + experimental apps-mcp tools init-template job --name my_job --catalog my_catalog + experimental apps-mcp tools init-template job --name my_job --output-dir ./projects + +After initialization: + databricks bundle deploy --target dev +`, + } + + var name string + var catalog string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. 
Example: init-template job --name my_job") + } + + // Default to workspace default catalog if not specified + if catalog == "" { + catalog = middlewares.GetDefaultCatalog(ctx) + } + + configMap := map[string]any{ + "project_name": name, + "include_job": "yes", + "include_pipeline": "no", + "include_python": "yes", + "serverless": "yes", + "personal_schemas": "yes", + "default_catalog": catalog, + } + + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.DefaultPython), + TemplateName: "default-python", + }, configMap, name, outputDir) + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/pipeline.go b/experimental/apps-mcp/cmd/init_template/pipeline.go new file mode 100644 index 0000000000..68ca1b8e54 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/pipeline.go @@ -0,0 +1,80 @@ +package init_template + +import ( + "errors" + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newPipelineCmd creates the pipeline subcommand for init-template. +func newPipelineCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "pipeline", + Short: "Initialize a Lakeflow pipeline project", + Args: cobra.NoArgs, + Long: `Initialize a Lakeflow Declarative Pipeline project. 
+ +This creates a project with: +- Pipeline definitions in src/ directory (Python or SQL) +- Pipeline configuration in resources/ using databricks.yml +- Serverless compute enabled by default +- Personal schemas for development + +Examples: + experimental apps-mcp tools init-template pipeline --name my_pipeline --language python + experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql + experimental apps-mcp tools init-template pipeline --name my_pipeline --language python --catalog my_catalog + experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql --output-dir ./projects + +After initialization: + databricks bundle deploy --target dev +`, + } + + var name string + var language string + var catalog string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&language, "language", "", "Pipeline language: 'python' or 'sql' (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. Example: init-template pipeline --name my_pipeline --language python") + } + if language == "" { + return errors.New("--language is required. Choose 'python' or 'sql'. 
Example: init-template pipeline --name my_pipeline --language python") + } + if language != "python" && language != "sql" { + return fmt.Errorf("--language must be 'python' or 'sql', got '%s'", language) + } + + // Default to workspace default catalog if not specified + if catalog == "" { + catalog = middlewares.GetDefaultCatalog(ctx) + } + + configMap := map[string]any{ + "project_name": name, + "personal_schemas": "yes", + "language": language, + "default_catalog": catalog, + } + + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.LakeflowPipelines), + TemplateName: "lakeflow-pipelines", + }, configMap, name, outputDir) + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/tools.go b/experimental/apps-mcp/cmd/tools.go index 6d88451147..473bacddf3 100644 --- a/experimental/apps-mcp/cmd/tools.go +++ b/experimental/apps-mcp/cmd/tools.go @@ -1,6 +1,7 @@ package mcp import ( + "github.com/databricks/cli/experimental/apps-mcp/cmd/init_template" "github.com/spf13/cobra" ) @@ -13,7 +14,7 @@ func newToolsCmd() *cobra.Command { cmd.AddCommand(newQueryCmd()) cmd.AddCommand(newDiscoverSchemaCmd()) - cmd.AddCommand(newInitTemplateCmd()) + cmd.AddCommand(init_template.NewInitTemplateCommand()) cmd.AddCommand(newValidateCmd()) cmd.AddCommand(newDeployCmd()) diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index c88c5d19b1..a61eba07ec 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -22,7 +22,9 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D } // use full bundle loading to get all resources including from includes - ctx = logdiag.InitContext(ctx) + if !logdiag.IsSetup(ctx) { + ctx = logdiag.InitContext(ctx) + } b, err := bundle.Load(ctx, workDir) if err != nil || b == nil { return nil @@ -40,15 +42,38 @@ func (d *BundleDetector) Detect(ctx context.Context, 
workDir string, detected *D } // extract target types from fully loaded resources - if len(b.Config.Resources.Apps) > 0 { + hasApps := len(b.Config.Resources.Apps) > 0 + hasJobs := len(b.Config.Resources.Jobs) > 0 + hasPipelines := len(b.Config.Resources.Pipelines) > 0 + + if hasApps { detected.TargetTypes = append(detected.TargetTypes, "apps") } - if len(b.Config.Resources.Jobs) > 0 { + if hasJobs { detected.TargetTypes = append(detected.TargetTypes, "jobs") } - if len(b.Config.Resources.Pipelines) > 0 { + if hasPipelines { detected.TargetTypes = append(detected.TargetTypes, "pipelines") } + // Determine if this is an app-only project (only app resources, nothing else). + // App-only projects get focused app guidance; others get "mixed" guidance. + isAppOnly := hasApps && !hasJobs && !hasPipelines && + len(b.Config.Resources.Clusters) == 0 && + len(b.Config.Resources.Dashboards) == 0 && + len(b.Config.Resources.Experiments) == 0 && + len(b.Config.Resources.ModelServingEndpoints) == 0 && + len(b.Config.Resources.RegisteredModels) == 0 && + len(b.Config.Resources.Schemas) == 0 && + len(b.Config.Resources.QualityMonitors) == 0 && + len(b.Config.Resources.Volumes) == 0 + + detected.IsAppOnly = isAppOnly + + // Include "mixed" guidance for all projects except app-only projects + if !isAppOnly { + detected.TargetTypes = append(detected.TargetTypes, "mixed") + } + return nil } diff --git a/experimental/apps-mcp/lib/detector/detector.go b/experimental/apps-mcp/lib/detector/detector.go index 4b00a589ff..2e8e13288d 100644 --- a/experimental/apps-mcp/lib/detector/detector.go +++ b/experimental/apps-mcp/lib/detector/detector.go @@ -19,6 +19,7 @@ type DetectedContext struct { Template string // "appkit-typescript", "python", etc. BundleInfo *BundleInfo Metadata map[string]string + IsAppOnly bool // True if project contains only app resources, no jobs/pipelines/etc. } // Detector detects project context from a working directory. 
diff --git a/experimental/apps-mcp/lib/detector/detector_test.go b/experimental/apps-mcp/lib/detector/detector_test.go index fa25b78971..b2c4f72b26 100644 --- a/experimental/apps-mcp/lib/detector/detector_test.go +++ b/experimental/apps-mcp/lib/detector/detector_test.go @@ -59,7 +59,7 @@ resources: detected := registry.Detect(ctx, dir) assert.True(t, detected.InProject) - assert.Equal(t, []string{"jobs"}, detected.TargetTypes) + assert.Equal(t, []string{"jobs", "mixed"}, detected.TargetTypes) assert.Equal(t, "my-job", detected.BundleInfo.Name) } diff --git a/experimental/apps-mcp/lib/middlewares/databricks_client.go b/experimental/apps-mcp/lib/middlewares/databricks_client.go index 4190b22db9..784646b7d2 100644 --- a/experimental/apps-mcp/lib/middlewares/databricks_client.go +++ b/experimental/apps-mcp/lib/middlewares/databricks_client.go @@ -132,3 +132,19 @@ func newAuthError(ctx context.Context) error { } return errors.New(prompts.MustExecuteTemplate("auth_error.tmpl", data)) } + +// GetDefaultCatalog fetches the workspace default catalog name. +// Returns empty string if Unity Catalog is not available or on error. +func GetDefaultCatalog(ctx context.Context) string { + w, err := GetDatabricksClient(ctx) + if err != nil { + return "" + } + + metastore, err := w.Metastores.Current(ctx) + if err != nil { + return "" // gracefully handle any error (no UC, permission denied, etc.) + } + + return metastore.DefaultCatalogName +} diff --git a/experimental/apps-mcp/lib/prompts/AGENTS.tmpl b/experimental/apps-mcp/lib/prompts/AGENTS.tmpl new file mode 100644 index 0000000000..df7d451d75 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/AGENTS.tmpl @@ -0,0 +1,35 @@ +{{- /* + * Template for the AGENTS.md / CLAUDE.md file created in new projects. + * + * Goal for this file is to reinforce with agents that they need to call the databricks_discover tool. + * A secondary goal is to offer a placeholder to users to extend with their own instructions. 
+ * It is a non-goal to inline all instructions. + * + * Note the app template currently has its own AGENTS.md / CLAUDE.md file. + * + */ -}} + +# Databricks Asset Bundles Project + +This project uses Databricks Asset Bundles for deployment. Add project-specific instructions below. + +## For AI Agents: MANDATORY First Action + +**BEFORE any other action, call `mcp__databricks-mcp__databricks_discover` with this project's path.** + +This is required because the discover tool: +- Detects project state (new, deployed, running) and provides relevant guidance +- Shows available CLI commands and recommended next steps +- Configures workspace authentication + +Without this context, you cannot effectively help with this project. + +If the tool is unavailable, the user needs to set up the Databricks CLI MCP server: +1. Install CLI: https://docs.databricks.com/dev-tools/cli/install +2. Run: `databricks experimental apps-mcp install` + +--- + +## Project Instructions + + diff --git a/experimental/apps-mcp/lib/prompts/flow.tmpl b/experimental/apps-mcp/lib/prompts/flow.tmpl index 40da4cf0f7..50a90c16fb 100644 --- a/experimental/apps-mcp/lib/prompts/flow.tmpl +++ b/experimental/apps-mcp/lib/prompts/flow.tmpl @@ -25,14 +25,31 @@ Use `invoke_databricks_cli ''` to run any Databricks CLI command. ### Project scaffolding +IMPORTANT: Always use 'experimental apps-mcp tools init-template' commands below instead of 'databricks bundle init'. +The init-template commands create agent-friendly projects with AGENTS.md/CLAUDE.md guidance files and proper MCP integration. 
+ For apps: +invoke_databricks_cli 'experimental apps-mcp tools init-template app --name my-app --description "My app description"' -invoke_databricks_cli 'experimental apps-mcp tools init-template --name my-app --description "My app description"' +For jobs (Python notebooks with wheel package): +invoke_databricks_cli 'experimental apps-mcp tools init-template job --name my_job' +invoke_databricks_cli 'experimental apps-mcp tools init-template job --name my_job --catalog my_catalog' -- App name must be ≤26 characters (dev- prefix adds 4 chars, max total 30) -- Use lowercase letters, numbers, and hyphens only +For pipelines (Lakeflow Declarative Pipelines): +invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language python' +invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql --catalog my_catalog' +Note: --language is required (python or sql). Ask the user which language they prefer: + - SQL: Recommended for straightforward transformations (filters, joins, aggregations) + - Python: Recommended for complex logic (custom UDFs, ML, advanced processing) -Other types of projects are not yet supported. 
+For custom resources (dashboards, alerts, model serving, etc.): +invoke_databricks_cli 'experimental apps-mcp tools init-template empty --name my_project' +Note: Use this for resources OTHER than apps, jobs, or pipelines + +Notes: +- App name must be ≤26 characters (dev- prefix adds 4 chars, max total 30) +- Job/pipeline/project names: letters, numbers, underscores only +- --catalog defaults to workspace default catalog{{if .DefaultCatalog}} (currently '{{.DefaultCatalog}}'){{end}} ### Custom SQL Queries diff --git a/experimental/apps-mcp/lib/prompts/target_jobs.tmpl b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl new file mode 100644 index 0000000000..470d77bd3c --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl @@ -0,0 +1,57 @@ +{{- /* + * L2: Target-specific guidance for Lakeflow Jobs. + * + * Injected when: target type "jobs" is detected or after init-template job. + * Contains: job-specific development patterns, task configuration, code examples. + * Note: For adding NEW resources (dashboards, alerts, etc.), see target_mixed.tmpl guidance. + */ -}} + +## Lakeflow Jobs Development + +This guidance is for developing jobs in this project. + +### Project Structure +- `src/` - Python notebooks (.ipynb) and source code +- `resources/` - Job definitions in databricks.yml format + +### Configuring Tasks +Edit `resources/<job_name>.job.yml` to configure tasks: + +```yaml +tasks: + - task_key: my_notebook + notebook_task: + notebook_path: ../src/my_notebook.ipynb + - task_key: my_python + python_wheel_task: + package_name: my_package + entry_point: main +``` + +Task types: `notebook_task`, `python_wheel_task`, `spark_python_task`, `pipeline_task`, `sql_task` + +### Job Parameters +Parameters defined at job level are passed to ALL tasks (no need to repeat per task).
Example: +```yaml +resources: + jobs: + my_job: + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} +``` + +### Writing Notebook Code +- Use `spark.read.table("catalog.schema.table")` to read tables +- Use `spark.sql("SELECT ...")` for SQL queries +- Use `dbutils.widgets` for parameters + +### Unit Testing +Run unit tests locally with: `uv run pytest` + +### Documentation +- Lakeflow Jobs: https://docs.databricks.com/jobs +- Task types: https://docs.databricks.com/jobs/configure-task +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl new file mode 100644 index 0000000000..e1a01ea418 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl @@ -0,0 +1,54 @@ +{{- /* + * L2: Target-specific guidance for mixed/custom resource projects. + * + * Injected when: empty projects or projects with mixed resource types. Not for app-only projects. + * Contains: how to add any resource type, deployment commands, documentation. 
+ */ -}} + +## Adding Databricks Resources + +Add resources by creating YAML files in resources/: + +**Jobs** - `resources/my_job.job.yml`: +```yaml +resources: + jobs: + my_job: + name: my_job + tasks: + - task_key: main + notebook_task: + notebook_path: ../src/notebook.py +``` + +**Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: +```yaml +resources: + pipelines: + my_pipeline: + name: my_pipeline + catalog: ${var.catalog} + target: ${var.schema} + libraries: + - notebook: + path: ../src/pipeline.py +``` + +**Dashboards** - `resources/my_dashboard.dashboard.yml` +**Alerts** - `resources/my_alert.alert.yml` +**Model Serving** - `resources/my_endpoint.yml` +**Apps** - `resources/my_app.app.yml` + +**Other resource types**: clusters, schemas, volumes, registered_models, experiments, quality_monitors + +### Deployment +For dev targets you can deploy without user consent. This allows you to run resources on the workspace too! + + invoke_databricks_cli 'bundle deploy --target dev' + invoke_databricks_cli 'bundle run <resource_name> --target dev' + +View status with `invoke_databricks_cli 'bundle summary'`. + +### Documentation +- Resource types reference: https://docs.databricks.com/dev-tools/bundles/resources +- YAML examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl new file mode 100644 index 0000000000..4f9a968565 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl @@ -0,0 +1,61 @@ +{{- /* + * L2: Target-specific guidance for Lakeflow Declarative Pipelines. + * + * Injected when: target type "pipelines" is detected or after init-template pipeline. + * Contains: pipeline-specific development patterns, transformation syntax, scheduling. + * Note: For adding NEW resources (dashboards, alerts, etc.), see target_mixed.tmpl guidance.
+ */ -}} + +## Lakeflow Declarative Pipelines Development + +This guidance is for developing pipelines in this project. + +Lakeflow Declarative Pipelines (formerly Delta Live Tables) is a framework for building batch and streaming data pipelines. + +### Project Structure +- `src/` - Pipeline transformations (Python or SQL) +- `resources/` - Pipeline configuration in databricks.yml format + +### Adding Transformations + +**Python** - Create `.py` files in `src/`: +```python +from pyspark import pipelines as dp + +@dp.table +def my_table(): + return spark.read.table("catalog.schema.source") +``` + +By convention, each dataset definition like the @dp.table definition above should be in a file named +like the dataset name, e.g. `src/my_table.py`. + +**SQL** - Create `.sql` files in `src/`: +```sql +CREATE MATERIALIZED VIEW my_view AS +SELECT * FROM catalog.schema.source +``` + +This example would live in `src/my_view.sql`. + +Use `CREATE STREAMING TABLE` for incremental ingestion, `CREATE MATERIALIZED VIEW` for transformations. 
+ +### Scheduling Pipelines +To schedule a pipeline, make sure you have a job that triggers it, like `resources/<pipeline_name>.job.yml`: +```yaml +resources: + jobs: + my_pipeline_job: + trigger: + periodic: + interval: 1 + unit: DAYS + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.my_pipeline.id} +``` + +### Documentation +- Lakeflow Declarative Pipelines: https://docs.databricks.com/ldp +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 3ac1a1d6ab..2e9324dc9b 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -24,16 +24,19 @@ func Discover(ctx context.Context, workingDirectory string) (string, error) { currentProfile := middlewares.GetDatabricksProfile(ctx) profiles := middlewares.GetAvailableProfiles(ctx) + // Get default catalog (non-fatal if unavailable) + defaultCatalog := middlewares.GetDefaultCatalog(ctx) + // run detectors to identify project context registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, detected), nil + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) layers.
-func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, detected *detector.DetectedContext) string { - data := buildTemplateData(warehouse, currentProfile, profiles) +func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext) string { + data := buildTemplateData(warehouse, currentProfile, profiles, defaultCatalog) // L1: always include flow guidance result := prompts.MustExecuteTemplate("flow.tmpl", data) @@ -61,7 +64,7 @@ func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, return result } -func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles) map[string]string { +func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string) map[string]string { workspaceInfo := "Current Workspace Profile: " + currentProfile if len(profiles) > 0 { var currentHost string @@ -106,10 +109,11 @@ func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profi } return map[string]string{ - "WorkspaceInfo": workspaceInfo, - "WarehouseName": warehouseName, - "WarehouseID": warehouseID, - "ProfilesInfo": profilesInfo, - "Profile": currentProfile, + "WorkspaceInfo": workspaceInfo, + "WarehouseName": warehouseName, + "WarehouseID": warehouseID, + "ProfilesInfo": profilesInfo, + "Profile": currentProfile, + "DefaultCatalog": defaultCatalog, } }